From e6f3f7e4dc76eb8d8a546dc66621a02c5c84f4ac Mon Sep 17 00:00:00 2001 From: Noralf Trønnes Date: Fri, 12 Apr 2019 11:41:30 +0200 Subject: spi: Add spi_is_bpw_supported() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This let SPI clients check if the controller supports a particular word width. drivers/gpu/drm/tinydrm/mipi-dbi.c will use this to determine if the controller supports 16-bit for RGB565 pixels. If it doesn't it will swap the bytes before transfer on little endian machines. Signed-off-by: Noralf Trønnes Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 662b336aa2e4..b30e3d13a5ac 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -983,6 +983,26 @@ spi_max_transfer_size(struct spi_device *spi) return min(tr_max, msg_max); } +/** + * spi_is_bpw_supported - Check if bits per word is supported + * @spi: SPI device + * @bpw: Bits per word + * + * This function checks to see if the SPI controller supports @bpw. + * + * Returns: + * True if @bpw is supported, false otherwise. + */ +static inline bool spi_is_bpw_supported(struct spi_device *spi, u32 bpw) +{ + u32 bpw_mask = spi->master->bits_per_word_mask; + + if (bpw == 8 || (bpw <= 32 && bpw_mask & SPI_BPW_MASK(bpw))) + return true; + + return false; +} + /*---------------------------------------------------------------------------*/ /* SPI transfer replacement methods which make use of spi_res */ -- cgit v1.2.3 From 67b886d290052dbf2bcfc876a5ae41a5fe461edf Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Thu, 21 Mar 2019 15:09:56 -0500 Subject: dma-buf: Remove leftover [un]map_atomic comments The map_atomic/unmap_atomic callbacks have been removed, remove the related comments. Fixes: f664a5269542 ("dma-buf: remove kmap_atomic interface") Signed-off-by: Andrew F. Davis Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20190321200957.16938-1-afd@ti.com --- include/linux/dma-buf.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 58725f890b5b..e4a8dab2bc54 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -39,11 +39,6 @@ struct dma_buf_attachment; /** * struct dma_buf_ops - operations possible on struct dma_buf - * @map_atomic: [optional] maps a page from the buffer into kernel address - * space, users may not block until the subsequent unmap call. - * This callback must not sleep. - * @unmap_atomic: [optional] unmaps a atomically mapped page from the buffer. - * This Callback must not sleep. * @map: [optional] maps a page from the buffer into kernel address space. * @unmap: [optional] unmaps a page from the buffer. * @vmap: [optional] creates a virtual mapping for the buffer into kernel -- cgit v1.2.3 From d5ae7712b7ffbb435e8f3d98f2123eff4734c77f Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Thu, 21 Mar 2019 15:09:57 -0500 Subject: dma-buf: Update [un]map documentation to match the other functions Other function have inline documentation, a couple still have theirs at the top of the structure, update the docs and move them inline. Signed-off-by: Andrew F. Davis Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20190321200957.16938-2-afd@ti.com --- include/linux/dma-buf.h | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index e4a8dab2bc54..a0bd071466fc 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -39,8 +39,6 @@ struct dma_buf_attachment; /** * struct dma_buf_ops - operations possible on struct dma_buf - * @map: [optional] maps a page from the buffer into kernel address space. - * @unmap: [optional] unmaps a page from the buffer. * @vmap: [optional] creates a virtual mapping for the buffer into kernel * address space. Same restrictions as for vmap and friends apply. * @vunmap: [optional] unmaps a vmap from the buffer @@ -200,8 +198,6 @@ struct dma_buf_ops { * to be restarted. */ int (*end_cpu_access)(struct dma_buf *, enum dma_data_direction); - void *(*map)(struct dma_buf *, unsigned long); - void (*unmap)(struct dma_buf *, unsigned long, void *); /** * @mmap: @@ -240,6 +236,31 @@ struct dma_buf_ops { */ int (*mmap)(struct dma_buf *, struct vm_area_struct *vma); + /** + * @map: + * + * Maps a page from the buffer into kernel address space. The page is + * specified by offset into the buffer in PAGE_SIZE units. + * + * This callback is optional. + * + * Returns: + * + * Virtual address pointer where requested page can be accessed. NULL + * on error or when this function is unimplemented by the exporter. + */ + void *(*map)(struct dma_buf *, unsigned long); + + /** + * @unmap: + * + * Unmaps a page from the buffer. Page offset and address pointer should + * be the same as the one passed to and returned by matching call to map. + * + * This callback is optional. + */ + void (*unmap)(struct dma_buf *, unsigned long, void *); + void *(*vmap)(struct dma_buf *); void (*vunmap)(struct dma_buf *, void *vaddr); }; -- cgit v1.2.3 From 0ff2de8bb163551ec4230a5a6f3c40c1f6adec4f Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Sat, 23 Feb 2019 08:49:48 +0000 Subject: spi: core: allow defining time that cs is deasserted For some SPI devices that support speed_hz > 1MHz the default 10 us delay when cs_change = 1 is typically way to long and may result in poor spi bus utilization. This patch makes it possible to control the delay at micro or nano second resolution on a per spi_transfer basis. It even allows an "as fast as possible" mode with: xfer.cs_change_delay_unit = SPI_DELAY_UNIT_NSECS; xfer.cs_change_delay = 0; The delay code is shared between delay_usecs and cs_change_delay for consistency and reuse, so in the future this change_delay_unit could also apply to delay_usec as well. Note that on slower SOCs/CPU actually reaching ns deasserts on cs is not realistic as the gpio overhead alone (without any delays added ) may already leave cs deasserted for more than 1us - at least on a raspberry pi. But at the very least this way we can keep it as short as possible. Signed-off-by: Martin Sperl Signed-off-by: Mark Brown --- drivers/spi/spi.c | 59 ++++++++++++++++++++++++++++++++++++++++--------- include/linux/spi/spi.h | 7 ++++++ 2 files changed, 56 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 5e75944ad5d1..7e8ffe3fdc00 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1090,6 +1090,52 @@ static int spi_transfer_wait(struct spi_controller *ctlr, return 0; } +static void _spi_transfer_delay_ns(u32 ns) +{ + if (!ns) + return; + if (ns <= 1000) { + ndelay(ns); + } else { + u32 us = DIV_ROUND_UP(ns, 1000); + + if (us <= 10) + udelay(us); + else + usleep_range(us, us + DIV_ROUND_UP(us, 10)); + } +} + +static void _spi_transfer_cs_change_delay(struct spi_message *msg, + struct spi_transfer *xfer) +{ + u32 delay = xfer->cs_change_delay; + u32 unit = xfer->cs_change_delay_unit; + + /* return early on "fast" mode - for everything but USECS */ + if (!delay && unit != SPI_DELAY_UNIT_USECS) + return; + + switch (unit) { + case SPI_DELAY_UNIT_USECS: + /* for compatibility use default of 10us */ + if (!delay) + delay = 10000; + else + delay *= 1000; + break; + case SPI_DELAY_UNIT_NSECS: /* nothing to do here */ + break; + default: + dev_err_once(&msg->spi->dev, + "Use of unsupported delay unit %i, using default of 10us\n", + xfer->cs_change_delay_unit); + delay = 10000; + } + /* now sleep for the requested amount of time */ + _spi_transfer_delay_ns(delay); +} + /* * spi_transfer_one_message - Default implementation of transfer_one_message() * @@ -1148,14 +1194,8 @@ static int spi_transfer_one_message(struct spi_controller *ctlr, if (msg->status != -EINPROGRESS) goto out; - if (xfer->delay_usecs) { - u16 us = xfer->delay_usecs; - - if (us <= 10) - udelay(us); - else - usleep_range(us, us + DIV_ROUND_UP(us, 10)); - } + if (xfer->delay_usecs) + _spi_transfer_delay_ns(xfer->delay_usecs * 1000); if (xfer->cs_change) { if (list_is_last(&xfer->transfer_list, @@ -1163,7 +1203,7 @@ static int spi_transfer_one_message(struct spi_controller *ctlr, keep_cs = true; } else { spi_set_cs(msg->spi, false); - udelay(10); + _spi_transfer_cs_change_delay(msg, xfer); spi_set_cs(msg->spi, true); } } @@ -3757,4 +3797,3 @@ err0: * include needing to have boardinfo data structures be much more public. */ postcore_initcall(spi_init); - diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 053abd22ad31..023beb9e9e4b 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -735,6 +735,9 @@ extern void spi_res_release(struct spi_controller *ctlr, * @bits_per_word: select a bits_per_word other than the device default * for this transfer. If 0 the default (from @spi_device) is used. * @cs_change: affects chipselect after this transfer completes + * @cs_change_delay: delay between cs deassert and assert when + * @cs_change is set and @spi_transfer is not the last in @spi_message + * @cs_change_delay_unit: unit of cs_change_delay * @delay_usecs: microseconds to delay after this transfer before * (optionally) changing the chipselect status, then starting * the next transfer or completing this @spi_message. @@ -824,6 +827,10 @@ struct spi_transfer { u8 bits_per_word; u8 word_delay_usecs; u16 delay_usecs; + u16 cs_change_delay; + u8 cs_change_delay_unit; +#define SPI_DELAY_UNIT_USECS 0 +#define SPI_DELAY_UNIT_NSECS 1 u32 speed_hz; u16 word_delay; -- cgit v1.2.3 From d5864e5bed96db7230da45463d6ae7af5b3b4399 Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Sat, 23 Feb 2019 08:49:50 +0000 Subject: spi: core: allow defining time that cs is deasserted as a multiple of SCK Support setting a delay between cs assert and deassert as a multiple of spi clock length. Signed-off-by: Martin Sperl Signed-off-by: Mark Brown --- drivers/spi/spi.c | 8 ++++++++ include/linux/spi/spi.h | 1 + 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 7e8ffe3fdc00..cfa3c3decb8a 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1111,6 +1111,7 @@ static void _spi_transfer_cs_change_delay(struct spi_message *msg, { u32 delay = xfer->cs_change_delay; u32 unit = xfer->cs_change_delay_unit; + u32 hz; /* return early on "fast" mode - for everything but USECS */ if (!delay && unit != SPI_DELAY_UNIT_USECS) @@ -1126,6 +1127,13 @@ static void _spi_transfer_cs_change_delay(struct spi_message *msg, break; case SPI_DELAY_UNIT_NSECS: /* nothing to do here */ break; + case SPI_DELAY_UNIT_SCK: + /* if there is no effective speed know, then approximate + * by underestimating with half the requested hz + */ + hz = xfer->effective_speed_hz ?: xfer->speed_hz / 2; + delay *= DIV_ROUND_UP(1000000000, hz); + break; default: dev_err_once(&msg->spi->dev, "Use of unsupported delay unit %i, using default of 10us\n", diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 023beb9e9e4b..e552a036cb4d 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -831,6 +831,7 @@ struct spi_transfer { u8 cs_change_delay_unit; #define SPI_DELAY_UNIT_USECS 0 #define SPI_DELAY_UNIT_NSECS 1 +#define SPI_DELAY_UNIT_SCK 2 u32 speed_hz; u16 word_delay; -- cgit v1.2.3 From aec71d794731c441a9b7ee9705efedd2f6054173 Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Tue, 7 May 2019 15:08:14 +0200 Subject: livepatch: Remove stale kobj_added entries from kernel-doc descriptions Commit 4d141ab3416d ("livepatch: Remove custom kobject state handling") removed kobj_added members of klp_func, klp_object and klp_patch structures. kernel-doc descriptions were omitted by accident. Remove them. Reported-by: Kamalesh Babulal Signed-off-by: Miroslav Benes Acked-by: Joe Lawrence Reviewed-by: Kamalesh Babulal Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index a14bab1a0a3e..955d46f37b72 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -47,7 +47,6 @@ * @stack_node: list node for klp_ops func_stack list * @old_size: size of the old function * @new_size: size of the new function - * @kobj_added: @kobj has been added and needs freeing * @nop: temporary patch to use the original code again; dyn. allocated * @patched: the func has been added to the klp_ops list * @transition: the func is currently being applied or reverted @@ -125,7 +124,6 @@ struct klp_callbacks { * @node: list node for klp_patch obj_list * @mod: kernel module associated with the patched object * (NULL for vmlinux) - * @kobj_added: @kobj has been added and needs freeing * @dynamic: temporary object for nop functions; dynamically allocated * @patched: the object's funcs have been added to the klp_ops list */ @@ -152,7 +150,6 @@ struct klp_object { * @list: list node for global list of actively used patches * @kobj: kobject for sysfs resources * @obj_list: dynamic list of the object entries - * @kobj_added: @kobj has been added and needs freeing * @enabled: the patch is enabled (but operation may be incomplete) * @forced: was involved in a forced transition * @free_work: patch cleanup from workqueue-context -- cgit v1.2.3 From 09ed79d6d75f06cc963a78f25463251b0a758dc7 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 7 May 2019 10:01:47 -0700 Subject: percpu_ref: introduce PERCPU_REF_ALLOW_REINIT flag In most cases percpu reference counters are not switched to the percpu mode after they reach the atomic mode. Some obvious exceptions are reference counters which are initialized into the atomic mode (using PERCPU_REF_INIT_ATOMIC and PERCPU_REF_INIT_DEAD flags), and there are few other exceptions. But in most cases there is no way back, and once the reference counter is switched to the atomic mode, there is no reason to wait for percpu_ref_exit() to release the percpu memory. Of course, the size of a single counter is not so big, but because it can pin the whole percpu block in memory, the memory footprint can be noticeable (e.g. on my 32 CPUs machine a percpu block is 8Mb large). To make releasing of the percpu memory as early as possible, let's introduce the PERCPU_REF_ALLOW_REINIT flag with the following semantics: it has to be set in order to switch a percpu reference counter to the percpu mode after the initialization. PERCPU_REF_INIT_ATOMIC and PERCPU_REF_INIT_DEAD flags will implicitly assume PERCPU_REF_ALLOW_REINIT. This patch doesn't introduce any functional change to avoid any regressions. It will be done later in the patchset after adjusting all call sites, which are reviving percpu counters. Signed-off-by: Roman Gushchin Acked-by: Tejun Heo Signed-off-by: Dennis Zhou --- include/linux/percpu-refcount.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index b297cd1cd4f1..0f0240af8520 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -75,14 +75,21 @@ enum { * operation using percpu_ref_switch_to_percpu(). If initialized * with this flag, the ref will stay in atomic mode until * percpu_ref_switch_to_percpu() is invoked on it. + * Implies ALLOW_REINIT. */ PERCPU_REF_INIT_ATOMIC = 1 << 0, /* * Start dead w/ ref == 0 in atomic mode. Must be revived with - * percpu_ref_reinit() before used. Implies INIT_ATOMIC. + * percpu_ref_reinit() before used. Implies INIT_ATOMIC and + * ALLOW_REINIT. */ PERCPU_REF_INIT_DEAD = 1 << 1, + + /* + * Allow switching from atomic mode to percpu mode. + */ + PERCPU_REF_ALLOW_REINIT = 1 << 2, }; struct percpu_ref { -- cgit v1.2.3 From 7d9ab9b6adffd9c474c1274acb5f6208f9a09cf3 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 7 May 2019 10:01:50 -0700 Subject: percpu_ref: release percpu memory early without PERCPU_REF_ALLOW_REINIT Release percpu memory after finishing the switch to the atomic mode if only PERCPU_REF_ALLOW_REINIT isn't set. Signed-off-by: Roman Gushchin Acked-by: Tejun Heo Signed-off-by: Dennis Zhou --- include/linux/percpu-refcount.h | 1 + lib/percpu-refcount.c | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 0f0240af8520..7aef0abc194a 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -102,6 +102,7 @@ struct percpu_ref { percpu_ref_func_t *release; percpu_ref_func_t *confirm_switch; bool force_atomic:1; + bool allow_reinit:1; struct rcu_head rcu; }; diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 9877682e49c7..501b517bd3db 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -69,11 +69,14 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, return -ENOMEM; ref->force_atomic = flags & PERCPU_REF_INIT_ATOMIC; + ref->allow_reinit = flags & PERCPU_REF_ALLOW_REINIT; - if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD)) + if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD)) { ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC; - else + ref->allow_reinit = true; + } else { start_count += PERCPU_COUNT_BIAS; + } if (flags & PERCPU_REF_INIT_DEAD) ref->percpu_count_ptr |= __PERCPU_REF_DEAD; @@ -119,6 +122,9 @@ static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu) ref->confirm_switch = NULL; wake_up_all(&percpu_ref_switch_waitq); + if (!ref->allow_reinit) + percpu_ref_exit(ref); + /* drop ref from percpu_ref_switch_to_atomic() */ percpu_ref_put(ref); } @@ -194,6 +200,9 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) return; + if (WARN_ON_ONCE(!ref->allow_reinit)) + return; + atomic_long_add(PERCPU_COUNT_BIAS, &ref->count); /* -- cgit v1.2.3 From 5d7e2b5ed5858fe739d4cb8ad22dcce7bd9dbe7b Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Sat, 23 Feb 2019 08:49:49 +0000 Subject: spi: core: allow reporting the effectivly used speed_hz for a transfer Provide a means for the spi bus driver to report the effectively used spi clock frequency used for each spi_transfer. Signed-off-by: Martin Sperl Signed-off-by: Mark Brown --- drivers/spi/spi.c | 1 + include/linux/spi/spi.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index cfa3c3decb8a..e9bf0c23da50 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -3126,6 +3126,7 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message) */ message->frame_length = 0; list_for_each_entry(xfer, &message->transfers, transfer_list) { + xfer->effective_speed_hz = 0; message->frame_length += xfer->len; if (!xfer->bits_per_word) xfer->bits_per_word = spi->bits_per_word; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index f55b20254612..d0c5ba746e01 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -745,6 +745,9 @@ extern void spi_res_release(struct spi_controller *ctlr, * (set by bits_per_word) transmission. * @word_delay: clock cycles to inter word delay after each word size * (set by bits_per_word) transmission. + * @effective_speed_hz: the effective SCK-speed that was used to + * transfer this transfer. Set to 0 if the spi bus driver does + * not support it. * @transfer_list: transfers are sequenced through @spi_message.transfers * @tx_sg: Scatterlist for transmit, currently not for client use * @rx_sg: Scatterlist for receive, currently not for client use @@ -835,6 +838,8 @@ struct spi_transfer { u32 speed_hz; u16 word_delay; + u32 effective_speed_hz; + struct list_head transfer_list; }; -- cgit v1.2.3 From 6319aee10e530315689db7609a7d4c444124ff22 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 8 May 2019 15:19:13 +0530 Subject: opp: Attach genpds to devices from within OPP core The OPP core requires the virtual device pointers to set performance state on behalf of the device, for the multiple power domain case. The genpd API (dev_pm_domain_attach_by_name()) has evolved now to support even the single power domain case and that lets us add common code for handling both the cases more efficiently. The virtual device structure returned by dev_pm_domain_attach_by_name() isn't normally used by the cpufreq drivers as they don't manage power on/off of the domains and so is only useful for the OPP core. This patch moves all the complexity into the OPP core to make the end drivers simple. The earlier APIs dev_pm_opp_{set|put}_genpd_virt_dev() are reworked into dev_pm_opp_{attach|detach}_genpd(). The new helper dev_pm_opp_attach_genpd() accepts a NULL terminated array of strings which contains names of all the genpd's to attach. It then attaches all the domains and saves the pointers to the virtual devices. The other helper undo the work done by this helper. Tested-by: Niklas Cassel Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 128 +++++++++++++++++++++++++++++++------------------ include/linux/pm_opp.h | 8 ++-- 2 files changed, 86 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 0e7703fe733f..67d6b0caeab1 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1744,91 +1744,127 @@ void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table) } EXPORT_SYMBOL_GPL(dev_pm_opp_unregister_set_opp_helper); +static void _opp_detach_genpd(struct opp_table *opp_table) +{ + int index; + + for (index = 0; index < opp_table->required_opp_count; index++) { + if (!opp_table->genpd_virt_devs[index]) + continue; + + dev_pm_domain_detach(opp_table->genpd_virt_devs[index], false); + opp_table->genpd_virt_devs[index] = NULL; + } +} + /** - * dev_pm_opp_set_genpd_virt_dev - Set virtual genpd device for an index - * @dev: Consumer device for which the genpd device is getting set. - * @virt_dev: virtual genpd device. - * @index: index. + * dev_pm_opp_attach_genpd - Attach genpd(s) for the device and save virtual device pointer + * @dev: Consumer device for which the genpd is getting attached. + * @names: Null terminated array of pointers containing names of genpd to attach. * * Multiple generic power domains for a device are supported with the help of * virtual genpd devices, which are created for each consumer device - genpd * pair. These are the device structures which are attached to the power domain * and are required by the OPP core to set the performance state of the genpd. + * The same API also works for the case where single genpd is available and so + * we don't need to support that separately. * * This helper will normally be called by the consumer driver of the device - * "dev", as only that has details of the genpd devices. + * "dev", as only that has details of the genpd names. * - * This helper needs to be called once for each of those virtual devices, but - * only if multiple domains are available for a device. Otherwise the original - * device structure will be used instead by the OPP core. + * This helper needs to be called once with a list of all genpd to attach. + * Otherwise the original device structure will be used instead by the OPP core. */ -struct opp_table *dev_pm_opp_set_genpd_virt_dev(struct device *dev, - struct device *virt_dev, - int index) +struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names) { struct opp_table *opp_table; + struct device *virt_dev; + int index, ret = -EINVAL; + const char **name = names; opp_table = dev_pm_opp_get_opp_table(dev); if (!opp_table) return ERR_PTR(-ENOMEM); + /* + * If the genpd's OPP table isn't already initialized, parsing of the + * required-opps fail for dev. We should retry this after genpd's OPP + * table is added. + */ + if (!opp_table->required_opp_count) { + ret = -EPROBE_DEFER; + goto put_table; + } + mutex_lock(&opp_table->genpd_virt_dev_lock); - if (unlikely(!opp_table->genpd_virt_devs || - index >= opp_table->required_opp_count || - opp_table->genpd_virt_devs[index])) { + while (*name) { + index = of_property_match_string(dev->of_node, + "power-domain-names", *name); + if (index < 0) { + dev_err(dev, "Failed to find power domain: %s (%d)\n", + *name, index); + goto err; + } - dev_err(dev, "Invalid request to set required device\n"); - dev_pm_opp_put_opp_table(opp_table); - mutex_unlock(&opp_table->genpd_virt_dev_lock); + if (index >= opp_table->required_opp_count) { + dev_err(dev, "Index can't be greater than required-opp-count - 1, %s (%d : %d)\n", + *name, opp_table->required_opp_count, index); + goto err; + } - return ERR_PTR(-EINVAL); + if (opp_table->genpd_virt_devs[index]) { + dev_err(dev, "Genpd virtual device already set %s\n", + *name); + goto err; + } + + virt_dev = dev_pm_domain_attach_by_name(dev, *name); + if (IS_ERR(virt_dev)) { + ret = PTR_ERR(virt_dev); + dev_err(dev, "Couldn't attach to pm_domain: %d\n", ret); + goto err; + } + + opp_table->genpd_virt_devs[index] = virt_dev; + name++; } - opp_table->genpd_virt_devs[index] = virt_dev; mutex_unlock(&opp_table->genpd_virt_dev_lock); return opp_table; + +err: + _opp_detach_genpd(opp_table); + mutex_unlock(&opp_table->genpd_virt_dev_lock); + +put_table: + dev_pm_opp_put_opp_table(opp_table); + + return ERR_PTR(ret); } +EXPORT_SYMBOL_GPL(dev_pm_opp_attach_genpd); /** - * dev_pm_opp_put_genpd_virt_dev() - Releases resources blocked for genpd device. - * @opp_table: OPP table returned by dev_pm_opp_set_genpd_virt_dev(). - * @virt_dev: virtual genpd device. - * - * This releases the resource previously acquired with a call to - * dev_pm_opp_set_genpd_virt_dev(). The consumer driver shall call this helper - * if it doesn't want OPP core to update performance state of a power domain - * anymore. + * dev_pm_opp_detach_genpd() - Detach genpd(s) from the device. + * @opp_table: OPP table returned by dev_pm_opp_attach_genpd(). + * + * This detaches the genpd(s), resets the virtual device pointers, and puts the + * OPP table. */ -void dev_pm_opp_put_genpd_virt_dev(struct opp_table *opp_table, - struct device *virt_dev) +void dev_pm_opp_detach_genpd(struct opp_table *opp_table) { - int i; - /* * Acquire genpd_virt_dev_lock to make sure virt_dev isn't getting * used in parallel. */ mutex_lock(&opp_table->genpd_virt_dev_lock); - - for (i = 0; i < opp_table->required_opp_count; i++) { - if (opp_table->genpd_virt_devs[i] != virt_dev) - continue; - - opp_table->genpd_virt_devs[i] = NULL; - dev_pm_opp_put_opp_table(opp_table); - - /* Drop the vote */ - dev_pm_genpd_set_performance_state(virt_dev, 0); - break; - } - + _opp_detach_genpd(opp_table); mutex_unlock(&opp_table->genpd_virt_dev_lock); - if (unlikely(i == opp_table->required_opp_count)) - dev_err(virt_dev, "Failed to find required device entry\n"); + dev_pm_opp_put_opp_table(opp_table); } +EXPORT_SYMBOL_GPL(dev_pm_opp_detach_genpd); /** * dev_pm_opp_xlate_performance_state() - Find required OPP's pstate for src_table. diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index b150fe97ce5a..be570761b77a 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -131,8 +131,8 @@ struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char * name); void dev_pm_opp_put_clkname(struct opp_table *opp_table); struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table); -struct opp_table *dev_pm_opp_set_genpd_virt_dev(struct device *dev, struct device *virt_dev, int index); -void dev_pm_opp_put_genpd_virt_dev(struct opp_table *opp_table, struct device *virt_dev); +struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names); +void dev_pm_opp_detach_genpd(struct opp_table *opp_table); int dev_pm_opp_xlate_performance_state(struct opp_table *src_table, struct opp_table *dst_table, unsigned int pstate); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask); @@ -295,12 +295,12 @@ static inline struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const static inline void dev_pm_opp_put_clkname(struct opp_table *opp_table) {} -static inline struct opp_table *dev_pm_opp_set_genpd_virt_dev(struct device *dev, struct device *virt_dev, int index) +static inline struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names) { return ERR_PTR(-ENOTSUPP); } -static inline void dev_pm_opp_put_genpd_virt_dev(struct opp_table *opp_table, struct device *virt_dev) {} +static inline void dev_pm_opp_detach_genpd(struct opp_table *opp_table) {} static inline int dev_pm_opp_xlate_performance_state(struct opp_table *src_table, struct opp_table *dst_table, unsigned int pstate) { -- cgit v1.2.3 From 4c1ca625c622b7a9f04c2949fd1ffdc6effa86de Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Tue, 16 Apr 2019 19:20:47 -0600 Subject: platform/chrome: wilco_ec: Add Boot on AC support Boot on AC is a policy which makes the device boot from S5 when AC power is connected. This is useful for users who want to run their device headless or with a dock. Signed-off-by: Nick Crews Signed-off-by: Enric Balletbo i Serra --- Documentation/ABI/testing/sysfs-platform-wilco-ec | 9 +++ drivers/platform/chrome/wilco_ec/Makefile | 2 +- drivers/platform/chrome/wilco_ec/core.c | 9 +++ drivers/platform/chrome/wilco_ec/sysfs.c | 77 +++++++++++++++++++++++ include/linux/platform_data/wilco-ec.h | 12 ++++ 5 files changed, 108 insertions(+), 1 deletion(-) create mode 100644 Documentation/ABI/testing/sysfs-platform-wilco-ec create mode 100644 drivers/platform/chrome/wilco_ec/sysfs.c (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-platform-wilco-ec b/Documentation/ABI/testing/sysfs-platform-wilco-ec new file mode 100644 index 000000000000..8e5d6eee44db --- /dev/null +++ b/Documentation/ABI/testing/sysfs-platform-wilco-ec @@ -0,0 +1,9 @@ +What: /sys/bus/platform/devices/GOOG000C\:00/boot_on_ac +Date: April 2019 +KernelVersion: 5.3 +Description: + Boot on AC is a policy which makes the device boot from S5 + when AC power is connected. This is useful for users who + want to run their device headless or with a dock. + + Input should be parseable by kstrtou8() to 0 or 1. diff --git a/drivers/platform/chrome/wilco_ec/Makefile b/drivers/platform/chrome/wilco_ec/Makefile index 29b734137786..72df9b5e1983 100644 --- a/drivers/platform/chrome/wilco_ec/Makefile +++ b/drivers/platform/chrome/wilco_ec/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -wilco_ec-objs := core.o mailbox.o properties.o +wilco_ec-objs := core.o mailbox.o properties.o sysfs.o obj-$(CONFIG_WILCO_EC) += wilco_ec.o wilco_ec_debugfs-objs := debugfs.o obj-$(CONFIG_WILCO_EC_DEBUGFS) += wilco_ec_debugfs.o diff --git a/drivers/platform/chrome/wilco_ec/core.c b/drivers/platform/chrome/wilco_ec/core.c index 05e1e2be1c91..abd15d04e57b 100644 --- a/drivers/platform/chrome/wilco_ec/core.c +++ b/drivers/platform/chrome/wilco_ec/core.c @@ -89,8 +89,16 @@ static int wilco_ec_probe(struct platform_device *pdev) goto unregister_debugfs; } + ret = wilco_ec_add_sysfs(ec); + if (ret < 0) { + dev_err(dev, "Failed to create sysfs entries: %d", ret); + goto unregister_rtc; + } + return 0; +unregister_rtc: + platform_device_unregister(ec->rtc_pdev); unregister_debugfs: if (ec->debugfs_pdev) platform_device_unregister(ec->debugfs_pdev); @@ -102,6 +110,7 @@ static int wilco_ec_remove(struct platform_device *pdev) { struct wilco_ec_device *ec = platform_get_drvdata(pdev); + wilco_ec_remove_sysfs(ec); platform_device_unregister(ec->rtc_pdev); if (ec->debugfs_pdev) platform_device_unregister(ec->debugfs_pdev); diff --git a/drivers/platform/chrome/wilco_ec/sysfs.c b/drivers/platform/chrome/wilco_ec/sysfs.c new file mode 100644 index 000000000000..f84f0480460a --- /dev/null +++ b/drivers/platform/chrome/wilco_ec/sysfs.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Google LLC + * + * Sysfs properties to view and modify EC-controlled features on Wilco devices. + * The entries will appear under /sys/bus/platform/devices/GOOG000C:00/ + * + * See Documentation/ABI/testing/sysfs-platform-wilco-ec for more information. + */ + +#include +#include + +#define CMD_KB_CMOS 0x7C +#define SUB_CMD_KB_CMOS_AUTO_ON 0x03 + +struct boot_on_ac_request { + u8 cmd; /* Always CMD_KB_CMOS */ + u8 reserved1; + u8 sub_cmd; /* Always SUB_CMD_KB_CMOS_AUTO_ON */ + u8 reserved3to5[3]; + u8 val; /* Either 0 or 1 */ + u8 reserved7; +} __packed; + +static ssize_t boot_on_ac_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct wilco_ec_device *ec = dev_get_drvdata(dev); + struct boot_on_ac_request rq; + struct wilco_ec_message msg; + int ret; + u8 val; + + ret = kstrtou8(buf, 10, &val); + if (ret < 0) + return ret; + if (val > 1) + return -EINVAL; + + memset(&rq, 0, sizeof(rq)); + rq.cmd = CMD_KB_CMOS; + rq.sub_cmd = SUB_CMD_KB_CMOS_AUTO_ON; + rq.val = val; + + memset(&msg, 0, sizeof(msg)); + msg.type = WILCO_EC_MSG_LEGACY; + msg.request_data = &rq; + msg.request_size = sizeof(rq); + ret = wilco_ec_mailbox(ec, &msg); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR_WO(boot_on_ac); + +static struct attribute *wilco_dev_attrs[] = { + &dev_attr_boot_on_ac.attr, + NULL, +}; + +static struct attribute_group wilco_dev_attr_group = { + .attrs = wilco_dev_attrs, +}; + +int wilco_ec_add_sysfs(struct wilco_ec_device *ec) +{ + return sysfs_create_group(&ec->dev->kobj, &wilco_dev_attr_group); +} + +void wilco_ec_remove_sysfs(struct wilco_ec_device *ec) +{ + sysfs_remove_group(&ec->dev->kobj, &wilco_dev_attr_group); +} diff --git a/include/linux/platform_data/wilco-ec.h b/include/linux/platform_data/wilco-ec.h index 50a21bd5fd44..af68fc0563cc 100644 --- a/include/linux/platform_data/wilco-ec.h +++ b/include/linux/platform_data/wilco-ec.h @@ -194,4 +194,16 @@ int wilco_ec_get_byte_property(struct wilco_ec_device *ec, u32 property_id, int wilco_ec_set_byte_property(struct wilco_ec_device *ec, u32 property_id, u8 val); +/** + * wilco_ec_add_sysfs() - Create sysfs entries + * @ec: Wilco EC device + * + * wilco_ec_remove_sysfs() needs to be called afterwards + * to perform the necessary cleanup. + * + * Return: 0 on success or negative error code on failure. + */ +int wilco_ec_add_sysfs(struct wilco_ec_device *ec); +void wilco_ec_remove_sysfs(struct wilco_ec_device *ec); + #endif /* WILCO_EC_H */ -- cgit v1.2.3 From 0c0b7ea23aed0b55ef2f9803f13ddaae1943713d Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Wed, 24 Apr 2019 10:56:50 -0600 Subject: platform/chrome: wilco_ec: Add property helper library A Property is typically a data item that is stored to NVRAM by the EC. Each of these data items has an index associated with it, known as the Property ID (PID). Properties may have variable lengths, up to a max of WILCO_EC_PROPERTY_MAX_SIZE bytes. Properties can be simple integers, or they may be more complex binary data. This patch adds support for getting and setting properties. This will be useful for setting the charge algorithm and charge schedules, which all use properties. Signed-off-by: Nick Crews Signed-off-by: Enric Balletbo i Serra --- drivers/platform/chrome/wilco_ec/Makefile | 2 +- drivers/platform/chrome/wilco_ec/properties.c | 132 ++++++++++++++++++++++++++ include/linux/platform_data/wilco-ec.h | 71 ++++++++++++++ 3 files changed, 204 insertions(+), 1 deletion(-) create mode 100644 drivers/platform/chrome/wilco_ec/properties.c (limited to 'include/linux') diff --git a/drivers/platform/chrome/wilco_ec/Makefile b/drivers/platform/chrome/wilco_ec/Makefile index 063e7fb4ea17..29b734137786 100644 --- a/drivers/platform/chrome/wilco_ec/Makefile +++ b/drivers/platform/chrome/wilco_ec/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -wilco_ec-objs := core.o mailbox.o +wilco_ec-objs := core.o mailbox.o properties.o obj-$(CONFIG_WILCO_EC) += wilco_ec.o wilco_ec_debugfs-objs := debugfs.o obj-$(CONFIG_WILCO_EC_DEBUGFS) += wilco_ec_debugfs.o diff --git a/drivers/platform/chrome/wilco_ec/properties.c b/drivers/platform/chrome/wilco_ec/properties.c new file mode 100644 index 000000000000..e69682c95ea2 --- /dev/null +++ b/drivers/platform/chrome/wilco_ec/properties.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Google LLC + */ + +#include +#include +#include + +/* Operation code; what the EC should do with the property */ +enum ec_property_op { + EC_OP_GET = 0, + EC_OP_SET = 1, +}; + +struct ec_property_request { + u8 op; /* One of enum ec_property_op */ + u8 property_id[4]; /* The 32 bit PID is stored Little Endian */ + u8 length; + u8 data[WILCO_EC_PROPERTY_MAX_SIZE]; +} __packed; + +struct ec_property_response { + u8 reserved[2]; + u8 op; /* One of enum ec_property_op */ + u8 property_id[4]; /* The 32 bit PID is stored Little Endian */ + u8 length; + u8 data[WILCO_EC_PROPERTY_MAX_SIZE]; +} __packed; + +static int send_property_msg(struct wilco_ec_device *ec, + struct ec_property_request *rq, + struct ec_property_response *rs) +{ + struct wilco_ec_message ec_msg; + int ret; + + memset(&ec_msg, 0, sizeof(ec_msg)); + ec_msg.type = WILCO_EC_MSG_PROPERTY; + ec_msg.request_data = rq; + ec_msg.request_size = sizeof(*rq); + ec_msg.response_data = rs; + ec_msg.response_size = sizeof(*rs); + + ret = wilco_ec_mailbox(ec, &ec_msg); + if (ret < 0) + return ret; + if (rs->op != rq->op) + return -EBADMSG; + if (memcmp(rq->property_id, rs->property_id, sizeof(rs->property_id))) + return -EBADMSG; + + return 0; +} + +int wilco_ec_get_property(struct wilco_ec_device *ec, + struct wilco_ec_property_msg *prop_msg) +{ + struct ec_property_request rq; + struct ec_property_response rs; + int ret; + + memset(&rq, 0, sizeof(rq)); + rq.op = EC_OP_GET; + put_unaligned_le32(prop_msg->property_id, rq.property_id); + + ret = send_property_msg(ec, &rq, &rs); + if (ret < 0) + return ret; + + prop_msg->length = rs.length; + memcpy(prop_msg->data, rs.data, rs.length); + + return 0; +} +EXPORT_SYMBOL_GPL(wilco_ec_get_property); + +int wilco_ec_set_property(struct wilco_ec_device *ec, + struct wilco_ec_property_msg *prop_msg) +{ + struct ec_property_request rq; + struct ec_property_response rs; + int ret; + + memset(&rq, 0, sizeof(rq)); + rq.op = EC_OP_SET; + put_unaligned_le32(prop_msg->property_id, rq.property_id); + rq.length = prop_msg->length; + memcpy(rq.data, prop_msg->data, prop_msg->length); + + ret = send_property_msg(ec, &rq, &rs); + if (ret < 0) + return ret; + if (rs.length != prop_msg->length) + return -EBADMSG; + + return 0; +} +EXPORT_SYMBOL_GPL(wilco_ec_set_property); + +int wilco_ec_get_byte_property(struct wilco_ec_device *ec, u32 property_id, + u8 *val) +{ + struct wilco_ec_property_msg msg; + int ret; + + msg.property_id = property_id; + + ret = wilco_ec_get_property(ec, &msg); + if (ret < 0) + return ret; + if (msg.length != 1) + return -EBADMSG; + + *val = msg.data[0]; + + return 0; +} +EXPORT_SYMBOL_GPL(wilco_ec_get_byte_property); + +int wilco_ec_set_byte_property(struct wilco_ec_device *ec, u32 property_id, + u8 val) +{ + struct wilco_ec_property_msg msg; + + msg.property_id = property_id; + msg.data[0] = val; + msg.length = 1; + + return wilco_ec_set_property(ec, &msg); +} +EXPORT_SYMBOL_GPL(wilco_ec_set_byte_property); diff --git a/include/linux/platform_data/wilco-ec.h b/include/linux/platform_data/wilco-ec.h index 1ff224793c99..50a21bd5fd44 100644 --- a/include/linux/platform_data/wilco-ec.h +++ b/include/linux/platform_data/wilco-ec.h @@ -123,4 +123,75 @@ struct wilco_ec_message { */ int wilco_ec_mailbox(struct wilco_ec_device *ec, struct wilco_ec_message *msg); +/* + * A Property is typically a data item that is stored to NVRAM + * by the EC. Each of these data items has an index associated + * with it, known as the Property ID (PID). Properties may have + * variable lengths, up to a max of WILCO_EC_PROPERTY_MAX_SIZE + * bytes. Properties can be simple integers, or they may be more + * complex binary data. + */ + +#define WILCO_EC_PROPERTY_MAX_SIZE 4 + +/** + * struct ec_property_set_msg - Message to get or set a property. + * @property_id: Which property to get or set. + * @length: Number of bytes of |data| that are used. + * @data: Actual property data. + */ +struct wilco_ec_property_msg { + u32 property_id; + int length; + u8 data[WILCO_EC_PROPERTY_MAX_SIZE]; +}; + +/** + * wilco_ec_get_property() - Retrieve a property from the EC. + * @ec: Embedded Controller device. + * @prop_msg: Message for request and response. + * + * The property_id field of |prop_msg| should be filled before calling this + * function. The result will be stored in the data and length fields. + * + * Return: 0 on success, negative error code on failure. + */ +int wilco_ec_get_property(struct wilco_ec_device *ec, + struct wilco_ec_property_msg *prop_msg); + +/** + * wilco_ec_set_property() - Store a property on the EC. + * @ec: Embedded Controller device. + * @prop_msg: Message for request and response. + * + * The property_id, length, and data fields of |prop_msg| should be + * filled before calling this function. + * + * Return: 0 on success, negative error code on failure. + */ +int wilco_ec_set_property(struct wilco_ec_device *ec, + struct wilco_ec_property_msg *prop_msg); + +/** + * wilco_ec_get_byte_property() - Retrieve a byte-size property from the EC. + * @ec: Embedded Controller device. + * @property_id: Which property to retrieve. + * @val: The result value, will be filled by this function. + * + * Return: 0 on success, negative error code on failure. + */ +int wilco_ec_get_byte_property(struct wilco_ec_device *ec, u32 property_id, + u8 *val); + +/** + * wilco_ec_get_byte_property() - Store a byte-size property on the EC. + * @ec: Embedded Controller device. + * @property_id: Which property to store. + * @val: Value to store. + * + * Return: 0 on success, negative error code on failure. + */ +int wilco_ec_set_byte_property(struct wilco_ec_device *ec, u32 property_id, + u8 val); + #endif /* WILCO_EC_H */ -- cgit v1.2.3 From 2ad1f7a91449de48d4bd5d1ec361ba7bb9026505 Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Wed, 8 May 2019 15:38:09 -0600 Subject: platform/chrome: wilco_ec: Remove 256 byte transfers The 0xF6 command, intended to send and receive 256 byte payloads to and from the EC, is not needed. The 0xF5 command for 32 byte payloads is sufficient. This patch removes support for the 0xF6 command and 256 byte payloads. Signed-off-by: Nick Crews Signed-off-by: Enric Balletbo i Serra --- Documentation/ABI/testing/debugfs-wilco-ec | 16 +++++++--------- drivers/platform/chrome/wilco_ec/core.c | 4 +--- drivers/platform/chrome/wilco_ec/debugfs.c | 10 ++-------- drivers/platform/chrome/wilco_ec/mailbox.c | 21 +++++---------------- include/linux/platform_data/wilco-ec.h | 9 ++------- 5 files changed, 17 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/debugfs-wilco-ec b/Documentation/ABI/testing/debugfs-wilco-ec index 73a5a66ddca6..9d8d9d2def5b 100644 --- a/Documentation/ABI/testing/debugfs-wilco-ec +++ b/Documentation/ABI/testing/debugfs-wilco-ec @@ -23,11 +23,9 @@ Description: For writing, bytes 0-1 indicate the message type, one of enum wilco_ec_msg_type. Byte 2+ consist of the data passed in the - request, starting at MBOX[0] - - At least three bytes are required for writing, two for the type - and at least a single byte of data. Only the first - EC_MAILBOX_DATA_SIZE bytes of MBOX will be used. + request, starting at MBOX[0]. At least three bytes are required + for writing, two for the type and at least a single byte of + data. Example: // Request EC info type 3 (EC firmware build date) @@ -40,7 +38,7 @@ Description: $ cat /sys/kernel/debug/wilco_ec/raw 00 00 31 32 2f 32 31 2f 31 38 00 38 00 01 00 2f 00 ..12/21/18.8... - Note that the first 32 bytes of the received MBOX[] will be - printed, even if some of the data is junk. It is up to you to - know how many of the first bytes of data are the actual - response. + Note that the first 16 bytes of the received MBOX[] will be + printed, even if some of the data is junk, and skipping bytes + 17 to 32. It is up to you to know how many of the first bytes of + data are the actual response. diff --git a/drivers/platform/chrome/wilco_ec/core.c b/drivers/platform/chrome/wilco_ec/core.c index abd15d04e57b..45cf3a5ed062 100644 --- a/drivers/platform/chrome/wilco_ec/core.c +++ b/drivers/platform/chrome/wilco_ec/core.c @@ -52,9 +52,7 @@ static int wilco_ec_probe(struct platform_device *pdev) ec->dev = dev; mutex_init(&ec->mailbox_lock); - /* Largest data buffer size requirement is extended data response */ - ec->data_size = sizeof(struct wilco_ec_response) + - EC_MAILBOX_DATA_SIZE_EXTENDED; + ec->data_size = sizeof(struct wilco_ec_response) + EC_MAILBOX_DATA_SIZE; ec->data_buffer = devm_kzalloc(dev, ec->data_size, GFP_KERNEL); if (!ec->data_buffer) return -ENOMEM; diff --git a/drivers/platform/chrome/wilco_ec/debugfs.c b/drivers/platform/chrome/wilco_ec/debugfs.c index f163476d080d..281ec595e8e0 100644 --- a/drivers/platform/chrome/wilco_ec/debugfs.c +++ b/drivers/platform/chrome/wilco_ec/debugfs.c @@ -17,13 +17,13 @@ #define DRV_NAME "wilco-ec-debugfs" /* The 256 raw bytes will take up more space when represented as a hex string */ -#define FORMATTED_BUFFER_SIZE (EC_MAILBOX_DATA_SIZE_EXTENDED * 4) +#define FORMATTED_BUFFER_SIZE (EC_MAILBOX_DATA_SIZE * 4) struct wilco_ec_debugfs { struct wilco_ec_device *ec; struct dentry *dir; size_t response_size; - u8 raw_data[EC_MAILBOX_DATA_SIZE_EXTENDED]; + u8 raw_data[EC_MAILBOX_DATA_SIZE]; u8 formatted_data[FORMATTED_BUFFER_SIZE]; }; static struct wilco_ec_debugfs *debug_info; @@ -124,12 +124,6 @@ static ssize_t raw_write(struct file *file, const char __user *user_buf, msg.response_data = debug_info->raw_data; msg.response_size = EC_MAILBOX_DATA_SIZE; - /* Telemetry commands use extended response data */ - if (msg.type == WILCO_EC_MSG_TELEMETRY_LONG) { - msg.flags |= WILCO_EC_FLAG_EXTENDED_DATA; - msg.response_size = EC_MAILBOX_DATA_SIZE_EXTENDED; - } - ret = wilco_ec_mailbox(debug_info->ec, &msg); if (ret < 0) return ret; diff --git a/drivers/platform/chrome/wilco_ec/mailbox.c b/drivers/platform/chrome/wilco_ec/mailbox.c index 7fb58b487963..ced1f9f3dcee 100644 --- a/drivers/platform/chrome/wilco_ec/mailbox.c +++ b/drivers/platform/chrome/wilco_ec/mailbox.c @@ -119,7 +119,6 @@ static int wilco_ec_transfer(struct wilco_ec_device *ec, struct wilco_ec_response *rs; u8 checksum; u8 flag; - size_t size; /* Write request header, then data */ cros_ec_lpc_io_bytes_mec(MEC_IO_WRITE, 0, sizeof(*rq), (u8 *)rq); @@ -148,21 +147,11 @@ static int wilco_ec_transfer(struct wilco_ec_device *ec, return -EIO; } - /* - * The EC always returns either EC_MAILBOX_DATA_SIZE or - * EC_MAILBOX_DATA_SIZE_EXTENDED bytes of data, so we need to - * calculate the checksum on **all** of this data, even if we - * won't use all of it. - */ - if (msg->flags & WILCO_EC_FLAG_EXTENDED_DATA) - size = EC_MAILBOX_DATA_SIZE_EXTENDED; - else - size = EC_MAILBOX_DATA_SIZE; - /* Read back response */ rs = ec->data_buffer; checksum = cros_ec_lpc_io_bytes_mec(MEC_IO_READ, 0, - sizeof(*rs) + size, (u8 *)rs); + sizeof(*rs) + EC_MAILBOX_DATA_SIZE, + (u8 *)rs); if (checksum) { dev_dbg(ec->dev, "bad packet checksum 0x%02x\n", rs->checksum); return -EBADMSG; @@ -173,9 +162,9 @@ static int wilco_ec_transfer(struct wilco_ec_device *ec, return -EBADMSG; } - if (rs->data_size != size) { - dev_dbg(ec->dev, "unexpected packet size (%u != %zu)", - rs->data_size, size); + if (rs->data_size != EC_MAILBOX_DATA_SIZE) { + dev_dbg(ec->dev, "unexpected packet size (%u != %u)", + rs->data_size, EC_MAILBOX_DATA_SIZE); return -EMSGSIZE; } diff --git a/include/linux/platform_data/wilco-ec.h b/include/linux/platform_data/wilco-ec.h index af68fc0563cc..e3ce9ce49b11 100644 --- a/include/linux/platform_data/wilco-ec.h +++ b/include/linux/platform_data/wilco-ec.h @@ -13,12 +13,9 @@ /* Message flags for using the mailbox() interface */ #define WILCO_EC_FLAG_NO_RESPONSE BIT(0) /* EC does not respond */ -#define WILCO_EC_FLAG_EXTENDED_DATA BIT(1) /* EC returns 256 data bytes */ /* Normal commands have a maximum 32 bytes of data */ #define EC_MAILBOX_DATA_SIZE 32 -/* Extended commands have 256 bytes of response data */ -#define EC_MAILBOX_DATA_SIZE_EXTENDED 256 /** * struct wilco_ec_device - Wilco Embedded Controller handle. @@ -85,14 +82,12 @@ struct wilco_ec_response { * enum wilco_ec_msg_type - Message type to select a set of command codes. * @WILCO_EC_MSG_LEGACY: Legacy EC messages for standard EC behavior. * @WILCO_EC_MSG_PROPERTY: Get/Set/Sync EC controlled NVRAM property. - * @WILCO_EC_MSG_TELEMETRY_SHORT: 32 bytes of telemetry data provided by the EC. - * @WILCO_EC_MSG_TELEMETRY_LONG: 256 bytes of telemetry data provided by the EC. + * @WILCO_EC_MSG_TELEMETRY: Request telemetry data from the EC. */ enum wilco_ec_msg_type { WILCO_EC_MSG_LEGACY = 0x00f0, WILCO_EC_MSG_PROPERTY = 0x00f2, - WILCO_EC_MSG_TELEMETRY_SHORT = 0x00f5, - WILCO_EC_MSG_TELEMETRY_LONG = 0x00f6, + WILCO_EC_MSG_TELEMETRY = 0x00f5, }; /** -- cgit v1.2.3 From 824bd1be3ed01d67197098650d0c62b176087b11 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 20 May 2019 11:34:45 +0200 Subject: regulator: da9063: move definitions out of a header into the driver Those definitions are only used within the driver meanwhile, so put them there. Signed-off-by: Wolfram Sang Reviewed-by: Simon Horman Acked-by: Steve Twiss Tested-by: Steve Twiss Reviewed-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- drivers/regulator/da9063-regulator.c | 44 +++++++++++++++++++++++++++++++- include/linux/mfd/da9063/pdata.h | 49 ------------------------------------ 2 files changed, 43 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/da9063-regulator.c b/drivers/regulator/da9063-regulator.c index 43aa0df30346..da95197fdb4f 100644 --- a/drivers/regulator/da9063-regulator.c +++ b/drivers/regulator/da9063-regulator.c @@ -19,7 +19,6 @@ #include #include #include -#include #include @@ -28,6 +27,49 @@ REG_FIELD(_reg, __builtin_ffs((int)_mask) - 1, \ sizeof(unsigned int) * 8 - __builtin_clz((_mask)) - 1) +/* DA9063 and DA9063L regulator IDs */ +enum { + /* BUCKs */ + DA9063_ID_BCORE1, + DA9063_ID_BCORE2, + DA9063_ID_BPRO, + DA9063_ID_BMEM, + DA9063_ID_BIO, + DA9063_ID_BPERI, + + /* BCORE1 and BCORE2 in merged mode */ + DA9063_ID_BCORES_MERGED, + /* BMEM and BIO in merged mode */ + DA9063_ID_BMEM_BIO_MERGED, + /* When two BUCKs are merged, they cannot be reused separately */ + + /* LDOs on both DA9063 and DA9063L */ + DA9063_ID_LDO3, + DA9063_ID_LDO7, + DA9063_ID_LDO8, + DA9063_ID_LDO9, + DA9063_ID_LDO11, + + /* DA9063-only LDOs */ + DA9063_ID_LDO1, + DA9063_ID_LDO2, + DA9063_ID_LDO4, + DA9063_ID_LDO5, + DA9063_ID_LDO6, + DA9063_ID_LDO10, +}; + +/* Old regulator platform data */ +struct da9063_regulator_data { + int id; + struct regulator_init_data *initdata; +}; + +struct da9063_regulators_pdata { + unsigned n_regulators; + struct da9063_regulator_data *regulator_data; +}; + /* Regulator capabilities and registers description */ struct da9063_regulator_info { struct regulator_desc desc; diff --git a/include/linux/mfd/da9063/pdata.h b/include/linux/mfd/da9063/pdata.h index 50bed4f89c1a..21a2d107f0cf 100644 --- a/include/linux/mfd/da9063/pdata.h +++ b/include/linux/mfd/da9063/pdata.h @@ -16,55 +16,6 @@ #ifndef __MFD_DA9063_PDATA_H__ #define __MFD_DA9063_PDATA_H__ -#include - -/* - * Regulator configuration - */ -/* DA9063 and DA9063L regulator IDs */ -enum { - /* BUCKs */ - DA9063_ID_BCORE1, - DA9063_ID_BCORE2, - DA9063_ID_BPRO, - DA9063_ID_BMEM, - DA9063_ID_BIO, - DA9063_ID_BPERI, - - /* BCORE1 and BCORE2 in merged mode */ - DA9063_ID_BCORES_MERGED, - /* BMEM and BIO in merged mode */ - DA9063_ID_BMEM_BIO_MERGED, - /* When two BUCKs are merged, they cannot be reused separately */ - - /* LDOs on both DA9063 and DA9063L */ - DA9063_ID_LDO3, - DA9063_ID_LDO7, - DA9063_ID_LDO8, - DA9063_ID_LDO9, - DA9063_ID_LDO11, - - /* DA9063-only LDOs */ - DA9063_ID_LDO1, - DA9063_ID_LDO2, - DA9063_ID_LDO4, - DA9063_ID_LDO5, - DA9063_ID_LDO6, - DA9063_ID_LDO10, -}; - -/* Regulators platform data */ -struct da9063_regulator_data { - int id; - struct regulator_init_data *initdata; -}; - -struct da9063_regulators_pdata { - unsigned n_regulators; - struct da9063_regulator_data *regulator_data; -}; - - /* * RGB LED configuration */ -- cgit v1.2.3 From ec9964b4803300fb86f8e8fd9b421e59f7a71dc5 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 13 May 2019 09:56:34 +0200 Subject: Platform: OLPC: Move EC-specific functionality out from x86 Move the olpc-ec driver away from the X86 OLPC platform so that it could be used by the ARM based laptops too. Notably, the driver for the OLPC battery, which is also used on the ARM models, builds on this driver's interface. It is actually plaform independent: the OLPC EC commands with their argument and responses are mostly the same despite the delivery mechanism is different. Signed-off-by: Lubomir Rintel Acked-by: Pavel Machek Signed-off-by: Andy Shevchenko --- arch/x86/include/asm/olpc.h | 31 ---------- arch/x86/platform/olpc/olpc.c | 119 ++++++------------------------------ drivers/platform/olpc/olpc-ec.c | 99 +++++++++++++++++++++++++++++- drivers/power/supply/olpc_battery.c | 1 - include/linux/olpc-ec.h | 32 +++++++++- 5 files changed, 145 insertions(+), 137 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h index c2bf1de5d901..6fe76282aceb 100644 --- a/arch/x86/include/asm/olpc.h +++ b/arch/x86/include/asm/olpc.h @@ -9,12 +9,10 @@ struct olpc_platform_t { int flags; uint32_t boardrev; - int ecver; }; #define OLPC_F_PRESENT 0x01 #define OLPC_F_DCON 0x02 -#define OLPC_F_EC_WIDE_SCI 0x04 #ifdef CONFIG_OLPC @@ -64,13 +62,6 @@ static inline int olpc_board_at_least(uint32_t rev) return olpc_platform_info.boardrev >= rev; } -extern void olpc_ec_wakeup_set(u16 value); -extern void olpc_ec_wakeup_clear(u16 value); -extern bool olpc_ec_wakeup_available(void); - -extern int olpc_ec_mask_write(u16 bits); -extern int olpc_ec_sci_query(u16 *sci_value); - #else static inline int machine_is_olpc(void) @@ -83,14 +74,6 @@ static inline int olpc_has_dcon(void) return 0; } -static inline void olpc_ec_wakeup_set(u16 value) { } -static inline void olpc_ec_wakeup_clear(u16 value) { } - -static inline bool olpc_ec_wakeup_available(void) -{ - return false; -} - #endif #ifdef CONFIG_OLPC_XO1_PM @@ -101,20 +84,6 @@ extern void olpc_xo1_pm_wakeup_clear(u16 value); extern int pci_olpc_init(void); -/* SCI source values */ - -#define EC_SCI_SRC_EMPTY 0x00 -#define EC_SCI_SRC_GAME 0x01 -#define EC_SCI_SRC_BATTERY 0x02 -#define EC_SCI_SRC_BATSOC 0x04 -#define EC_SCI_SRC_BATERR 0x08 -#define EC_SCI_SRC_EBOOK 0x10 /* XO-1 only */ -#define EC_SCI_SRC_WLAN 0x20 /* XO-1 only */ -#define EC_SCI_SRC_ACPWR 0x40 -#define EC_SCI_SRC_BATCRIT 0x80 -#define EC_SCI_SRC_GPWAKE 0x100 /* XO-1.5 only */ -#define EC_SCI_SRC_ALL 0x1FF - /* GPIO assignments */ #define OLPC_GPIO_MIC_AC 1 diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c index f0e920fb98ad..c6c62b4f251f 100644 --- a/arch/x86/platform/olpc/olpc.c +++ b/arch/x86/platform/olpc/olpc.c @@ -30,9 +30,6 @@ struct olpc_platform_t olpc_platform_info; EXPORT_SYMBOL_GPL(olpc_platform_info); -/* EC event mask to be applied during suspend (defining wakeup sources). */ -static u16 ec_wakeup_mask; - /* what the timeout *should* be (in ms) */ #define EC_BASE_TIMEOUT 20 @@ -186,83 +183,6 @@ err: return ret; } -void olpc_ec_wakeup_set(u16 value) -{ - ec_wakeup_mask |= value; -} -EXPORT_SYMBOL_GPL(olpc_ec_wakeup_set); - -void olpc_ec_wakeup_clear(u16 value) -{ - ec_wakeup_mask &= ~value; -} -EXPORT_SYMBOL_GPL(olpc_ec_wakeup_clear); - -/* - * Returns true if the compile and runtime configurations allow for EC events - * to wake the system. - */ -bool olpc_ec_wakeup_available(void) -{ - if (!machine_is_olpc()) - return false; - - /* - * XO-1 EC wakeups are available when olpc-xo1-sci driver is - * compiled in - */ -#ifdef CONFIG_OLPC_XO1_SCI - if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) /* XO-1 */ - return true; -#endif - - /* - * XO-1.5 EC wakeups are available when olpc-xo15-sci driver is - * compiled in - */ -#ifdef CONFIG_OLPC_XO15_SCI - if (olpc_platform_info.boardrev >= olpc_board_pre(0xd0)) /* XO-1.5 */ - return true; -#endif - - return false; -} -EXPORT_SYMBOL_GPL(olpc_ec_wakeup_available); - -int olpc_ec_mask_write(u16 bits) -{ - if (olpc_platform_info.flags & OLPC_F_EC_WIDE_SCI) { - __be16 ec_word = cpu_to_be16(bits); - return olpc_ec_cmd(EC_WRITE_EXT_SCI_MASK, (void *) &ec_word, 2, - NULL, 0); - } else { - unsigned char ec_byte = bits & 0xff; - return olpc_ec_cmd(EC_WRITE_SCI_MASK, &ec_byte, 1, NULL, 0); - } -} -EXPORT_SYMBOL_GPL(olpc_ec_mask_write); - -int olpc_ec_sci_query(u16 *sci_value) -{ - int ret; - - if (olpc_platform_info.flags & OLPC_F_EC_WIDE_SCI) { - __be16 ec_word; - ret = olpc_ec_cmd(EC_EXT_SCI_QUERY, - NULL, 0, (void *) &ec_word, 2); - if (ret == 0) - *sci_value = be16_to_cpu(ec_word); - } else { - unsigned char ec_byte; - ret = olpc_ec_cmd(EC_SCI_QUERY, NULL, 0, &ec_byte, 1); - if (ret == 0) - *sci_value = ec_byte; - } - - return ret; -} -EXPORT_SYMBOL_GPL(olpc_ec_sci_query); - static bool __init check_ofw_architecture(struct device_node *root) { const char *olpc_arch; @@ -296,6 +216,10 @@ static bool __init platform_detect(void) if (success) { olpc_platform_info.boardrev = get_board_revision(root); olpc_platform_info.flags |= OLPC_F_PRESENT; + + pr_info("OLPC board revision %s%X\n", + ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "", + olpc_platform_info.boardrev >> 4); } of_node_put(root); @@ -315,27 +239,8 @@ static int __init add_xo1_platform_devices(void) return PTR_ERR_OR_ZERO(pdev); } -static int olpc_xo1_ec_probe(struct platform_device *pdev) -{ - /* get the EC revision */ - olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, - (unsigned char *) &olpc_platform_info.ecver, 1); - - /* EC version 0x5f adds support for wide SCI mask */ - if (olpc_platform_info.ecver >= 0x5f) - olpc_platform_info.flags |= OLPC_F_EC_WIDE_SCI; - - pr_info("OLPC board revision %s%X (EC=%x)\n", - ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "", - olpc_platform_info.boardrev >> 4, - olpc_platform_info.ecver); - - return 0; -} static int olpc_xo1_ec_suspend(struct platform_device *pdev) { - olpc_ec_mask_write(ec_wakeup_mask); - /* * Squelch SCIs while suspended. This is a fix for * . @@ -359,15 +264,27 @@ static int olpc_xo1_ec_resume(struct platform_device *pdev) } static struct olpc_ec_driver ec_xo1_driver = { - .probe = olpc_xo1_ec_probe, .suspend = olpc_xo1_ec_suspend, .resume = olpc_xo1_ec_resume, .ec_cmd = olpc_xo1_ec_cmd, +#ifdef CONFIG_OLPC_XO1_SCI + /* + * XO-1 EC wakeups are available when olpc-xo1-sci driver is + * compiled in + */ + .wakeup_available = true, +#endif }; static struct olpc_ec_driver ec_xo1_5_driver = { - .probe = olpc_xo1_ec_probe, .ec_cmd = olpc_xo1_ec_cmd, +#ifdef CONFIG_OLPC_XO1_5_SCI + /* + * XO-1.5 EC wakeups are available when olpc-xo15-sci driver is + * compiled in + */ + .wakeup_available = true, +#endif }; static int __init olpc_init(void) diff --git a/drivers/platform/olpc/olpc-ec.c b/drivers/platform/olpc/olpc-ec.c index 981955dce926..2a647455a368 100644 --- a/drivers/platform/olpc/olpc-ec.c +++ b/drivers/platform/olpc/olpc-ec.c @@ -32,6 +32,7 @@ struct ec_cmd_desc { struct olpc_ec_priv { struct olpc_ec_driver *drv; + u8 version; struct work_struct worker; struct mutex cmd_lock; @@ -41,6 +42,12 @@ struct olpc_ec_priv { struct dentry *dbgfs_dir; + /* + * EC event mask to be applied during suspend (defining wakeup + * sources). + */ + u16 ec_wakeup_mask; + /* * Running an EC command while suspending means we don't always finish * the command before the machine suspends. This means that the EC @@ -149,6 +156,88 @@ int olpc_ec_cmd(u8 cmd, u8 *inbuf, size_t inlen, u8 *outbuf, size_t outlen) } EXPORT_SYMBOL_GPL(olpc_ec_cmd); +void olpc_ec_wakeup_set(u16 value) +{ + struct olpc_ec_priv *ec = ec_priv; + + if (WARN_ON(!ec)) + return; + + ec->ec_wakeup_mask |= value; +} +EXPORT_SYMBOL_GPL(olpc_ec_wakeup_set); + +void olpc_ec_wakeup_clear(u16 value) +{ + struct olpc_ec_priv *ec = ec_priv; + + if (WARN_ON(!ec)) + return; + + ec->ec_wakeup_mask &= ~value; +} +EXPORT_SYMBOL_GPL(olpc_ec_wakeup_clear); + +int olpc_ec_mask_write(u16 bits) +{ + struct olpc_ec_priv *ec = ec_priv; + + if (WARN_ON(!ec)) + return -ENODEV; + + /* EC version 0x5f adds support for wide SCI mask */ + if (ec->version >= 0x5f) { + __be16 ec_word = cpu_to_be16(bits); + + return olpc_ec_cmd(EC_WRITE_EXT_SCI_MASK, (void *)&ec_word, 2, NULL, 0); + } else { + u8 ec_byte = bits & 0xff; + + return olpc_ec_cmd(EC_WRITE_SCI_MASK, &ec_byte, 1, NULL, 0); + } +} +EXPORT_SYMBOL_GPL(olpc_ec_mask_write); + +/* + * Returns true if the compile and runtime configurations allow for EC events + * to wake the system. + */ +bool olpc_ec_wakeup_available(void) +{ + if (WARN_ON(!ec_driver)) + return false; + + return ec_driver->wakeup_available; +} +EXPORT_SYMBOL_GPL(olpc_ec_wakeup_available); + +int olpc_ec_sci_query(u16 *sci_value) +{ + struct olpc_ec_priv *ec = ec_priv; + int ret; + + if (WARN_ON(!ec)) + return -ENODEV; + + /* EC version 0x5f adds support for wide SCI mask */ + if (ec->version >= 0x5f) { + __be16 ec_word; + + ret = olpc_ec_cmd(EC_EXT_SCI_QUERY, NULL, 0, (void *)&ec_word, 2); + if (ret == 0) + *sci_value = be16_to_cpu(ec_word); + } else { + u8 ec_byte; + + ret = olpc_ec_cmd(EC_SCI_QUERY, NULL, 0, &ec_byte, 1); + if (ret == 0) + *sci_value = ec_byte; + } + + return ret; +} +EXPORT_SYMBOL_GPL(olpc_ec_sci_query); + #ifdef CONFIG_DEBUG_FS /* @@ -276,14 +365,16 @@ static int olpc_ec_probe(struct platform_device *pdev) ec_priv = ec; platform_set_drvdata(pdev, ec); - err = ec_driver->probe ? ec_driver->probe(pdev) : 0; + /* get the EC revision */ + err = olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, &ec->version, 1); if (err) { ec_priv = NULL; kfree(ec); - } else { - ec->dbgfs_dir = olpc_ec_setup_debugfs(); + return err; } + ec->dbgfs_dir = olpc_ec_setup_debugfs(); + return err; } @@ -293,6 +384,8 @@ static int olpc_ec_suspend(struct device *dev) struct olpc_ec_priv *ec = platform_get_drvdata(pdev); int err = 0; + olpc_ec_mask_write(ec->ec_wakeup_mask); + if (ec_driver->suspend) err = ec_driver->suspend(pdev); if (!err) diff --git a/drivers/power/supply/olpc_battery.c b/drivers/power/supply/olpc_battery.c index 7720e4c2ac0b..066ec9a11153 100644 --- a/drivers/power/supply/olpc_battery.c +++ b/drivers/power/supply/olpc_battery.c @@ -20,7 +20,6 @@ #include #include #include -#include #define EC_BAT_VOLTAGE 0x10 /* uint16_t, *9.76/32, mV */ diff --git a/include/linux/olpc-ec.h b/include/linux/olpc-ec.h index 79bdc6328c52..7fa3d27f7fee 100644 --- a/include/linux/olpc-ec.h +++ b/include/linux/olpc-ec.h @@ -16,14 +16,28 @@ #define EC_SCI_QUERY 0x84 #define EC_EXT_SCI_QUERY 0x85 +/* SCI source values */ +#define EC_SCI_SRC_EMPTY 0x00 +#define EC_SCI_SRC_GAME 0x01 +#define EC_SCI_SRC_BATTERY 0x02 +#define EC_SCI_SRC_BATSOC 0x04 +#define EC_SCI_SRC_BATERR 0x08 +#define EC_SCI_SRC_EBOOK 0x10 /* XO-1 only */ +#define EC_SCI_SRC_WLAN 0x20 /* XO-1 only */ +#define EC_SCI_SRC_ACPWR 0x40 +#define EC_SCI_SRC_BATCRIT 0x80 +#define EC_SCI_SRC_GPWAKE 0x100 /* XO-1.5 only */ +#define EC_SCI_SRC_ALL 0x1FF + struct platform_device; struct olpc_ec_driver { - int (*probe)(struct platform_device *); int (*suspend)(struct platform_device *); int (*resume)(struct platform_device *); int (*ec_cmd)(u8, u8 *, size_t, u8 *, size_t, void *); + + bool wakeup_available; }; #ifdef CONFIG_OLPC @@ -33,11 +47,27 @@ extern void olpc_ec_driver_register(struct olpc_ec_driver *drv, void *arg); extern int olpc_ec_cmd(u8 cmd, u8 *inbuf, size_t inlen, u8 *outbuf, size_t outlen); +extern void olpc_ec_wakeup_set(u16 value); +extern void olpc_ec_wakeup_clear(u16 value); + +extern int olpc_ec_mask_write(u16 bits); +extern int olpc_ec_sci_query(u16 *sci_value); + +extern bool olpc_ec_wakeup_available(void); + #else static inline int olpc_ec_cmd(u8 cmd, u8 *inbuf, size_t inlen, u8 *outbuf, size_t outlen) { return -ENODEV; } +static inline void olpc_ec_wakeup_set(u16 value) { } +static inline void olpc_ec_wakeup_clear(u16 value) { } + +static inline bool olpc_ec_wakeup_available(void) +{ + return false; +} + #endif /* CONFIG_OLPC */ #endif /* _LINUX_OLPC_EC_H */ -- cgit v1.2.3 From 8097548f3af9ec990169574ad9d874052b78bff8 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 13 May 2019 09:56:36 +0200 Subject: Platform: OLPC: Use BIT() and GENMASK() for event masks Just a cosmetic tidy-up. Signed-off-by: Lubomir Rintel Reviewed-by: Andy Shevchenko Signed-off-by: Andy Shevchenko --- include/linux/olpc-ec.h | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/olpc-ec.h b/include/linux/olpc-ec.h index 7fa3d27f7fee..f7b6a7eda232 100644 --- a/include/linux/olpc-ec.h +++ b/include/linux/olpc-ec.h @@ -2,6 +2,8 @@ #ifndef _LINUX_OLPC_EC_H #define _LINUX_OLPC_EC_H +#include + /* XO-1 EC commands */ #define EC_FIRMWARE_REV 0x08 #define EC_WRITE_SCI_MASK 0x1b @@ -17,17 +19,16 @@ #define EC_EXT_SCI_QUERY 0x85 /* SCI source values */ -#define EC_SCI_SRC_EMPTY 0x00 -#define EC_SCI_SRC_GAME 0x01 -#define EC_SCI_SRC_BATTERY 0x02 -#define EC_SCI_SRC_BATSOC 0x04 -#define EC_SCI_SRC_BATERR 0x08 -#define EC_SCI_SRC_EBOOK 0x10 /* XO-1 only */ -#define EC_SCI_SRC_WLAN 0x20 /* XO-1 only */ -#define EC_SCI_SRC_ACPWR 0x40 -#define EC_SCI_SRC_BATCRIT 0x80 -#define EC_SCI_SRC_GPWAKE 0x100 /* XO-1.5 only */ -#define EC_SCI_SRC_ALL 0x1FF +#define EC_SCI_SRC_GAME BIT(0) +#define EC_SCI_SRC_BATTERY BIT(1) +#define EC_SCI_SRC_BATSOC BIT(2) +#define EC_SCI_SRC_BATERR BIT(3) +#define EC_SCI_SRC_EBOOK BIT(4) /* XO-1 only */ +#define EC_SCI_SRC_WLAN BIT(5) /* XO-1 only */ +#define EC_SCI_SRC_ACPWR BIT(6) +#define EC_SCI_SRC_BATCRIT BIT(7) +#define EC_SCI_SRC_GPWAKE BIT(8) /* XO-1.5 only */ +#define EC_SCI_SRC_ALL GENMASK(8, 0) struct platform_device; -- cgit v1.2.3 From 0c3d931b3ab9efeea4948b5373c62095449d0101 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 13 May 2019 09:56:37 +0200 Subject: Platform: OLPC: Add XO-1.75 EC driver It's based off the driver from the OLPC kernel sources. Somewhat modernized and cleaned up, for better or worse. Modified to plug into the olpc-ec driver infrastructure (so that battery interface and debugfs could be reused) and the SPI slave framework. Signed-off-by: Lubomir Rintel Reviewed-by: Andy Shevchenko Signed-off-by: Andy Shevchenko --- arch/x86/Kconfig | 1 + drivers/platform/Kconfig | 2 + drivers/platform/Makefile | 2 +- drivers/platform/olpc/Kconfig | 14 + drivers/platform/olpc/Makefile | 3 +- drivers/platform/olpc/olpc-xo175-ec.c | 752 ++++++++++++++++++++++++++++++++++ include/linux/olpc-ec.h | 4 +- 7 files changed, 774 insertions(+), 4 deletions(-) create mode 100644 drivers/platform/olpc/Kconfig create mode 100644 drivers/platform/olpc/olpc-xo175-ec.c (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2bbbd4d1ba31..cb1c073b3c7e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2698,6 +2698,7 @@ config OLPC select OF select OF_PROMTREE select IRQ_DOMAIN + select OLPC_EC ---help--- Add support for detecting the unique features of the OLPC XO hardware. diff --git a/drivers/platform/Kconfig b/drivers/platform/Kconfig index d4c2e424a700..4313d73d3618 100644 --- a/drivers/platform/Kconfig +++ b/drivers/platform/Kconfig @@ -10,3 +10,5 @@ source "drivers/platform/goldfish/Kconfig" source "drivers/platform/chrome/Kconfig" source "drivers/platform/mellanox/Kconfig" + +source "drivers/platform/olpc/Kconfig" diff --git a/drivers/platform/Makefile b/drivers/platform/Makefile index 4b2ce58bcd9c..6fda58c021ca 100644 --- a/drivers/platform/Makefile +++ b/drivers/platform/Makefile @@ -6,6 +6,6 @@ obj-$(CONFIG_X86) += x86/ obj-$(CONFIG_MELLANOX_PLATFORM) += mellanox/ obj-$(CONFIG_MIPS) += mips/ -obj-$(CONFIG_OLPC) += olpc/ +obj-$(CONFIG_OLPC_EC) += olpc/ obj-$(CONFIG_GOLDFISH) += goldfish/ obj-$(CONFIG_CHROME_PLATFORMS) += chrome/ diff --git a/drivers/platform/olpc/Kconfig b/drivers/platform/olpc/Kconfig new file mode 100644 index 000000000000..559f843199d7 --- /dev/null +++ b/drivers/platform/olpc/Kconfig @@ -0,0 +1,14 @@ +config OLPC_EC + bool + +config OLPC_XO175_EC + tristate "OLPC XO 1.75 Embedded Controller" + depends on ARCH_MMP || COMPILE_TEST + select SPI_SLAVE + select OLPC_EC + help + Include support for the OLPC XO Embedded Controller (EC). The EC + provides various platform services, including support for the power, + button, restart, shutdown and battery charging status. + + Unless you have an OLPC XO laptop, you will want to say N. diff --git a/drivers/platform/olpc/Makefile b/drivers/platform/olpc/Makefile index dc8b26bc7209..01fe6ba01665 100644 --- a/drivers/platform/olpc/Makefile +++ b/drivers/platform/olpc/Makefile @@ -1,4 +1,5 @@ # # OLPC XO platform-specific drivers # -obj-$(CONFIG_OLPC) += olpc-ec.o +obj-$(CONFIG_OLPC_EC) += olpc-ec.o +obj-$(CONFIG_OLPC_XO175_EC) += olpc-xo175-ec.o diff --git a/drivers/platform/olpc/olpc-xo175-ec.c b/drivers/platform/olpc/olpc-xo175-ec.c new file mode 100644 index 000000000000..344d14f3da54 --- /dev/null +++ b/drivers/platform/olpc/olpc-xo175-ec.c @@ -0,0 +1,752 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Driver for the OLPC XO-1.75 Embedded Controller. + * + * The EC protocol is documented at: + * http://wiki.laptop.org/go/XO_1.75_HOST_to_EC_Protocol + * + * Copyright (C) 2010 One Laptop per Child Foundation. + * Copyright (C) 2018 Lubomir Rintel + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct ec_cmd_t { + u8 cmd; + u8 bytes_returned; +}; + +enum ec_chan_t { + CHAN_NONE = 0, + CHAN_SWITCH, + CHAN_CMD_RESP, + CHAN_KEYBOARD, + CHAN_TOUCHPAD, + CHAN_EVENT, + CHAN_DEBUG, + CHAN_CMD_ERROR, +}; + +/* + * EC events + */ +#define EVENT_AC_CHANGE 1 /* AC plugged/unplugged */ +#define EVENT_BATTERY_STATUS 2 /* Battery low/full/error/gone */ +#define EVENT_BATTERY_CRITICAL 3 /* Battery critical voltage */ +#define EVENT_BATTERY_SOC_CHANGE 4 /* 1% SOC Change */ +#define EVENT_BATTERY_ERROR 5 /* Abnormal error, query for cause */ +#define EVENT_POWER_PRESSED 6 /* Power button was pressed */ +#define EVENT_POWER_PRESS_WAKE 7 /* Woken up with a power button */ +#define EVENT_TIMED_HOST_WAKE 8 /* Host wake timer */ +#define EVENT_OLS_HIGH_LIMIT 9 /* OLS crossed dark threshold */ +#define EVENT_OLS_LOW_LIMIT 10 /* OLS crossed light threshold */ + +/* + * EC commands + * (from http://dev.laptop.org/git/users/rsmith/ec-1.75/tree/ec_cmd.h) + */ +#define CMD_GET_API_VERSION 0x08 /* out: u8 */ +#define CMD_READ_VOLTAGE 0x10 /* out: u16, *9.76/32, mV */ +#define CMD_READ_CURRENT 0x11 /* out: s16, *15.625/120, mA */ +#define CMD_READ_ACR 0x12 /* out: s16, *6250/15, uAh */ +#define CMD_READ_BATT_TEMPERATURE 0x13 /* out: u16, *100/256, deg C */ +#define CMD_READ_AMBIENT_TEMPERATURE 0x14 /* unimplemented, no hardware */ +#define CMD_READ_BATTERY_STATUS 0x15 /* out: u8, bitmask */ +#define CMD_READ_SOC 0x16 /* out: u8, percentage */ +#define CMD_READ_GAUGE_ID 0x17 /* out: u8 * 8 */ +#define CMD_READ_GAUGE_DATA 0x18 /* in: u8 addr, out: u8 data */ +#define CMD_READ_BOARD_ID 0x19 /* out: u16 (platform id) */ +#define CMD_READ_BATT_ERR_CODE 0x1f /* out: u8, error bitmask */ +#define CMD_SET_DCON_POWER 0x26 /* in: u8 */ +#define CMD_RESET_EC 0x28 /* none */ +#define CMD_READ_BATTERY_TYPE 0x2c /* out: u8 */ +#define CMD_SET_AUTOWAK 0x33 /* out: u8 */ +#define CMD_SET_EC_WAKEUP_TIMER 0x36 /* in: u32, out: ? */ +#define CMD_READ_EXT_SCI_MASK 0x37 /* ? */ +#define CMD_WRITE_EXT_SCI_MASK 0x38 /* ? */ +#define CMD_CLEAR_EC_WAKEUP_TIMER 0x39 /* none */ +#define CMD_ENABLE_RUNIN_DISCHARGE 0x3B /* none */ +#define CMD_DISABLE_RUNIN_DISCHARGE 0x3C /* none */ +#define CMD_READ_MPPT_ACTIVE 0x3d /* out: u8 */ +#define CMD_READ_MPPT_LIMIT 0x3e /* out: u8 */ +#define CMD_SET_MPPT_LIMIT 0x3f /* in: u8 */ +#define CMD_DISABLE_MPPT 0x40 /* none */ +#define CMD_ENABLE_MPPT 0x41 /* none */ +#define CMD_READ_VIN 0x42 /* out: u16 */ +#define CMD_EXT_SCI_QUERY 0x43 /* ? */ +#define RSP_KEYBOARD_DATA 0x48 /* ? */ +#define RSP_TOUCHPAD_DATA 0x49 /* ? */ +#define CMD_GET_FW_VERSION 0x4a /* out: u8 * 16 */ +#define CMD_POWER_CYCLE 0x4b /* none */ +#define CMD_POWER_OFF 0x4c /* none */ +#define CMD_RESET_EC_SOFT 0x4d /* none */ +#define CMD_READ_GAUGE_U16 0x4e /* ? */ +#define CMD_ENABLE_MOUSE 0x4f /* ? */ +#define CMD_ECHO 0x52 /* in: u8 * 5, out: u8 * 5 */ +#define CMD_GET_FW_DATE 0x53 /* out: u8 * 16 */ +#define CMD_GET_FW_USER 0x54 /* out: u8 * 16 */ +#define CMD_TURN_OFF_POWER 0x55 /* none (same as 0x4c) */ +#define CMD_READ_OLS 0x56 /* out: u16 */ +#define CMD_OLS_SMT_LEDON 0x57 /* none */ +#define CMD_OLS_SMT_LEDOFF 0x58 /* none */ +#define CMD_START_OLS_ASSY 0x59 /* none */ +#define CMD_STOP_OLS_ASSY 0x5a /* none */ +#define CMD_OLS_SMTTEST_STOP 0x5b /* none */ +#define CMD_READ_VIN_SCALED 0x5c /* out: u16 */ +#define CMD_READ_BAT_MIN_W 0x5d /* out: u16 */ +#define CMD_READ_BAR_MAX_W 0x5e /* out: u16 */ +#define CMD_RESET_BAT_MINMAX_W 0x5f /* none */ +#define CMD_READ_LOCATION 0x60 /* in: u16 addr, out: u8 data */ +#define CMD_WRITE_LOCATION 0x61 /* in: u16 addr, u8 data */ +#define CMD_KEYBOARD_CMD 0x62 /* in: u8, out: ? */ +#define CMD_TOUCHPAD_CMD 0x63 /* in: u8, out: ? */ +#define CMD_GET_FW_HASH 0x64 /* out: u8 * 16 */ +#define CMD_SUSPEND_HINT 0x65 /* in: u8 */ +#define CMD_ENABLE_WAKE_TIMER 0x66 /* in: u8 */ +#define CMD_SET_WAKE_TIMER 0x67 /* in: 32 */ +#define CMD_ENABLE_WAKE_AUTORESET 0x68 /* in: u8 */ +#define CMD_OLS_SET_LIMITS 0x69 /* in: u16, u16 */ +#define CMD_OLS_GET_LIMITS 0x6a /* out: u16, u16 */ +#define CMD_OLS_SET_CEILING 0x6b /* in: u16 */ +#define CMD_OLS_GET_CEILING 0x6c /* out: u16 */ + +/* + * Accepted EC commands, and how many bytes they return. There are plenty + * of EC commands that are no longer implemented, or are implemented only on + * certain older boards. + */ +static const struct ec_cmd_t olpc_xo175_ec_cmds[] = { + { CMD_GET_API_VERSION, 1 }, + { CMD_READ_VOLTAGE, 2 }, + { CMD_READ_CURRENT, 2 }, + { CMD_READ_ACR, 2 }, + { CMD_READ_BATT_TEMPERATURE, 2 }, + { CMD_READ_BATTERY_STATUS, 1 }, + { CMD_READ_SOC, 1 }, + { CMD_READ_GAUGE_ID, 8 }, + { CMD_READ_GAUGE_DATA, 1 }, + { CMD_READ_BOARD_ID, 2 }, + { CMD_READ_BATT_ERR_CODE, 1 }, + { CMD_SET_DCON_POWER, 0 }, + { CMD_RESET_EC, 0 }, + { CMD_READ_BATTERY_TYPE, 1 }, + { CMD_ENABLE_RUNIN_DISCHARGE, 0 }, + { CMD_DISABLE_RUNIN_DISCHARGE, 0 }, + { CMD_READ_MPPT_ACTIVE, 1 }, + { CMD_READ_MPPT_LIMIT, 1 }, + { CMD_SET_MPPT_LIMIT, 0 }, + { CMD_DISABLE_MPPT, 0 }, + { CMD_ENABLE_MPPT, 0 }, + { CMD_READ_VIN, 2 }, + { CMD_GET_FW_VERSION, 16 }, + { CMD_POWER_CYCLE, 0 }, + { CMD_POWER_OFF, 0 }, + { CMD_RESET_EC_SOFT, 0 }, + { CMD_ECHO, 5 }, + { CMD_GET_FW_DATE, 16 }, + { CMD_GET_FW_USER, 16 }, + { CMD_TURN_OFF_POWER, 0 }, + { CMD_READ_OLS, 2 }, + { CMD_OLS_SMT_LEDON, 0 }, + { CMD_OLS_SMT_LEDOFF, 0 }, + { CMD_START_OLS_ASSY, 0 }, + { CMD_STOP_OLS_ASSY, 0 }, + { CMD_OLS_SMTTEST_STOP, 0 }, + { CMD_READ_VIN_SCALED, 2 }, + { CMD_READ_BAT_MIN_W, 2 }, + { CMD_READ_BAR_MAX_W, 2 }, + { CMD_RESET_BAT_MINMAX_W, 0 }, + { CMD_READ_LOCATION, 1 }, + { CMD_WRITE_LOCATION, 0 }, + { CMD_GET_FW_HASH, 16 }, + { CMD_SUSPEND_HINT, 0 }, + { CMD_ENABLE_WAKE_TIMER, 0 }, + { CMD_SET_WAKE_TIMER, 0 }, + { CMD_ENABLE_WAKE_AUTORESET, 0 }, + { CMD_OLS_SET_LIMITS, 0 }, + { CMD_OLS_GET_LIMITS, 4 }, + { CMD_OLS_SET_CEILING, 0 }, + { CMD_OLS_GET_CEILING, 2 }, + { CMD_READ_EXT_SCI_MASK, 2 }, + { CMD_WRITE_EXT_SCI_MASK, 0 }, + + { } +}; + +#define EC_MAX_CMD_DATA_LEN 5 +#define EC_MAX_RESP_LEN 16 + +#define LOG_BUF_SIZE 128 + +#define PM_WAKEUP_TIME 1000 + +#define EC_ALL_EVENTS GENMASK(15, 0) + +enum ec_state_t { + CMD_STATE_IDLE = 0, + CMD_STATE_WAITING_FOR_SWITCH, + CMD_STATE_CMD_IN_TX_FIFO, + CMD_STATE_CMD_SENT, + CMD_STATE_RESP_RECEIVED, + CMD_STATE_ERROR_RECEIVED, +}; + +struct olpc_xo175_ec_cmd { + u8 command; + u8 nr_args; + u8 data_len; + u8 args[EC_MAX_CMD_DATA_LEN]; +}; + +struct olpc_xo175_ec_resp { + u8 channel; + u8 byte; +}; + +struct olpc_xo175_ec { + bool suspended; + + /* SPI related stuff. */ + struct spi_device *spi; + struct spi_transfer xfer; + struct spi_message msg; + union { + struct olpc_xo175_ec_cmd cmd; + struct olpc_xo175_ec_resp resp; + } tx_buf, rx_buf; + + /* GPIO for the CMD signals. */ + struct gpio_desc *gpio_cmd; + + /* Command handling related state. */ + spinlock_t cmd_state_lock; + int cmd_state; + bool cmd_running; + struct completion cmd_done; + struct olpc_xo175_ec_cmd cmd; + u8 resp_data[EC_MAX_RESP_LEN]; + int expected_resp_len; + int resp_len; + + /* Power button. */ + struct input_dev *pwrbtn; + + /* Debug handling. */ + char logbuf[LOG_BUF_SIZE]; + int logbuf_len; +}; + +static struct platform_device *olpc_ec; + +static int olpc_xo175_ec_resp_len(u8 cmd) +{ + const struct ec_cmd_t *p; + + for (p = olpc_xo175_ec_cmds; p->cmd; p++) { + if (p->cmd == cmd) + return p->bytes_returned; + } + + return -EINVAL; +} + +static void olpc_xo175_ec_flush_logbuf(struct olpc_xo175_ec *priv) +{ + dev_dbg(&priv->spi->dev, "got debug string [%*pE]\n", + priv->logbuf_len, priv->logbuf); + priv->logbuf_len = 0; +} + +static void olpc_xo175_ec_complete(void *arg); + +static void olpc_xo175_ec_send_command(struct olpc_xo175_ec *priv, void *cmd, + size_t cmdlen) +{ + int ret; + + memcpy(&priv->tx_buf, cmd, cmdlen); + priv->xfer.len = cmdlen; + + spi_message_init_with_transfers(&priv->msg, &priv->xfer, 1); + + priv->msg.complete = olpc_xo175_ec_complete; + priv->msg.context = priv; + + ret = spi_async(priv->spi, &priv->msg); + if (ret) + dev_err(&priv->spi->dev, "spi_async() failed %d\n", ret); +} + +static void olpc_xo175_ec_read_packet(struct olpc_xo175_ec *priv) +{ + u8 nonce[] = {0xA5, 0x5A}; + + olpc_xo175_ec_send_command(priv, nonce, sizeof(nonce)); +} + +static void olpc_xo175_ec_complete(void *arg) +{ + struct olpc_xo175_ec *priv = arg; + struct device *dev = &priv->spi->dev; + struct power_supply *psy; + unsigned long flags; + u8 channel; + u8 byte; + int ret; + + ret = priv->msg.status; + if (ret) { + dev_err(dev, "SPI transfer failed: %d\n", ret); + + spin_lock_irqsave(&priv->cmd_state_lock, flags); + if (priv->cmd_running) { + priv->resp_len = 0; + priv->cmd_state = CMD_STATE_ERROR_RECEIVED; + complete(&priv->cmd_done); + } + spin_unlock_irqrestore(&priv->cmd_state_lock, flags); + + if (ret != -EINTR) + olpc_xo175_ec_read_packet(priv); + + return; + } + + channel = priv->rx_buf.resp.channel; + byte = priv->rx_buf.resp.byte; + + switch (channel) { + case CHAN_NONE: + spin_lock_irqsave(&priv->cmd_state_lock, flags); + + if (!priv->cmd_running) { + /* We can safely ignore these */ + dev_err(dev, "spurious FIFO read packet\n"); + spin_unlock_irqrestore(&priv->cmd_state_lock, flags); + return; + } + + priv->cmd_state = CMD_STATE_CMD_SENT; + if (!priv->expected_resp_len) + complete(&priv->cmd_done); + olpc_xo175_ec_read_packet(priv); + + spin_unlock_irqrestore(&priv->cmd_state_lock, flags); + return; + + case CHAN_SWITCH: + spin_lock_irqsave(&priv->cmd_state_lock, flags); + + if (!priv->cmd_running) { + /* Just go with the flow */ + dev_err(dev, "spurious SWITCH packet\n"); + memset(&priv->cmd, 0, sizeof(priv->cmd)); + priv->cmd.command = CMD_ECHO; + } + + priv->cmd_state = CMD_STATE_CMD_IN_TX_FIFO; + + /* Throw command into TxFIFO */ + gpiod_set_value_cansleep(priv->gpio_cmd, 0); + olpc_xo175_ec_send_command(priv, &priv->cmd, sizeof(priv->cmd)); + + spin_unlock_irqrestore(&priv->cmd_state_lock, flags); + return; + + case CHAN_CMD_RESP: + spin_lock_irqsave(&priv->cmd_state_lock, flags); + + if (!priv->cmd_running) { + dev_err(dev, "spurious response packet\n"); + } else if (priv->resp_len >= priv->expected_resp_len) { + dev_err(dev, "too many response packets\n"); + } else { + priv->resp_data[priv->resp_len++] = byte; + if (priv->resp_len == priv->expected_resp_len) { + priv->cmd_state = CMD_STATE_RESP_RECEIVED; + complete(&priv->cmd_done); + } + } + + spin_unlock_irqrestore(&priv->cmd_state_lock, flags); + break; + + case CHAN_CMD_ERROR: + spin_lock_irqsave(&priv->cmd_state_lock, flags); + + if (!priv->cmd_running) { + dev_err(dev, "spurious cmd error packet\n"); + } else { + priv->resp_data[0] = byte; + priv->resp_len = 1; + priv->cmd_state = CMD_STATE_ERROR_RECEIVED; + complete(&priv->cmd_done); + } + spin_unlock_irqrestore(&priv->cmd_state_lock, flags); + break; + + case CHAN_KEYBOARD: + dev_warn(dev, "keyboard is not supported\n"); + break; + + case CHAN_TOUCHPAD: + dev_warn(dev, "touchpad is not supported\n"); + break; + + case CHAN_EVENT: + dev_dbg(dev, "got event %.2x\n", byte); + switch (byte) { + case EVENT_AC_CHANGE: + psy = power_supply_get_by_name("olpc-ac"); + if (psy) { + power_supply_changed(psy); + power_supply_put(psy); + } + break; + case EVENT_BATTERY_STATUS: + case EVENT_BATTERY_CRITICAL: + case EVENT_BATTERY_SOC_CHANGE: + case EVENT_BATTERY_ERROR: + psy = power_supply_get_by_name("olpc-battery"); + if (psy) { + power_supply_changed(psy); + power_supply_put(psy); + } + break; + case EVENT_POWER_PRESSED: + input_report_key(priv->pwrbtn, KEY_POWER, 1); + input_sync(priv->pwrbtn); + input_report_key(priv->pwrbtn, KEY_POWER, 0); + input_sync(priv->pwrbtn); + /* fall through */ + case EVENT_POWER_PRESS_WAKE: + case EVENT_TIMED_HOST_WAKE: + pm_wakeup_event(priv->pwrbtn->dev.parent, + PM_WAKEUP_TIME); + break; + default: + dev_dbg(dev, "ignored unknown event %.2x\n", byte); + break; + } + break; + + case CHAN_DEBUG: + if (byte == '\n') { + olpc_xo175_ec_flush_logbuf(priv); + } else if (isprint(byte)) { + priv->logbuf[priv->logbuf_len++] = byte; + if (priv->logbuf_len == LOG_BUF_SIZE) + olpc_xo175_ec_flush_logbuf(priv); + } + break; + + default: + dev_warn(dev, "unknown channel: %d, %.2x\n", channel, byte); + break; + } + + /* Most non-command packets get the TxFIFO refilled and an ACK. */ + olpc_xo175_ec_read_packet(priv); +} + +/* + * This function is protected with a mutex. We can safely assume that + * there will be only one instance of this function running at a time. + * One of the ways in which we enforce this is by waiting until we get + * all response bytes back from the EC, rather than just the number that + * the caller requests (otherwise, we might start a new command while an + * old command's response bytes are still incoming). + */ +static int olpc_xo175_ec_cmd(u8 cmd, u8 *inbuf, size_t inlen, u8 *resp, + size_t resp_len, void *ec_cb_arg) +{ + struct olpc_xo175_ec *priv = ec_cb_arg; + struct device *dev = &priv->spi->dev; + unsigned long flags; + size_t nr_bytes; + int ret = 0; + + dev_dbg(dev, "CMD %x, %zd bytes expected\n", cmd, resp_len); + + if (inlen > 5) { + dev_err(dev, "command len %zd too big!\n", resp_len); + return -EOVERFLOW; + } + + /* Suspending in the middle of an EC command hoses things badly! */ + if (WARN_ON(priv->suspended)) + return -EBUSY; + + /* Ensure a valid command and return bytes */ + ret = olpc_xo175_ec_resp_len(cmd); + if (ret < 0) { + dev_err_ratelimited(dev, "unknown command 0x%x\n", cmd); + + /* + * Assume the best in our callers, and allow unknown commands + * through. I'm not the charitable type, but it was beaten + * into me. Just maintain a minimum standard of sanity. + */ + if (resp_len > sizeof(priv->resp_data)) { + dev_err(dev, "response too big: %zd!\n", resp_len); + return -EOVERFLOW; + } + nr_bytes = resp_len; + } else { + nr_bytes = (size_t)ret; + } + resp_len = min(resp_len, nr_bytes); + + spin_lock_irqsave(&priv->cmd_state_lock, flags); + + /* Initialize the state machine */ + init_completion(&priv->cmd_done); + priv->cmd_running = true; + priv->cmd_state = CMD_STATE_WAITING_FOR_SWITCH; + memset(&priv->cmd, 0, sizeof(priv->cmd)); + priv->cmd.command = cmd; + priv->cmd.nr_args = inlen; + priv->cmd.data_len = 0; + memcpy(priv->cmd.args, inbuf, inlen); + priv->expected_resp_len = nr_bytes; + priv->resp_len = 0; + + /* Tickle the cmd gpio to get things started */ + gpiod_set_value_cansleep(priv->gpio_cmd, 1); + + spin_unlock_irqrestore(&priv->cmd_state_lock, flags); + + /* The irq handler should do the rest */ + if (!wait_for_completion_timeout(&priv->cmd_done, + msecs_to_jiffies(4000))) { + dev_err(dev, "EC cmd error: timeout in STATE %d\n", + priv->cmd_state); + gpiod_set_value_cansleep(priv->gpio_cmd, 0); + spi_slave_abort(priv->spi); + olpc_xo175_ec_read_packet(priv); + return -ETIMEDOUT; + } + + spin_lock_irqsave(&priv->cmd_state_lock, flags); + + /* Deal with the results. */ + if (priv->cmd_state == CMD_STATE_ERROR_RECEIVED) { + /* EC-provided error is in the single response byte */ + dev_err(dev, "command 0x%x returned error 0x%x\n", + cmd, priv->resp_data[0]); + ret = -EREMOTEIO; + } else if (priv->resp_len != nr_bytes) { + dev_err(dev, "command 0x%x returned %d bytes, expected %zd bytes\n", + cmd, priv->resp_len, nr_bytes); + ret = -EREMOTEIO; + } else { + /* + * We may have 8 bytes in priv->resp, but we only care about + * what we've been asked for. If the caller asked for only 2 + * bytes, give them that. We've guaranteed that + * resp_len <= priv->resp_len and priv->resp_len == nr_bytes. + */ + memcpy(resp, priv->resp_data, resp_len); + } + + /* This should already be low, but just in case. */ + gpiod_set_value_cansleep(priv->gpio_cmd, 0); + priv->cmd_running = false; + + spin_unlock_irqrestore(&priv->cmd_state_lock, flags); + + return ret; +} + +static int olpc_xo175_ec_set_event_mask(unsigned int mask) +{ + u8 args[2]; + + args[0] = mask >> 0; + args[1] = mask >> 8; + return olpc_ec_cmd(CMD_WRITE_EXT_SCI_MASK, args, 2, NULL, 0); +} + +static void olpc_xo175_ec_power_off(void) +{ + while (1) { + olpc_ec_cmd(CMD_POWER_OFF, NULL, 0, NULL, 0); + mdelay(1000); + } +} + +static int __maybe_unused olpc_xo175_ec_suspend(struct device *dev) +{ + struct olpc_xo175_ec *priv = dev_get_drvdata(dev); + static struct { + u8 suspend; + u32 suspend_count; + } __packed hintargs; + static unsigned int suspend_count; + + /* + * SOC_SLEEP is not wired to the EC on B3 and earlier boards. + * This command lets the EC know instead. The suspend count doesn't seem + * to be used anywhere but in the EC debug output. + */ + hintargs.suspend = 1; + hintargs.suspend_count = suspend_count++; + olpc_ec_cmd(CMD_SUSPEND_HINT, (void *)&hintargs, sizeof(hintargs), + NULL, 0); + + /* + * After we've sent the suspend hint, don't allow further EC commands + * to be run until we've resumed. Userspace tasks should be frozen, + * but kernel threads and interrupts could still schedule EC commands. + */ + priv->suspended = true; + + return 0; +} + +static int __maybe_unused olpc_xo175_ec_resume_noirq(struct device *dev) +{ + struct olpc_xo175_ec *priv = dev_get_drvdata(dev); + + priv->suspended = false; + + return 0; +} + +static int __maybe_unused olpc_xo175_ec_resume(struct device *dev) +{ + u8 x = 0; + + /* + * The resume hint is only needed if no other commands are + * being sent during resume. all it does is tell the EC + * the SoC is definitely awake. + */ + olpc_ec_cmd(CMD_SUSPEND_HINT, &x, 1, NULL, 0); + + /* Enable all EC events while we're awake */ + olpc_xo175_ec_set_event_mask(EC_ALL_EVENTS); + + return 0; +} + +static struct olpc_ec_driver olpc_xo175_ec_driver = { + .ec_cmd = olpc_xo175_ec_cmd, +}; + +static int olpc_xo175_ec_remove(struct spi_device *spi) +{ + if (pm_power_off == olpc_xo175_ec_power_off) + pm_power_off = NULL; + + spi_slave_abort(spi); + + platform_device_unregister(olpc_ec); + olpc_ec = NULL; + + return 0; +} + +static int olpc_xo175_ec_probe(struct spi_device *spi) +{ + struct olpc_xo175_ec *priv; + int ret; + + if (olpc_ec) { + dev_err(&spi->dev, "OLPC EC already registered.\n"); + return -EBUSY; + } + + priv = devm_kzalloc(&spi->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->gpio_cmd = devm_gpiod_get(&spi->dev, "cmd", GPIOD_OUT_LOW); + if (IS_ERR(priv->gpio_cmd)) { + dev_err(&spi->dev, "failed to get cmd gpio: %ld\n", + PTR_ERR(priv->gpio_cmd)); + return PTR_ERR(priv->gpio_cmd); + } + + priv->spi = spi; + + spin_lock_init(&priv->cmd_state_lock); + priv->cmd_state = CMD_STATE_IDLE; + init_completion(&priv->cmd_done); + + priv->logbuf_len = 0; + + /* Set up power button input device */ + priv->pwrbtn = devm_input_allocate_device(&spi->dev); + if (!priv->pwrbtn) + return -ENOMEM; + priv->pwrbtn->name = "Power Button"; + priv->pwrbtn->dev.parent = &spi->dev; + input_set_capability(priv->pwrbtn, EV_KEY, KEY_POWER); + ret = input_register_device(priv->pwrbtn); + if (ret) { + dev_err(&spi->dev, "error registering input device: %d\n", ret); + return ret; + } + + spi_set_drvdata(spi, priv); + + priv->xfer.rx_buf = &priv->rx_buf; + priv->xfer.tx_buf = &priv->tx_buf; + + olpc_xo175_ec_read_packet(priv); + + olpc_ec_driver_register(&olpc_xo175_ec_driver, priv); + olpc_ec = platform_device_register_resndata(&spi->dev, "olpc-ec", -1, + NULL, 0, NULL, 0); + + /* Enable all EC events while we're awake */ + olpc_xo175_ec_set_event_mask(EC_ALL_EVENTS); + + if (pm_power_off == NULL) + pm_power_off = olpc_xo175_ec_power_off; + + dev_info(&spi->dev, "OLPC XO-1.75 Embedded Controller driver\n"); + + return 0; +} + +static const struct dev_pm_ops olpc_xo175_ec_pm_ops = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(NULL, olpc_xo175_ec_resume_noirq) + SET_RUNTIME_PM_OPS(olpc_xo175_ec_suspend, olpc_xo175_ec_resume, NULL) +}; + +static const struct of_device_id olpc_xo175_ec_of_match[] = { + { .compatible = "olpc,xo1.75-ec" }, + { } +}; +MODULE_DEVICE_TABLE(of, olpc_xo175_ec_of_match); + +static struct spi_driver olpc_xo175_ec_spi_driver = { + .driver = { + .name = "olpc-xo175-ec", + .of_match_table = olpc_xo175_ec_of_match, + .pm = &olpc_xo175_ec_pm_ops, + }, + .probe = olpc_xo175_ec_probe, + .remove = olpc_xo175_ec_remove, +}; +module_spi_driver(olpc_xo175_ec_spi_driver); + +MODULE_DESCRIPTION("OLPC XO-1.75 Embedded Controller driver"); +MODULE_AUTHOR("Lennert Buytenhek "); /* Functionality */ +MODULE_AUTHOR("Lubomir Rintel "); /* Bugs */ +MODULE_LICENSE("GPL"); diff --git a/include/linux/olpc-ec.h b/include/linux/olpc-ec.h index f7b6a7eda232..c4602364e909 100644 --- a/include/linux/olpc-ec.h +++ b/include/linux/olpc-ec.h @@ -41,7 +41,7 @@ struct olpc_ec_driver { bool wakeup_available; }; -#ifdef CONFIG_OLPC +#ifdef CONFIG_OLPC_EC extern void olpc_ec_driver_register(struct olpc_ec_driver *drv, void *arg); @@ -69,6 +69,6 @@ static inline bool olpc_ec_wakeup_available(void) return false; } -#endif /* CONFIG_OLPC */ +#endif /* CONFIG_OLPC_EC */ #endif /* _LINUX_OLPC_EC_H */ -- cgit v1.2.3 From 9a0f780958bbcb85604636fa340e2a1efaa4f432 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 13 May 2019 13:39:51 +0200 Subject: dmaengine: sudmac: remove unused driver SUDMAC driver was introduced in v3.10 but was never integrated for use by any platform. As it is unused remove it. Signed-off-by: Simon Horman Acked-by: Yoshihiro Shimoda Signed-off-by: Vinod Koul --- drivers/dma/sh/Kconfig | 6 - drivers/dma/sh/Makefile | 1 - drivers/dma/sh/sudmac.c | 414 ------------------------------------------------ include/linux/sudmac.h | 52 ------ 4 files changed, 473 deletions(-) delete mode 100644 drivers/dma/sh/sudmac.c delete mode 100644 include/linux/sudmac.h (limited to 'include/linux') diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig index 4d6b02b3b1f1..54d5d0369d3c 100644 --- a/drivers/dma/sh/Kconfig +++ b/drivers/dma/sh/Kconfig @@ -47,9 +47,3 @@ config RENESAS_USB_DMAC help This driver supports the USB-DMA controller found in the Renesas SoCs. - -config SUDMAC - tristate "Renesas SUDMAC support" - depends on SH_DMAE_BASE - help - Enable support for the Renesas SUDMAC controllers. diff --git a/drivers/dma/sh/Makefile b/drivers/dma/sh/Makefile index 42110dd57a56..112fbd22bb3f 100644 --- a/drivers/dma/sh/Makefile +++ b/drivers/dma/sh/Makefile @@ -15,4 +15,3 @@ obj-$(CONFIG_SH_DMAE) += shdma.o obj-$(CONFIG_RCAR_DMAC) += rcar-dmac.o obj-$(CONFIG_RENESAS_USB_DMAC) += usb-dmac.o -obj-$(CONFIG_SUDMAC) += sudmac.o diff --git a/drivers/dma/sh/sudmac.c b/drivers/dma/sh/sudmac.c deleted file mode 100644 index 30cc3553cb8b..000000000000 --- a/drivers/dma/sh/sudmac.c +++ /dev/null @@ -1,414 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Renesas SUDMAC support - * - * Copyright (C) 2013 Renesas Solutions Corp. - * - * based on drivers/dma/sh/shdma.c: - * Copyright (C) 2011-2012 Guennadi Liakhovetski - * Copyright (C) 2009 Nobuhiro Iwamatsu - * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved. - * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -struct sudmac_chan { - struct shdma_chan shdma_chan; - void __iomem *base; - char dev_id[16]; /* unique name per DMAC of channel */ - - u32 offset; /* for CFG, BA, BBC, CA, CBC, DEN */ - u32 cfg; - u32 dint_end_bit; -}; - -struct sudmac_device { - struct shdma_dev shdma_dev; - struct sudmac_pdata *pdata; - void __iomem *chan_reg; -}; - -struct sudmac_regs { - u32 base_addr; - u32 base_byte_count; -}; - -struct sudmac_desc { - struct sudmac_regs hw; - struct shdma_desc shdma_desc; -}; - -#define to_chan(schan) container_of(schan, struct sudmac_chan, shdma_chan) -#define to_desc(sdesc) container_of(sdesc, struct sudmac_desc, shdma_desc) -#define to_sdev(sc) container_of(sc->shdma_chan.dma_chan.device, \ - struct sudmac_device, shdma_dev.dma_dev) - -/* SUDMAC register */ -#define SUDMAC_CH0CFG 0x00 -#define SUDMAC_CH0BA 0x10 -#define SUDMAC_CH0BBC 0x18 -#define SUDMAC_CH0CA 0x20 -#define SUDMAC_CH0CBC 0x28 -#define SUDMAC_CH0DEN 0x30 -#define SUDMAC_DSTSCLR 0x38 -#define SUDMAC_DBUFCTRL 0x3C -#define SUDMAC_DINTCTRL 0x40 -#define SUDMAC_DINTSTS 0x44 -#define SUDMAC_DINTSTSCLR 0x48 -#define SUDMAC_CH0SHCTRL 0x50 - -/* Definitions for the sudmac_channel.config */ -#define SUDMAC_SENDBUFM 0x1000 /* b12: Transmit Buffer Mode */ -#define SUDMAC_RCVENDM 0x0100 /* b8: Receive Data Transfer End Mode */ -#define SUDMAC_LBA_WAIT 0x0030 /* b5-4: Local Bus Access Wait */ - -/* Definitions for the sudmac_channel.dint_end_bit */ -#define SUDMAC_CH1ENDE 0x0002 /* b1: Ch1 DMA Transfer End Int Enable */ -#define SUDMAC_CH0ENDE 0x0001 /* b0: Ch0 DMA Transfer End Int Enable */ - -#define SUDMAC_DRV_NAME "sudmac" - -static void sudmac_writel(struct sudmac_chan *sc, u32 data, u32 reg) -{ - iowrite32(data, sc->base + reg); -} - -static u32 sudmac_readl(struct sudmac_chan *sc, u32 reg) -{ - return ioread32(sc->base + reg); -} - -static bool sudmac_is_busy(struct sudmac_chan *sc) -{ - u32 den = sudmac_readl(sc, SUDMAC_CH0DEN + sc->offset); - - if (den) - return true; /* working */ - - return false; /* waiting */ -} - -static void sudmac_set_reg(struct sudmac_chan *sc, struct sudmac_regs *hw, - struct shdma_desc *sdesc) -{ - sudmac_writel(sc, sc->cfg, SUDMAC_CH0CFG + sc->offset); - sudmac_writel(sc, hw->base_addr, SUDMAC_CH0BA + sc->offset); - sudmac_writel(sc, hw->base_byte_count, SUDMAC_CH0BBC + sc->offset); -} - -static void sudmac_start(struct sudmac_chan *sc) -{ - u32 dintctrl = sudmac_readl(sc, SUDMAC_DINTCTRL); - - sudmac_writel(sc, dintctrl | sc->dint_end_bit, SUDMAC_DINTCTRL); - sudmac_writel(sc, 1, SUDMAC_CH0DEN + sc->offset); -} - -static void sudmac_start_xfer(struct shdma_chan *schan, - struct shdma_desc *sdesc) -{ - struct sudmac_chan *sc = to_chan(schan); - struct sudmac_desc *sd = to_desc(sdesc); - - sudmac_set_reg(sc, &sd->hw, sdesc); - sudmac_start(sc); -} - -static bool sudmac_channel_busy(struct shdma_chan *schan) -{ - struct sudmac_chan *sc = to_chan(schan); - - return sudmac_is_busy(sc); -} - -static void sudmac_setup_xfer(struct shdma_chan *schan, int slave_id) -{ -} - -static const struct sudmac_slave_config *sudmac_find_slave( - struct sudmac_chan *sc, int slave_id) -{ - struct sudmac_device *sdev = to_sdev(sc); - struct sudmac_pdata *pdata = sdev->pdata; - const struct sudmac_slave_config *cfg; - int i; - - for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++) - if (cfg->slave_id == slave_id) - return cfg; - - return NULL; -} - -static int sudmac_set_slave(struct shdma_chan *schan, int slave_id, - dma_addr_t slave_addr, bool try) -{ - struct sudmac_chan *sc = to_chan(schan); - const struct sudmac_slave_config *cfg = sudmac_find_slave(sc, slave_id); - - if (!cfg) - return -ENODEV; - - return 0; -} - -static inline void sudmac_dma_halt(struct sudmac_chan *sc) -{ - u32 dintctrl = sudmac_readl(sc, SUDMAC_DINTCTRL); - - sudmac_writel(sc, 0, SUDMAC_CH0DEN + sc->offset); - sudmac_writel(sc, dintctrl & ~sc->dint_end_bit, SUDMAC_DINTCTRL); - sudmac_writel(sc, sc->dint_end_bit, SUDMAC_DINTSTSCLR); -} - -static int sudmac_desc_setup(struct shdma_chan *schan, - struct shdma_desc *sdesc, - dma_addr_t src, dma_addr_t dst, size_t *len) -{ - struct sudmac_chan *sc = to_chan(schan); - struct sudmac_desc *sd = to_desc(sdesc); - - dev_dbg(sc->shdma_chan.dev, "%s: src=%pad, dst=%pad, len=%zu\n", - __func__, &src, &dst, *len); - - if (*len > schan->max_xfer_len) - *len = schan->max_xfer_len; - - if (dst) - sd->hw.base_addr = dst; - else if (src) - sd->hw.base_addr = src; - sd->hw.base_byte_count = *len; - - return 0; -} - -static void sudmac_halt(struct shdma_chan *schan) -{ - struct sudmac_chan *sc = to_chan(schan); - - sudmac_dma_halt(sc); -} - -static bool sudmac_chan_irq(struct shdma_chan *schan, int irq) -{ - struct sudmac_chan *sc = to_chan(schan); - u32 dintsts = sudmac_readl(sc, SUDMAC_DINTSTS); - - if (!(dintsts & sc->dint_end_bit)) - return false; - - /* DMA stop */ - sudmac_dma_halt(sc); - - return true; -} - -static size_t sudmac_get_partial(struct shdma_chan *schan, - struct shdma_desc *sdesc) -{ - struct sudmac_chan *sc = to_chan(schan); - struct sudmac_desc *sd = to_desc(sdesc); - u32 current_byte_count = sudmac_readl(sc, SUDMAC_CH0CBC + sc->offset); - - return sd->hw.base_byte_count - current_byte_count; -} - -static bool sudmac_desc_completed(struct shdma_chan *schan, - struct shdma_desc *sdesc) -{ - struct sudmac_chan *sc = to_chan(schan); - struct sudmac_desc *sd = to_desc(sdesc); - u32 current_addr = sudmac_readl(sc, SUDMAC_CH0CA + sc->offset); - - return sd->hw.base_addr + sd->hw.base_byte_count == current_addr; -} - -static int sudmac_chan_probe(struct sudmac_device *su_dev, int id, int irq, - unsigned long flags) -{ - struct shdma_dev *sdev = &su_dev->shdma_dev; - struct platform_device *pdev = to_platform_device(sdev->dma_dev.dev); - struct sudmac_chan *sc; - struct shdma_chan *schan; - int err; - - sc = devm_kzalloc(&pdev->dev, sizeof(struct sudmac_chan), GFP_KERNEL); - if (!sc) - return -ENOMEM; - - schan = &sc->shdma_chan; - schan->max_xfer_len = 64 * 1024 * 1024 - 1; - - shdma_chan_probe(sdev, schan, id); - - sc->base = su_dev->chan_reg; - - /* get platform_data */ - sc->offset = su_dev->pdata->channel->offset; - if (su_dev->pdata->channel->config & SUDMAC_TX_BUFFER_MODE) - sc->cfg |= SUDMAC_SENDBUFM; - if (su_dev->pdata->channel->config & SUDMAC_RX_END_MODE) - sc->cfg |= SUDMAC_RCVENDM; - sc->cfg |= (su_dev->pdata->channel->wait << 4) & SUDMAC_LBA_WAIT; - - if (su_dev->pdata->channel->dint_end_bit & SUDMAC_DMA_BIT_CH0) - sc->dint_end_bit |= SUDMAC_CH0ENDE; - if (su_dev->pdata->channel->dint_end_bit & SUDMAC_DMA_BIT_CH1) - sc->dint_end_bit |= SUDMAC_CH1ENDE; - - /* set up channel irq */ - if (pdev->id >= 0) - snprintf(sc->dev_id, sizeof(sc->dev_id), "sudmac%d.%d", - pdev->id, id); - else - snprintf(sc->dev_id, sizeof(sc->dev_id), "sudmac%d", id); - - err = shdma_request_irq(schan, irq, flags, sc->dev_id); - if (err) { - dev_err(sdev->dma_dev.dev, - "DMA channel %d request_irq failed %d\n", id, err); - goto err_no_irq; - } - - return 0; - -err_no_irq: - /* remove from dmaengine device node */ - shdma_chan_remove(schan); - return err; -} - -static void sudmac_chan_remove(struct sudmac_device *su_dev) -{ - struct shdma_chan *schan; - int i; - - shdma_for_each_chan(schan, &su_dev->shdma_dev, i) { - BUG_ON(!schan); - - shdma_chan_remove(schan); - } -} - -static dma_addr_t sudmac_slave_addr(struct shdma_chan *schan) -{ - /* SUDMAC doesn't need the address */ - return 0; -} - -static struct shdma_desc *sudmac_embedded_desc(void *buf, int i) -{ - return &((struct sudmac_desc *)buf)[i].shdma_desc; -} - -static const struct shdma_ops sudmac_shdma_ops = { - .desc_completed = sudmac_desc_completed, - .halt_channel = sudmac_halt, - .channel_busy = sudmac_channel_busy, - .slave_addr = sudmac_slave_addr, - .desc_setup = sudmac_desc_setup, - .set_slave = sudmac_set_slave, - .setup_xfer = sudmac_setup_xfer, - .start_xfer = sudmac_start_xfer, - .embedded_desc = sudmac_embedded_desc, - .chan_irq = sudmac_chan_irq, - .get_partial = sudmac_get_partial, -}; - -static int sudmac_probe(struct platform_device *pdev) -{ - struct sudmac_pdata *pdata = dev_get_platdata(&pdev->dev); - int err, i; - struct sudmac_device *su_dev; - struct dma_device *dma_dev; - struct resource *chan, *irq_res; - - /* get platform data */ - if (!pdata) - return -ENODEV; - - irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (!irq_res) - return -ENODEV; - - err = -ENOMEM; - su_dev = devm_kzalloc(&pdev->dev, sizeof(struct sudmac_device), - GFP_KERNEL); - if (!su_dev) - return err; - - dma_dev = &su_dev->shdma_dev.dma_dev; - - chan = platform_get_resource(pdev, IORESOURCE_MEM, 0); - su_dev->chan_reg = devm_ioremap_resource(&pdev->dev, chan); - if (IS_ERR(su_dev->chan_reg)) - return PTR_ERR(su_dev->chan_reg); - - dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); - - su_dev->shdma_dev.ops = &sudmac_shdma_ops; - su_dev->shdma_dev.desc_size = sizeof(struct sudmac_desc); - err = shdma_init(&pdev->dev, &su_dev->shdma_dev, pdata->channel_num); - if (err < 0) - return err; - - /* platform data */ - su_dev->pdata = dev_get_platdata(&pdev->dev); - - platform_set_drvdata(pdev, su_dev); - - /* Create DMA Channel */ - for (i = 0; i < pdata->channel_num; i++) { - err = sudmac_chan_probe(su_dev, i, irq_res->start, IRQF_SHARED); - if (err) - goto chan_probe_err; - } - - err = dma_async_device_register(&su_dev->shdma_dev.dma_dev); - if (err < 0) - goto chan_probe_err; - - return err; - -chan_probe_err: - sudmac_chan_remove(su_dev); - - shdma_cleanup(&su_dev->shdma_dev); - - return err; -} - -static int sudmac_remove(struct platform_device *pdev) -{ - struct sudmac_device *su_dev = platform_get_drvdata(pdev); - struct dma_device *dma_dev = &su_dev->shdma_dev.dma_dev; - - dma_async_device_unregister(dma_dev); - sudmac_chan_remove(su_dev); - shdma_cleanup(&su_dev->shdma_dev); - - return 0; -} - -static struct platform_driver sudmac_driver = { - .driver = { - .name = SUDMAC_DRV_NAME, - }, - .probe = sudmac_probe, - .remove = sudmac_remove, -}; -module_platform_driver(sudmac_driver); - -MODULE_AUTHOR("Yoshihiro Shimoda"); -MODULE_DESCRIPTION("Renesas SUDMAC driver"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("platform:" SUDMAC_DRV_NAME); diff --git a/include/linux/sudmac.h b/include/linux/sudmac.h deleted file mode 100644 index 377b8a5788fa..000000000000 --- a/include/linux/sudmac.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Header for the SUDMAC driver - * - * Copyright (C) 2013 Renesas Solutions Corp. - * - * This is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - */ -#ifndef SUDMAC_H -#define SUDMAC_H - -#include -#include -#include - -/* Used by slave DMA clients to request DMA to/from a specific peripheral */ -struct sudmac_slave { - struct shdma_slave shdma_slave; /* Set by the platform */ -}; - -/* - * Supplied by platforms to specify, how a DMA channel has to be configured for - * a certain peripheral - */ -struct sudmac_slave_config { - int slave_id; -}; - -struct sudmac_channel { - unsigned long offset; - unsigned long config; - unsigned long wait; /* The configuable range is 0 to 3 */ - unsigned long dint_end_bit; -}; - -struct sudmac_pdata { - const struct sudmac_slave_config *slave; - int slave_num; - const struct sudmac_channel *channel; - int channel_num; -}; - -/* Definitions for the sudmac_channel.config */ -#define SUDMAC_TX_BUFFER_MODE BIT(0) -#define SUDMAC_RX_END_MODE BIT(1) - -/* Definitions for the sudmac_channel.dint_end_bit */ -#define SUDMAC_DMA_BIT_CH0 BIT(0) -#define SUDMAC_DMA_BIT_CH1 BIT(1) - -#endif -- cgit v1.2.3 From 7e5f7bb08b8cefd3a7e8961861f47fe1f0e830d4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 May 2019 13:44:57 +0100 Subject: unexport simple_dname() Signed-off-by: Al Viro --- fs/d_path.c | 1 - fs/internal.h | 1 + include/linux/dcache.h | 1 - 3 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/d_path.c b/fs/d_path.c index e8fce6b1174f..a7d0a96b35ce 100644 --- a/fs/d_path.c +++ b/fs/d_path.c @@ -316,7 +316,6 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen) end = ERR_PTR(-ENAMETOOLONG); return end; } -EXPORT_SYMBOL(simple_dname); /* * Write full pathname from the root of the filesystem into the buffer. diff --git a/fs/internal.h b/fs/internal.h index 0010889f2e85..1ac2b8f6c621 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -160,6 +160,7 @@ extern int d_set_mounted(struct dentry *dentry); extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc); extern struct dentry *d_alloc_cursor(struct dentry *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); +extern char *simple_dname(struct dentry *, char *, int); /* * read_write.c diff --git a/include/linux/dcache.h b/include/linux/dcache.h index f14e587c5d5d..361305ddd75e 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -291,7 +291,6 @@ static inline unsigned d_count(const struct dentry *dentry) */ extern __printf(4, 5) char *dynamic_dname(struct dentry *, char *, int, const char *, ...); -extern char *simple_dname(struct dentry *, char *, int); extern char *__d_path(const struct path *, const struct path *, char *, int); extern char *d_absolute_path(const struct path *, char *, int); -- cgit v1.2.3 From 97a7968448cb0ef5c15e3d395746b108b1a55556 Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Wed, 15 May 2019 10:20:41 -0500 Subject: usb: renesas_usbhs: move flags to param Move options from 'flags' field in private structure to param structure where other options are already being kept. Signed-off-by: Chris Brandt Reviewed-by: Yoshihiro Shimoda Reviewed-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/common.c | 23 +++++++---------------- drivers/usb/renesas_usbhs/common.h | 2 -- include/linux/usb/renesas_usbhs.h | 1 + 3 files changed, 8 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 0ca89de7f842..1de7a44f3415 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -43,15 +43,6 @@ * | .... | +-----------+ */ - -#define USBHSF_RUNTIME_PWCTRL (1 << 0) - -/* status */ -#define usbhsc_flags_init(p) do {(p)->flags = 0; } while (0) -#define usbhsc_flags_set(p, b) ((p)->flags |= (b)) -#define usbhsc_flags_clr(p, b) ((p)->flags &= ~(b)) -#define usbhsc_flags_has(p, b) ((p)->flags & (b)) - /* * platform call back * @@ -479,7 +470,7 @@ static void usbhsc_hotplug(struct usbhs_priv *priv) dev_dbg(&pdev->dev, "%s enable\n", __func__); /* power on */ - if (usbhsc_flags_has(priv, USBHSF_RUNTIME_PWCTRL)) + if (usbhs_get_dparam(priv, runtime_pwctrl)) usbhsc_power_ctrl(priv, enable); /* bus init */ @@ -499,7 +490,7 @@ static void usbhsc_hotplug(struct usbhs_priv *priv) usbhsc_bus_init(priv); /* power off */ - if (usbhsc_flags_has(priv, USBHSF_RUNTIME_PWCTRL)) + if (usbhs_get_dparam(priv, runtime_pwctrl)) usbhsc_power_ctrl(priv, enable); usbhs_mod_change(priv, -1); @@ -733,7 +724,7 @@ static int usbhs_probe(struct platform_device *pdev) /* FIXME */ /* runtime power control ? */ if (priv->pfunc.get_vbus) - usbhsc_flags_set(priv, USBHSF_RUNTIME_PWCTRL); + usbhs_get_dparam(priv, runtime_pwctrl) = 1; /* * priv settings @@ -807,7 +798,7 @@ static int usbhs_probe(struct platform_device *pdev) /* power control */ pm_runtime_enable(&pdev->dev); - if (!usbhsc_flags_has(priv, USBHSF_RUNTIME_PWCTRL)) { + if (!usbhs_get_dparam(priv, runtime_pwctrl)) { usbhsc_power_ctrl(priv, 1); usbhs_mod_autonomy_mode(priv); } @@ -848,7 +839,7 @@ static int usbhs_remove(struct platform_device *pdev) dfunc->notify_hotplug = NULL; /* power off */ - if (!usbhsc_flags_has(priv, USBHSF_RUNTIME_PWCTRL)) + if (!usbhs_get_dparam(priv, runtime_pwctrl)) usbhsc_power_ctrl(priv, 0); pm_runtime_disable(&pdev->dev); @@ -873,7 +864,7 @@ static __maybe_unused int usbhsc_suspend(struct device *dev) usbhs_mod_change(priv, -1); } - if (mod || !usbhsc_flags_has(priv, USBHSF_RUNTIME_PWCTRL)) + if (mod || !usbhs_get_dparam(priv, runtime_pwctrl)) usbhsc_power_ctrl(priv, 0); return 0; @@ -884,7 +875,7 @@ static __maybe_unused int usbhsc_resume(struct device *dev) struct usbhs_priv *priv = dev_get_drvdata(dev); struct platform_device *pdev = usbhs_priv_to_pdev(priv); - if (!usbhsc_flags_has(priv, USBHSF_RUNTIME_PWCTRL)) { + if (!usbhs_get_dparam(priv, runtime_pwctrl)) { usbhsc_power_ctrl(priv, 1); usbhs_mod_autonomy_mode(priv); } diff --git a/drivers/usb/renesas_usbhs/common.h b/drivers/usb/renesas_usbhs/common.h index de1a6638bf68..1fbffb7bbc8f 100644 --- a/drivers/usb/renesas_usbhs/common.h +++ b/drivers/usb/renesas_usbhs/common.h @@ -260,8 +260,6 @@ struct usbhs_priv { spinlock_t lock; - u32 flags; - /* * module control */ diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index 53924f8e840c..17fae6e504cc 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -189,6 +189,7 @@ struct renesas_usbhs_driver_param { u32 has_otg:1; /* for controlling PWEN/EXTLP */ u32 has_sudmac:1; /* for SUDMAC */ u32 has_usb_dmac:1; /* for USB-DMAC */ + u32 runtime_pwctrl:1; #define USBHS_USB_DMAC_XFER_SIZE 32 /* hardcode the xfer size */ }; -- cgit v1.2.3 From 2195e3af9079ea067079e98446ea6a457c81a98c Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Wed, 15 May 2019 10:20:42 -0500 Subject: usb: renesas_usbhs: add support for CNEN bit For some SoC, CNEN must be set for USB Device mode operation. Signed-off-by: Chris Brandt Reviewed-by: Yoshihiro Shimoda Reviewed-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/common.c | 6 ++++++ drivers/usb/renesas_usbhs/common.h | 1 + include/linux/usb/renesas_usbhs.h | 1 + 3 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 1de7a44f3415..734fb4e542c5 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -114,6 +114,12 @@ void usbhs_sys_function_ctrl(struct usbhs_priv *priv, int enable) u16 mask = DCFM | DRPD | DPRPU | HSE | USBE; u16 val = HSE | USBE; + /* CNEN bit is required for function operation */ + if (usbhs_get_dparam(priv, has_cnen)) { + mask |= CNEN; + val |= CNEN; + } + /* * if enable * diff --git a/drivers/usb/renesas_usbhs/common.h b/drivers/usb/renesas_usbhs/common.h index 1fbffb7bbc8f..de74ebd1a347 100644 --- a/drivers/usb/renesas_usbhs/common.h +++ b/drivers/usb/renesas_usbhs/common.h @@ -104,6 +104,7 @@ struct usbhs_priv; /* SYSCFG */ #define SCKE (1 << 10) /* USB Module Clock Enable */ +#define CNEN (1 << 8) /* Single-ended receiver operation Enable */ #define HSE (1 << 7) /* High-Speed Operation Enable */ #define DCFM (1 << 6) /* Controller Function Select */ #define DRPD (1 << 5) /* D+ Line/D- Line Resistance Control */ diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index 17fae6e504cc..9097a38fcda8 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -190,6 +190,7 @@ struct renesas_usbhs_driver_param { u32 has_sudmac:1; /* for SUDMAC */ u32 has_usb_dmac:1; /* for USB-DMAC */ u32 runtime_pwctrl:1; + u32 has_cnen:1; #define USBHS_USB_DMAC_XFER_SIZE 32 /* hardcode the xfer size */ }; -- cgit v1.2.3 From f756066990607dbe8ea5579c925b48e646891f3e Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Wed, 15 May 2019 10:20:43 -0500 Subject: usb: renesas_usbhs: support byte addressable CFIFO Some SoC have a CFIFO register that is byte addressable. This means when the CFIFO access is set to 32-bit, you can write 8-bit values to addresses CFIFO+0, CFIFO+1, CFIFO+2, CFIFO+3. Signed-off-by: Chris Brandt Reviewed-by: Yoshihiro Shimoda Reviewed-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/fifo.c | 9 +++++++-- include/linux/usb/renesas_usbhs.h | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 39fa2fc1b8b7..452b456ac24e 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -543,8 +543,13 @@ static int usbhsf_pio_try_push(struct usbhs_pkt *pkt, int *is_done) } /* the rest operation */ - for (i = 0; i < len; i++) - iowrite8(buf[i], addr + (0x03 - (i & 0x03))); + if (usbhs_get_dparam(priv, cfifo_byte_addr)) { + for (i = 0; i < len; i++) + iowrite8(buf[i], addr + (i & 0x03)); + } else { + for (i = 0; i < len; i++) + iowrite8(buf[i], addr + (0x03 - (i & 0x03))); + } /* * variable update diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index 9097a38fcda8..87043fd21d54 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -191,6 +191,7 @@ struct renesas_usbhs_driver_param { u32 has_usb_dmac:1; /* for USB-DMAC */ u32 runtime_pwctrl:1; u32 has_cnen:1; + u32 cfifo_byte_addr:1; /* CFIFO is byte addressable */ #define USBHS_USB_DMAC_XFER_SIZE 32 /* hardcode the xfer size */ }; -- cgit v1.2.3 From b69dce6341053cd51f3692a2ab3825140fad6ab8 Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Wed, 15 May 2019 10:20:44 -0500 Subject: usb: renesas_usbhs: Add support for RZ/A2 The RZ/A2 is similar to the R-Car Gen3 with some small differences. Signed-off-by: Chris Brandt Reviewed-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/Makefile | 2 +- drivers/usb/renesas_usbhs/common.c | 15 ++++++++ drivers/usb/renesas_usbhs/rza.h | 1 + drivers/usb/renesas_usbhs/rza2.c | 72 ++++++++++++++++++++++++++++++++++++++ include/linux/usb/renesas_usbhs.h | 1 + 5 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 drivers/usb/renesas_usbhs/rza2.c (limited to 'include/linux') diff --git a/drivers/usb/renesas_usbhs/Makefile b/drivers/usb/renesas_usbhs/Makefile index 5c5b51bb48ef..a1fed56b0957 100644 --- a/drivers/usb/renesas_usbhs/Makefile +++ b/drivers/usb/renesas_usbhs/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_USB_RENESAS_USBHS) += renesas_usbhs.o -renesas_usbhs-y := common.o mod.o pipe.o fifo.o rcar2.o rcar3.o rza.o +renesas_usbhs-y := common.o mod.o pipe.o fifo.o rcar2.o rcar3.o rza.o rza2.o ifneq ($(CONFIG_USB_RENESAS_USBHS_HCD),) renesas_usbhs-y += mod_host.o diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 734fb4e542c5..c7c9c5d75a56 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -571,6 +571,17 @@ static const struct usbhs_of_data rza1_data = { } }; +static const struct usbhs_of_data rza2_data = { + .platform_callback = &usbhs_rza2_ops, + .param = { + .type = USBHS_TYPE_RZA2, + .has_cnen = 1, + .cfifo_byte_addr = 1, + .pipe_configs = usbhsc_new_pipe, + .pipe_size = ARRAY_SIZE(usbhsc_new_pipe), + } +}; + /* * platform functions */ @@ -619,6 +630,10 @@ static const struct of_device_id usbhs_of_match[] = { .compatible = "renesas,rza1-usbhs", .data = &rza1_data, }, + { + .compatible = "renesas,rza2-usbhs", + .data = &rza2_data, + }, { }, }; MODULE_DEVICE_TABLE(of, usbhs_of_match); diff --git a/drivers/usb/renesas_usbhs/rza.h b/drivers/usb/renesas_usbhs/rza.h index ca917ca54f6d..073a53d1d442 100644 --- a/drivers/usb/renesas_usbhs/rza.h +++ b/drivers/usb/renesas_usbhs/rza.h @@ -2,3 +2,4 @@ #include "common.h" extern const struct renesas_usbhs_platform_callback usbhs_rza1_ops; +extern const struct renesas_usbhs_platform_callback usbhs_rza2_ops; diff --git a/drivers/usb/renesas_usbhs/rza2.c b/drivers/usb/renesas_usbhs/rza2.c new file mode 100644 index 000000000000..9d8551f93533 --- /dev/null +++ b/drivers/usb/renesas_usbhs/rza2.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Renesas USB driver RZ/A2 initialization and power control + * + * Copyright (C) 2019 Chris Brandt + * Copyright (C) 2019 Renesas Electronics Corporation + */ + +#include +#include +#include +#include +#include "common.h" +#include "rza.h" + +static int usbhs_rza2_hardware_init(struct platform_device *pdev) +{ + struct usbhs_priv *priv = usbhs_pdev_to_priv(pdev); + struct phy *phy = phy_get(&pdev->dev, "usb"); + + if (IS_ERR(phy)) + return PTR_ERR(phy); + + priv->phy = phy; + return 0; +} + +static int usbhs_rza2_hardware_exit(struct platform_device *pdev) +{ + struct usbhs_priv *priv = usbhs_pdev_to_priv(pdev); + + phy_put(priv->phy); + priv->phy = NULL; + + return 0; +} + +static int usbhs_rza2_power_ctrl(struct platform_device *pdev, + void __iomem *base, int enable) +{ + struct usbhs_priv *priv = usbhs_pdev_to_priv(pdev); + int retval = 0; + + if (!priv->phy) + return -ENODEV; + + if (enable) { + retval = phy_init(priv->phy); + usbhs_bset(priv, SUSPMODE, SUSPM, SUSPM); + udelay(100); /* Wait for PLL to become stable */ + if (!retval) + retval = phy_power_on(priv->phy); + } else { + usbhs_bset(priv, SUSPMODE, SUSPM, 0); + phy_power_off(priv->phy); + phy_exit(priv->phy); + } + + return retval; +} + +static int usbhs_rza2_get_id(struct platform_device *pdev) +{ + return USBHS_GADGET; +} + +const struct renesas_usbhs_platform_callback usbhs_rza2_ops = { + .hardware_init = usbhs_rza2_hardware_init, + .hardware_exit = usbhs_rza2_hardware_exit, + .power_ctrl = usbhs_rza2_power_ctrl, + .get_id = usbhs_rza2_get_id, +}; diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index 87043fd21d54..3f53043fb56b 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -199,6 +199,7 @@ struct renesas_usbhs_driver_param { #define USBHS_TYPE_RCAR_GEN3 2 #define USBHS_TYPE_RCAR_GEN3_WITH_PLL 3 #define USBHS_TYPE_RZA1 4 +#define USBHS_TYPE_RZA2 5 /* * option: -- cgit v1.2.3 From b48345aafb203803ccda4488cb5409b1ed435c0a Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Fri, 10 May 2019 12:21:49 -0400 Subject: audit: deliver signal_info regarless of syscall When a process signals the audit daemon (shutdown, rotate, resume, reconfig) but syscall auditing is not enabled, we still want to know the identity of the process sending the signal to the audit daemon. Move audit_signal_info() out of syscall auditing to general auditing but create a new function audit_signal_info_syscall() to take care of the syscall dependent parts for when syscall auditing is enabled. Please see the github kernel audit issue https://github.com/linux-audit/audit-kernel/issues/111 Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/linux/audit.h | 9 +++++++++ kernel/audit.c | 27 +++++++++++++++++++++++++++ kernel/audit.h | 8 ++++++-- kernel/auditsc.c | 19 +++---------------- kernel/signal.c | 2 +- 5 files changed, 46 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 43a23e28ba23..b4078560cb73 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -196,6 +196,9 @@ static inline unsigned int audit_get_sessionid(struct task_struct *tsk) } extern u32 audit_enabled; + +extern int audit_signal_info(int sig, struct task_struct *t); + #else /* CONFIG_AUDIT */ static inline __printf(4, 5) void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type, @@ -249,6 +252,12 @@ static inline unsigned int audit_get_sessionid(struct task_struct *tsk) } #define audit_enabled AUDIT_OFF + +static inline int audit_signal_info(int sig, struct task_struct *t) +{ + return 0; +} + #endif /* CONFIG_AUDIT */ #ifdef CONFIG_AUDIT_COMPAT_GENERIC diff --git a/kernel/audit.c b/kernel/audit.c index b96bf69183f4..67399ff72d43 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -2273,6 +2273,33 @@ out: return rc; } +/** + * audit_signal_info - record signal info for shutting down audit subsystem + * @sig: signal value + * @t: task being signaled + * + * If the audit subsystem is being terminated, record the task (pid) + * and uid that is doing that. + */ +int audit_signal_info(int sig, struct task_struct *t) +{ + kuid_t uid = current_uid(), auid; + + if (auditd_test_task(t) && + (sig == SIGTERM || sig == SIGHUP || + sig == SIGUSR1 || sig == SIGUSR2)) { + audit_sig_pid = task_tgid_nr(current); + auid = audit_get_loginuid(current); + if (uid_valid(auid)) + audit_sig_uid = auid; + else + audit_sig_uid = uid; + security_task_getsecid(current, &audit_sig_sid); + } + + return audit_signal_info_syscall(t); +} + /** * audit_log_end - end one audit record * @ab: the audit_buffer diff --git a/kernel/audit.h b/kernel/audit.h index 2071725a999f..996d94faad43 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -299,7 +299,7 @@ extern const char *audit_tree_path(struct audit_tree *tree); extern void audit_put_tree(struct audit_tree *tree); extern void audit_kill_trees(struct audit_context *context); -extern int audit_signal_info(int sig, struct task_struct *t); +extern int audit_signal_info_syscall(struct task_struct *t); extern void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx); extern struct list_head *audit_killed_trees(void); @@ -330,7 +330,11 @@ extern struct list_head *audit_killed_trees(void); #define audit_tree_path(rule) "" /* never called */ #define audit_kill_trees(context) BUG() -#define audit_signal_info(s, t) AUDIT_DISABLED +static inline int audit_signal_info_syscall(struct task_struct *t) +{ + return 0; +} + #define audit_filter_inodes(t, c) AUDIT_DISABLED #endif /* CONFIG_AUDITSYSCALL */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 95ae27edd417..30aa07b0115f 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2360,30 +2360,17 @@ void __audit_ptrace(struct task_struct *t) } /** - * audit_signal_info - record signal info for shutting down audit subsystem - * @sig: signal value + * audit_signal_info_syscall - record signal info for syscalls * @t: task being signaled * * If the audit subsystem is being terminated, record the task (pid) * and uid that is doing that. */ -int audit_signal_info(int sig, struct task_struct *t) +int audit_signal_info_syscall(struct task_struct *t) { struct audit_aux_data_pids *axp; struct audit_context *ctx = audit_context(); - kuid_t uid = current_uid(), auid, t_uid = task_uid(t); - - if (auditd_test_task(t) && - (sig == SIGTERM || sig == SIGHUP || - sig == SIGUSR1 || sig == SIGUSR2)) { - audit_sig_pid = task_tgid_nr(current); - auid = audit_get_loginuid(current); - if (uid_valid(auid)) - audit_sig_uid = auid; - else - audit_sig_uid = uid; - security_task_getsecid(current, &audit_sig_sid); - } + kuid_t t_uid = task_uid(t); if (!audit_signals || audit_dummy_context()) return 0; diff --git a/kernel/signal.c b/kernel/signal.c index a1eb44dc9ff5..5cfc8611867b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -44,6 +44,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -53,7 +54,6 @@ #include #include #include -#include "audit.h" /* audit_signal_info() */ /* * SLAB caches for signal bits. -- cgit v1.2.3 From 2e21865faf4fd7ca99eb2ace072c6d618059e342 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 22 May 2019 14:06:51 +0100 Subject: keys: sparse: Fix key_fs[ug]id_changed() Sparse warnings are incurred by key_fs[ug]id_changed() due to unprotected accesses of tsk->cred, which is marked __rcu. Fix this by passing the new cred struct to these functions from commit_creds() rather than the task pointer. Signed-off-by: David Howells Reviewed-by: James Morris --- include/linux/key.h | 8 ++++---- kernel/cred.c | 4 ++-- security/keys/process_keys.c | 22 ++++++++++------------ 3 files changed, 16 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index 7099985e35a9..1f09aad1c98c 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -402,8 +402,8 @@ extern struct ctl_table key_sysctls[]; * the userspace interface */ extern int install_thread_keyring_to_cred(struct cred *cred); -extern void key_fsuid_changed(struct task_struct *tsk); -extern void key_fsgid_changed(struct task_struct *tsk); +extern void key_fsuid_changed(struct cred *new_cred); +extern void key_fsgid_changed(struct cred *new_cred); extern void key_init(void); #else /* CONFIG_KEYS */ @@ -418,8 +418,8 @@ extern void key_init(void); #define make_key_ref(k, p) NULL #define key_ref_to_ptr(k) NULL #define is_key_possessed(k) 0 -#define key_fsuid_changed(t) do { } while(0) -#define key_fsgid_changed(t) do { } while(0) +#define key_fsuid_changed(c) do { } while(0) +#define key_fsgid_changed(c) do { } while(0) #define key_init() do { } while(0) #endif /* CONFIG_KEYS */ diff --git a/kernel/cred.c b/kernel/cred.c index 45d77284aed0..3bd40de9e192 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -455,9 +455,9 @@ int commit_creds(struct cred *new) /* alter the thread keyring */ if (!uid_eq(new->fsuid, old->fsuid)) - key_fsuid_changed(task); + key_fsuid_changed(new); if (!gid_eq(new->fsgid, old->fsgid)) - key_fsgid_changed(task); + key_fsgid_changed(new); /* do it * RLIMIT_NPROC limits on user->processes have already been checked diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index f05f7125a7d5..ba5d3172cafe 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -293,28 +293,26 @@ static int install_session_keyring(struct key *keyring) /* * Handle the fsuid changing. */ -void key_fsuid_changed(struct task_struct *tsk) +void key_fsuid_changed(struct cred *new_cred) { /* update the ownership of the thread keyring */ - BUG_ON(!tsk->cred); - if (tsk->cred->thread_keyring) { - down_write(&tsk->cred->thread_keyring->sem); - tsk->cred->thread_keyring->uid = tsk->cred->fsuid; - up_write(&tsk->cred->thread_keyring->sem); + if (new_cred->thread_keyring) { + down_write(&new_cred->thread_keyring->sem); + new_cred->thread_keyring->uid = new_cred->fsuid; + up_write(&new_cred->thread_keyring->sem); } } /* * Handle the fsgid changing. */ -void key_fsgid_changed(struct task_struct *tsk) +void key_fsgid_changed(struct cred *new_cred) { /* update the ownership of the thread keyring */ - BUG_ON(!tsk->cred); - if (tsk->cred->thread_keyring) { - down_write(&tsk->cred->thread_keyring->sem); - tsk->cred->thread_keyring->gid = tsk->cred->fsgid; - up_write(&tsk->cred->thread_keyring->sem); + if (new_cred->thread_keyring) { + down_write(&new_cred->thread_keyring->sem); + new_cred->thread_keyring->gid = new_cred->fsgid; + up_write(&new_cred->thread_keyring->sem); } } -- cgit v1.2.3 From f13e143e7444bffc53f5c2904aeed76646da69d6 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 3 Jul 2018 16:42:26 +0200 Subject: dma-buf: start caching of sg_table objects v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To allow a smooth transition from pinning buffer objects to dynamic invalidation we first start to cache the sg_table for an attachment. v2: keep closer to the DRM implementation Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.kernel.org/patch/10943053/ --- drivers/dma-buf/dma-buf.c | 27 +++++++++++++++++++++++++-- include/linux/dma-buf.h | 13 +++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 3ae6c0c2cc02..f4104a21b069 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -576,6 +576,7 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, list_add(&attach->node, &dmabuf->attachments); mutex_unlock(&dmabuf->lock); + return attach; err_attach: @@ -598,6 +599,9 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach) if (WARN_ON(!dmabuf || !attach)) return; + if (attach->sgt) + dmabuf->ops->unmap_dma_buf(attach, attach->sgt, attach->dir); + mutex_lock(&dmabuf->lock); list_del(&attach->node); if (dmabuf->ops->detach) @@ -633,10 +637,27 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, if (WARN_ON(!attach || !attach->dmabuf)) return ERR_PTR(-EINVAL); + if (attach->sgt) { + /* + * Two mappings with different directions for the same + * attachment are not allowed. + */ + if (attach->dir != direction && + attach->dir != DMA_BIDIRECTIONAL) + return ERR_PTR(-EBUSY); + + return attach->sgt; + } + sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction); if (!sg_table) sg_table = ERR_PTR(-ENOMEM); + if (!IS_ERR(sg_table) && attach->dmabuf->ops->cache_sgt_mapping) { + attach->sgt = sg_table; + attach->dir = direction; + } + return sg_table; } EXPORT_SYMBOL_GPL(dma_buf_map_attachment); @@ -660,8 +681,10 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach, if (WARN_ON(!attach || !attach->dmabuf || !sg_table)) return; - attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, - direction); + if (attach->sgt == sg_table) + return; + + attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction); } EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment); diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index a0bd071466fc..8a327566d7f4 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -44,6 +44,15 @@ struct dma_buf_attachment; * @vunmap: [optional] unmaps a vmap from the buffer */ struct dma_buf_ops { + /** + * @cache_sgt_mapping: + * + * If true the framework will cache the first mapping made for each + * attachment. This avoids creating mappings for attachments multiple + * times. + */ + bool cache_sgt_mapping; + /** * @attach: * @@ -323,6 +332,8 @@ struct dma_buf { * @dmabuf: buffer for this attachment. * @dev: device attached to the buffer. * @node: list of dma_buf_attachment. + * @sgt: cached mapping. + * @dir: direction of cached mapping. * @priv: exporter specific attachment data. * * This structure holds the attachment information between the dma_buf buffer @@ -338,6 +349,8 @@ struct dma_buf_attachment { struct dma_buf *dmabuf; struct device *dev; struct list_head node; + struct sg_table *sgt; + enum dma_data_direction dir; void *priv; }; -- cgit v1.2.3 From fbb5d0353c62d10c3699ec844d2d015a762952d7 Mon Sep 17 00:00:00 2001 From: Uma Shankar Date: Thu, 16 May 2019 19:40:06 +0530 Subject: drm: Add HDR source metadata property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a blob property to get HDR metadata information from userspace. This will be send as part of AVI Infoframe to panel. It also implements get() and set() functions for HDR output metadata property.The blob data is received from userspace and saved in connector state, the same is returned as blob in get property call to userspace. v2: Rebase and modified the metadata structure elements as per Ville's POC changes. v3: No Change v4: Addressed Shashank's review comments v5: Rebase. v6: Addressed Brian Starkey's review comments, defined new structure with header for dynamic metadata scalability. Merge get/set property functions for metadata in this patch. v7: Addressed Jonas Karlman review comments and defined separate structure for infoframe to better align with CTA 861.G spec. Added Shashank's RB. v8: Addressed Ville's review comments. Moved sink metadata structure out of uapi headers as suggested by Jonas Karlman. v9: Rebase and addressed Jonas Karlman review comments. v10: Addressed Ville's review comments, dropped the metdata_changed state variable as its not needed anymore. Signed-off-by: Uma Shankar Reviewed-by: Shashank Sharma Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/1558015817-12025-2-git-send-email-uma.shankar@intel.com --- drivers/gpu/drm/drm_atomic_uapi.c | 12 ++++++++++++ drivers/gpu/drm/drm_connector.c | 6 ++++++ include/drm/drm_connector.h | 10 ++++++++++ include/drm/drm_mode_config.h | 7 +++++++ include/linux/hdmi.h | 26 ++++++++++++++++++++++++++ include/uapi/drm/drm_mode.h | 23 +++++++++++++++++++++++ 6 files changed, 84 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 428d82662dc4..125605ff45af 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -676,6 +676,8 @@ static int drm_atomic_connector_set_property(struct drm_connector *connector, { struct drm_device *dev = connector->dev; struct drm_mode_config *config = &dev->mode_config; + bool replaced = false; + int ret; if (property == config->prop_crtc_id) { struct drm_crtc *crtc = drm_crtc_find(dev, file_priv, val); @@ -726,6 +728,13 @@ static int drm_atomic_connector_set_property(struct drm_connector *connector, */ if (state->link_status != DRM_LINK_STATUS_GOOD) state->link_status = val; + } else if (property == config->hdr_output_metadata_property) { + ret = drm_atomic_replace_property_blob_from_id(dev, + &state->hdr_output_metadata, + val, + sizeof(struct hdr_output_metadata), -1, + &replaced); + return ret; } else if (property == config->aspect_ratio_property) { state->picture_aspect_ratio = val; } else if (property == config->content_type_property) { @@ -814,6 +823,9 @@ drm_atomic_connector_get_property(struct drm_connector *connector, *val = state->colorspace; } else if (property == connector->scaling_mode_property) { *val = state->scaling_mode; + } else if (property == config->hdr_output_metadata_property) { + *val = state->hdr_output_metadata ? + state->hdr_output_metadata->base.id : 0; } else if (property == connector->content_protection_property) { *val = state->content_protection; } else if (property == config->writeback_fb_id_property) { diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index b34c3d38bf15..365ace0c0c9e 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1058,6 +1058,12 @@ int drm_connector_create_standard_properties(struct drm_device *dev) return -ENOMEM; dev->mode_config.non_desktop_property = prop; + prop = drm_property_create(dev, DRM_MODE_PROP_BLOB, + "HDR_OUTPUT_METADATA", 0); + if (!prop) + return -ENOMEM; + dev->mode_config.hdr_output_metadata_property = prop; + return 0; } diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index f43f40d5888a..f0e987df4c1e 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -603,6 +603,12 @@ struct drm_connector_state { * and the connector bpc limitations obtained from edid. */ u8 max_bpc; + + /** + * @hdr_output_metadata: + * DRM blob property for HDR output metadata + */ + struct drm_property_blob *hdr_output_metadata; }; /** @@ -1243,6 +1249,10 @@ struct drm_connector { * &drm_mode_config.connector_free_work. */ struct llist_node free_node; + + /* HDR metdata */ + struct hdr_output_metadata hdr_output_metadata; + struct hdr_sink_metadata hdr_sink_metadata; }; #define obj_to_connector(x) container_of(x, struct drm_connector, base) diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 7f60e8eb269a..c031b5a9d8d1 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -836,6 +836,13 @@ struct drm_mode_config { */ struct drm_property *writeback_out_fence_ptr_property; + /** + * hdr_output_metadata_property: Connector property containing hdr + * metatda. This will be provided by userspace compositors based + * on HDR content + */ + struct drm_property *hdr_output_metadata_property; + /* dumb ioctl parameters */ uint32_t preferred_depth, prefer_shadow; diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index 927ad6451105..6780476dcbff 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -152,6 +152,16 @@ enum hdmi_content_type { HDMI_CONTENT_TYPE_GAME, }; +enum hdmi_metadata_type { + HDMI_STATIC_METADATA_TYPE1 = 1, +}; + +enum hdmi_eotf { + HDMI_EOTF_TRADITIONAL_GAMMA_SDR, + HDMI_EOTF_TRADITIONAL_GAMMA_HDR, + HDMI_EOTF_SMPTE_ST2084, +}; + struct hdmi_avi_infoframe { enum hdmi_infoframe_type type; unsigned char version; @@ -320,6 +330,22 @@ struct hdmi_vendor_infoframe { unsigned int s3d_ext_data; }; +/* HDR Metadata as per 861.G spec */ +struct hdr_static_metadata { + __u8 eotf; + __u8 metadata_type; + __u16 max_cll; + __u16 max_fall; + __u16 min_cll; +}; + +struct hdr_sink_metadata { + __u32 metadata_type; + union { + struct hdr_static_metadata hdmi_type1; + }; +}; + int hdmi_vendor_infoframe_init(struct hdmi_vendor_infoframe *frame); ssize_t hdmi_vendor_infoframe_pack(struct hdmi_vendor_infoframe *frame, void *buffer, size_t size); diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 83cd1636b9be..997a7e05c0c6 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -630,6 +630,29 @@ struct drm_color_lut { __u16 reserved; }; +/* HDR Metadata Infoframe as per 861.G spec */ +struct hdr_metadata_infoframe { + __u8 eotf; + __u8 metadata_type; + struct { + __u16 x, y; + } display_primaries[3]; + struct { + __u16 x, y; + } white_point; + __u16 max_display_mastering_luminance; + __u16 min_display_mastering_luminance; + __u16 max_cll; + __u16 max_fall; +}; + +struct hdr_output_metadata { + __u32 metadata_type; + union { + struct hdr_metadata_infoframe hdmi_metadata_type1; + }; +}; + #define DRM_MODE_PAGE_FLIP_EVENT 0x01 #define DRM_MODE_PAGE_FLIP_ASYNC 0x02 #define DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE 0x4 -- cgit v1.2.3 From 2cdbfd66a82969770ce1a7032fb1e2155a08cee8 Mon Sep 17 00:00:00 2001 From: Uma Shankar Date: Thu, 16 May 2019 19:40:09 +0530 Subject: drm: Enable HDR infoframe support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable Dynamic Range and Mastering Infoframe for HDR content, which is defined in CEA 861.3 spec. The metadata will be computed based on blending policy in userspace compositors and passed as a connector property blob to driver. The same will be sent as infoframe to panel which support HDR. Added the const version of infoframe for DRM metadata for HDR. v2: Rebase and added Ville's POC changes. v3: No Change v4: Addressed Shashank's review comments and merged the patch making drm infoframe function arguments as constant. v5: Rebase v6: Fixed checkpatch warnings with --strict option. Addressed Shashank's review comments and added his RB. v7: Addressed Brian Starkey's review comments. Merged 2 patches into one. v8: Addressed Jonas Karlman review comments. v9: Addressed Jonas Karlman review comments. v10: Addressed Ville's review comments. v11: Added BUILD_BUG_ON and sizeof instead of magic numbers as per Ville's comments. Signed-off-by: Uma Shankar Reviewed-by: Shashank Sharma Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/1558015817-12025-5-git-send-email-uma.shankar@intel.com --- drivers/gpu/drm/drm_edid.c | 72 +++++++++++++++++ drivers/video/hdmi.c | 190 +++++++++++++++++++++++++++++++++++++++++++++ include/drm/drm_edid.h | 5 ++ include/linux/hdmi.h | 28 +++++++ 4 files changed, 295 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index a5ef9f45fee0..73560c9437cd 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -4904,6 +4904,78 @@ static bool is_hdmi2_sink(struct drm_connector *connector) connector->display_info.color_formats & DRM_COLOR_FORMAT_YCRCB420; } +static inline bool is_eotf_supported(u8 output_eotf, u8 sink_eotf) +{ + return sink_eotf & BIT(output_eotf); +} + +/** + * drm_hdmi_infoframe_set_hdr_metadata() - fill an HDMI DRM infoframe with + * HDR metadata from userspace + * @frame: HDMI DRM infoframe + * @hdr_metadata: hdr_source_metadata info from userspace + * + * Return: 0 on success or a negative error code on failure. + */ +int +drm_hdmi_infoframe_set_hdr_metadata(struct hdmi_drm_infoframe *frame, + const struct drm_connector_state *conn_state) +{ + struct drm_connector *connector; + struct hdr_output_metadata *hdr_metadata; + int err; + + if (!frame || !conn_state) + return -EINVAL; + + connector = conn_state->connector; + + if (!conn_state->hdr_output_metadata) + return -EINVAL; + + hdr_metadata = conn_state->hdr_output_metadata->data; + + if (!hdr_metadata || !connector) + return -EINVAL; + + /* Sink EOTF is Bit map while infoframe is absolute values */ + if (!is_eotf_supported(hdr_metadata->hdmi_metadata_type1.eotf, + connector->hdr_sink_metadata.hdmi_type1.eotf)) { + DRM_DEBUG_KMS("EOTF Not Supported\n"); + return -EINVAL; + } + + err = hdmi_drm_infoframe_init(frame); + if (err < 0) + return err; + + frame->eotf = hdr_metadata->hdmi_metadata_type1.eotf; + frame->metadata_type = hdr_metadata->hdmi_metadata_type1.metadata_type; + + BUILD_BUG_ON(sizeof(frame->display_primaries) != + sizeof(hdr_metadata->hdmi_metadata_type1.display_primaries)); + BUILD_BUG_ON(sizeof(frame->white_point) != + sizeof(hdr_metadata->hdmi_metadata_type1.white_point)); + + memcpy(&frame->display_primaries, + &hdr_metadata->hdmi_metadata_type1.display_primaries, + sizeof(frame->display_primaries)); + + memcpy(&frame->white_point, + &hdr_metadata->hdmi_metadata_type1.white_point, + sizeof(frame->white_point)); + + frame->max_display_mastering_luminance = + hdr_metadata->hdmi_metadata_type1.max_display_mastering_luminance; + frame->min_display_mastering_luminance = + hdr_metadata->hdmi_metadata_type1.min_display_mastering_luminance; + frame->max_fall = hdr_metadata->hdmi_metadata_type1.max_fall; + frame->max_cll = hdr_metadata->hdmi_metadata_type1.max_cll; + + return 0; +} +EXPORT_SYMBOL(drm_hdmi_infoframe_set_hdr_metadata); + /** * drm_hdmi_avi_infoframe_from_display_mode() - fill an HDMI AVI infoframe with * data from a DRM display mode diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c index 799ae49774f5..481f0367dfd3 100644 --- a/drivers/video/hdmi.c +++ b/drivers/video/hdmi.c @@ -650,6 +650,150 @@ hdmi_vendor_any_infoframe_check_only(const union hdmi_vendor_any_infoframe *fram return 0; } +/** + * hdmi_drm_infoframe_init() - initialize an HDMI Dynaminc Range and + * mastering infoframe + * @frame: HDMI DRM infoframe + * + * Returns 0 on success or a negative error code on failure. + */ +int hdmi_drm_infoframe_init(struct hdmi_drm_infoframe *frame) +{ + memset(frame, 0, sizeof(*frame)); + + frame->type = HDMI_INFOFRAME_TYPE_DRM; + frame->version = 1; + frame->length = HDMI_DRM_INFOFRAME_SIZE; + + return 0; +} +EXPORT_SYMBOL(hdmi_drm_infoframe_init); + +static int hdmi_drm_infoframe_check_only(const struct hdmi_drm_infoframe *frame) +{ + if (frame->type != HDMI_INFOFRAME_TYPE_DRM || + frame->version != 1) + return -EINVAL; + + if (frame->length != HDMI_DRM_INFOFRAME_SIZE) + return -EINVAL; + + return 0; +} + +/** + * hdmi_drm_infoframe_check() - check a HDMI DRM infoframe + * @frame: HDMI DRM infoframe + * + * Validates that the infoframe is consistent. + * Returns 0 on success or a negative error code on failure. + */ +int hdmi_drm_infoframe_check(struct hdmi_drm_infoframe *frame) +{ + return hdmi_drm_infoframe_check_only(frame); +} +EXPORT_SYMBOL(hdmi_drm_infoframe_check); + +/** + * hdmi_drm_infoframe_pack_only() - write HDMI DRM infoframe to binary buffer + * @frame: HDMI DRM infoframe + * @buffer: destination buffer + * @size: size of buffer + * + * Packs the information contained in the @frame structure into a binary + * representation that can be written into the corresponding controller + * registers. Also computes the checksum as required by section 5.3.5 of + * the HDMI 1.4 specification. + * + * Returns the number of bytes packed into the binary buffer or a negative + * error code on failure. + */ +ssize_t hdmi_drm_infoframe_pack_only(const struct hdmi_drm_infoframe *frame, + void *buffer, size_t size) +{ + u8 *ptr = buffer; + size_t length; + int i; + + length = HDMI_INFOFRAME_HEADER_SIZE + frame->length; + + if (size < length) + return -ENOSPC; + + memset(buffer, 0, size); + + ptr[0] = frame->type; + ptr[1] = frame->version; + ptr[2] = frame->length; + ptr[3] = 0; /* checksum */ + + /* start infoframe payload */ + ptr += HDMI_INFOFRAME_HEADER_SIZE; + + *ptr++ = frame->eotf; + *ptr++ = frame->metadata_type; + + for (i = 0; i < 3; i++) { + *ptr++ = frame->display_primaries[i].x; + *ptr++ = frame->display_primaries[i].x >> 8; + *ptr++ = frame->display_primaries[i].y; + *ptr++ = frame->display_primaries[i].y >> 8; + } + + *ptr++ = frame->white_point.x; + *ptr++ = frame->white_point.x >> 8; + + *ptr++ = frame->white_point.y; + *ptr++ = frame->white_point.y >> 8; + + *ptr++ = frame->max_display_mastering_luminance; + *ptr++ = frame->max_display_mastering_luminance >> 8; + + *ptr++ = frame->min_display_mastering_luminance; + *ptr++ = frame->min_display_mastering_luminance >> 8; + + *ptr++ = frame->max_cll; + *ptr++ = frame->max_cll >> 8; + + *ptr++ = frame->max_fall; + *ptr++ = frame->max_fall >> 8; + + hdmi_infoframe_set_checksum(buffer, length); + + return length; +} +EXPORT_SYMBOL(hdmi_drm_infoframe_pack_only); + +/** + * hdmi_drm_infoframe_pack() - check a HDMI DRM infoframe, + * and write it to binary buffer + * @frame: HDMI DRM infoframe + * @buffer: destination buffer + * @size: size of buffer + * + * Validates that the infoframe is consistent and updates derived fields + * (eg. length) based on other fields, after which it packs the information + * contained in the @frame structure into a binary representation that + * can be written into the corresponding controller registers. This function + * also computes the checksum as required by section 5.3.5 of the HDMI 1.4 + * specification. + * + * Returns the number of bytes packed into the binary buffer or a negative + * error code on failure. + */ +ssize_t hdmi_drm_infoframe_pack(struct hdmi_drm_infoframe *frame, + void *buffer, size_t size) +{ + int ret; + + ret = hdmi_drm_infoframe_check(frame); + if (ret) + return ret; + + return hdmi_drm_infoframe_pack_only(frame, buffer, size); +} +EXPORT_SYMBOL(hdmi_drm_infoframe_pack); + /* * hdmi_vendor_any_infoframe_check() - check a vendor infoframe */ @@ -758,6 +902,10 @@ hdmi_infoframe_pack_only(const union hdmi_infoframe *frame, void *buffer, size_t length = hdmi_avi_infoframe_pack_only(&frame->avi, buffer, size); break; + case HDMI_INFOFRAME_TYPE_DRM: + length = hdmi_drm_infoframe_pack_only(&frame->drm, + buffer, size); + break; case HDMI_INFOFRAME_TYPE_SPD: length = hdmi_spd_infoframe_pack_only(&frame->spd, buffer, size); @@ -806,6 +954,9 @@ hdmi_infoframe_pack(union hdmi_infoframe *frame, case HDMI_INFOFRAME_TYPE_AVI: length = hdmi_avi_infoframe_pack(&frame->avi, buffer, size); break; + case HDMI_INFOFRAME_TYPE_DRM: + length = hdmi_drm_infoframe_pack(&frame->drm, buffer, size); + break; case HDMI_INFOFRAME_TYPE_SPD: length = hdmi_spd_infoframe_pack(&frame->spd, buffer, size); break; @@ -838,6 +989,8 @@ static const char *hdmi_infoframe_type_get_name(enum hdmi_infoframe_type type) return "Source Product Description (SPD)"; case HDMI_INFOFRAME_TYPE_AUDIO: return "Audio"; + case HDMI_INFOFRAME_TYPE_DRM: + return "Dynamic Range and Mastering"; } return "Reserved"; } @@ -1284,6 +1437,40 @@ static void hdmi_audio_infoframe_log(const char *level, frame->downmix_inhibit ? "Yes" : "No"); } +/** + * hdmi_drm_infoframe_log() - log info of HDMI DRM infoframe + * @level: logging level + * @dev: device + * @frame: HDMI DRM infoframe + */ +static void hdmi_drm_infoframe_log(const char *level, + struct device *dev, + const struct hdmi_drm_infoframe *frame) +{ + int i; + + hdmi_infoframe_log_header(level, dev, + (struct hdmi_any_infoframe *)frame); + hdmi_log("length: %d\n", frame->length); + hdmi_log("metadata type: %d\n", frame->metadata_type); + hdmi_log("eotf: %d\n", frame->eotf); + for (i = 0; i < 3; i++) { + hdmi_log("x[%d]: %d\n", i, frame->display_primaries[i].x); + hdmi_log("y[%d]: %d\n", i, frame->display_primaries[i].y); + } + + hdmi_log("white point x: %d\n", frame->white_point.x); + hdmi_log("white point y: %d\n", frame->white_point.y); + + hdmi_log("max_display_mastering_luminance: %d\n", + frame->max_display_mastering_luminance); + hdmi_log("min_display_mastering_luminance: %d\n", + frame->min_display_mastering_luminance); + + hdmi_log("max_cll: %d\n", frame->max_cll); + hdmi_log("max_fall: %d\n", frame->max_fall); +} + static const char * hdmi_3d_structure_get_name(enum hdmi_3d_structure s3d_struct) { @@ -1372,6 +1559,9 @@ void hdmi_infoframe_log(const char *level, case HDMI_INFOFRAME_TYPE_VENDOR: hdmi_vendor_any_infoframe_log(level, dev, &frame->vendor); break; + case HDMI_INFOFRAME_TYPE_DRM: + hdmi_drm_infoframe_log(level, dev, &frame->drm); + break; } } EXPORT_SYMBOL(hdmi_infoframe_log); diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index 9d3b5b93102c..0e21e91c4314 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -25,6 +25,7 @@ #include #include +#include struct drm_device; struct i2c_adapter; @@ -370,6 +371,10 @@ drm_hdmi_avi_infoframe_quant_range(struct hdmi_avi_infoframe *frame, const struct drm_display_mode *mode, enum hdmi_quantization_range rgb_quant_range); +int +drm_hdmi_infoframe_set_hdr_metadata(struct hdmi_drm_infoframe *frame, + const struct drm_connector_state *conn_state); + /** * drm_eld_mnl - Get ELD monitor name length in bytes. * @eld: pointer to an eld memory structure with mnl set diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index 6780476dcbff..bcf3c6c3499e 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -47,6 +47,7 @@ enum hdmi_infoframe_type { HDMI_INFOFRAME_TYPE_AVI = 0x82, HDMI_INFOFRAME_TYPE_SPD = 0x83, HDMI_INFOFRAME_TYPE_AUDIO = 0x84, + HDMI_INFOFRAME_TYPE_DRM = 0x87, }; #define HDMI_IEEE_OUI 0x000c03 @@ -55,6 +56,7 @@ enum hdmi_infoframe_type { #define HDMI_AVI_INFOFRAME_SIZE 13 #define HDMI_SPD_INFOFRAME_SIZE 25 #define HDMI_AUDIO_INFOFRAME_SIZE 10 +#define HDMI_DRM_INFOFRAME_SIZE 26 #define HDMI_INFOFRAME_SIZE(type) \ (HDMI_INFOFRAME_HEADER_SIZE + HDMI_ ## type ## _INFOFRAME_SIZE) @@ -185,12 +187,37 @@ struct hdmi_avi_infoframe { unsigned short right_bar; }; +/* DRM Infoframe as per CTA 861.G spec */ +struct hdmi_drm_infoframe { + enum hdmi_infoframe_type type; + unsigned char version; + unsigned char length; + enum hdmi_eotf eotf; + enum hdmi_metadata_type metadata_type; + struct { + u16 x, y; + } display_primaries[3]; + struct { + u16 x, y; + } white_point; + u16 max_display_mastering_luminance; + u16 min_display_mastering_luminance; + u16 max_cll; + u16 max_fall; +}; + int hdmi_avi_infoframe_init(struct hdmi_avi_infoframe *frame); ssize_t hdmi_avi_infoframe_pack(struct hdmi_avi_infoframe *frame, void *buffer, size_t size); ssize_t hdmi_avi_infoframe_pack_only(const struct hdmi_avi_infoframe *frame, void *buffer, size_t size); int hdmi_avi_infoframe_check(struct hdmi_avi_infoframe *frame); +int hdmi_drm_infoframe_init(struct hdmi_drm_infoframe *frame); +ssize_t hdmi_drm_infoframe_pack(struct hdmi_drm_infoframe *frame, void *buffer, + size_t size); +ssize_t hdmi_drm_infoframe_pack_only(const struct hdmi_drm_infoframe *frame, + void *buffer, size_t size); +int hdmi_drm_infoframe_check(struct hdmi_drm_infoframe *frame); enum hdmi_spd_sdi { HDMI_SPD_SDI_UNKNOWN, @@ -381,6 +408,7 @@ union hdmi_infoframe { struct hdmi_spd_infoframe spd; union hdmi_vendor_any_infoframe vendor; struct hdmi_audio_infoframe audio; + struct hdmi_drm_infoframe drm; }; ssize_t hdmi_infoframe_pack(union hdmi_infoframe *frame, void *buffer, -- cgit v1.2.3 From b5e3eed1eeb363c148e2935d9d3c12c30a280de6 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 16 May 2019 19:40:12 +0530 Subject: drm: Add HLG EOTF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ADD HLG EOTF to the list of EOTF transfer functions supported. Hybrid Log-Gamma (HLG) is a high dynamic range (HDR) standard. HLG defines a nonlinear transfer function in which the lower half of the signal values use a gamma curve and the upper half of the signal values use a logarithmic curve. v2: Rebase v3: Fixed a warning message v4: Addressed Shashank's review comments v5: Addressed Jonas Karlman's review comment and dropped the i915 tag from header. Signed-off-by: Ville Syrjälä Signed-off-by: Uma Shankar Reviewed-by: Shashank Sharma Link: https://patchwork.freedesktop.org/patch/msgid/1558015817-12025-8-git-send-email-uma.shankar@intel.com --- drivers/gpu/drm/drm_edid.c | 3 ++- include/linux/hdmi.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 73560c9437cd..262510c2a670 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -3854,7 +3854,8 @@ static uint8_t eotf_supported(const u8 *edid_ext) return edid_ext[2] & (BIT(HDMI_EOTF_TRADITIONAL_GAMMA_SDR) | BIT(HDMI_EOTF_TRADITIONAL_GAMMA_HDR) | - BIT(HDMI_EOTF_SMPTE_ST2084)); + BIT(HDMI_EOTF_SMPTE_ST2084) | + BIT(HDMI_EOTF_BT_2100_HLG)); } static uint8_t hdr_metadata_type(const u8 *edid_ext) diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index bcf3c6c3499e..ee55ba589cdc 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -162,6 +162,7 @@ enum hdmi_eotf { HDMI_EOTF_TRADITIONAL_GAMMA_SDR, HDMI_EOTF_TRADITIONAL_GAMMA_HDR, HDMI_EOTF_SMPTE_ST2084, + HDMI_EOTF_BT_2100_HLG, }; struct hdmi_avi_infoframe { -- cgit v1.2.3 From 70f1b0d34bdf03065fe869e93cc17cad1ea20c4a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 7 Feb 2019 19:44:12 -0600 Subject: signal/usb: Replace kill_pid_info_as_cred with kill_pid_usb_asyncio The usb support for asyncio encoded one of it's values in the wrong field. It should have used si_value but instead used si_addr which is not present in the _rt union member of struct siginfo. The practical result of this is that on a 64bit big endian kernel when delivering a signal to a 32bit process the si_addr field is set to NULL, instead of the expected pointer value. This issue can not be fixed in copy_siginfo_to_user32 as the usb usage of the the _sigfault (aka si_addr) member of the siginfo union when SI_ASYNCIO is set is incompatible with the POSIX and glibc usage of the _rt member of the siginfo union. Therefore replace kill_pid_info_as_cred with kill_pid_usb_asyncio a dedicated function for this one specific case. There are no other users of kill_pid_info_as_cred so this specialization should have no impact on the amount of code in the kernel. Have kill_pid_usb_asyncio take instead of a siginfo_t which is difficult and error prone, 3 arguments, a signal number, an errno value, and an address enconded as a sigval_t. The encoding of the address as a sigval_t allows the code that reads the userspace request for a signal to handle this compat issue along with all of the other compat issues. Add BUILD_BUG_ONs in kernel/signal.c to ensure that we can now place the pointer value at the in si_pid (instead of si_addr). That is the code now verifies that si_pid and si_addr always occur at the same location. Further the code veries that for native structures a value placed in si_pid and spilling into si_uid will appear in userspace in si_addr (on a byte by byte copy of siginfo or a field by field copy of siginfo). The code also verifies that for a 64bit kernel and a 32bit userspace the 32bit pointer will fit in si_pid. I have used the usbsig.c program below written by Alan Stern and slightly tweaked by me to run on a big endian machine to verify the issue exists (on sparc64) and to confirm the patch below fixes the issue. /* usbsig.c -- test USB async signal delivery */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include static struct usbdevfs_urb urb; static struct usbdevfs_disconnectsignal ds; static volatile sig_atomic_t done = 0; void urb_handler(int sig, siginfo_t *info , void *ucontext) { printf("Got signal %d, signo %d errno %d code %d addr: %p urb: %p\n", sig, info->si_signo, info->si_errno, info->si_code, info->si_addr, &urb); printf("%s\n", (info->si_addr == &urb) ? "Good" : "Bad"); } void ds_handler(int sig, siginfo_t *info , void *ucontext) { printf("Got signal %d, signo %d errno %d code %d addr: %p ds: %p\n", sig, info->si_signo, info->si_errno, info->si_code, info->si_addr, &ds); printf("%s\n", (info->si_addr == &ds) ? "Good" : "Bad"); done = 1; } int main(int argc, char **argv) { char *devfilename; int fd; int rc; struct sigaction act; struct usb_ctrlrequest *req; void *ptr; char buf[80]; if (argc != 2) { fprintf(stderr, "Usage: usbsig device-file-name\n"); return 1; } devfilename = argv[1]; fd = open(devfilename, O_RDWR); if (fd == -1) { perror("Error opening device file"); return 1; } act.sa_sigaction = urb_handler; sigemptyset(&act.sa_mask); act.sa_flags = SA_SIGINFO; rc = sigaction(SIGUSR1, &act, NULL); if (rc == -1) { perror("Error in sigaction"); return 1; } act.sa_sigaction = ds_handler; sigemptyset(&act.sa_mask); act.sa_flags = SA_SIGINFO; rc = sigaction(SIGUSR2, &act, NULL); if (rc == -1) { perror("Error in sigaction"); return 1; } memset(&urb, 0, sizeof(urb)); urb.type = USBDEVFS_URB_TYPE_CONTROL; urb.endpoint = USB_DIR_IN | 0; urb.buffer = buf; urb.buffer_length = sizeof(buf); urb.signr = SIGUSR1; req = (struct usb_ctrlrequest *) buf; req->bRequestType = USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE; req->bRequest = USB_REQ_GET_DESCRIPTOR; req->wValue = htole16(USB_DT_DEVICE << 8); req->wIndex = htole16(0); req->wLength = htole16(sizeof(buf) - sizeof(*req)); rc = ioctl(fd, USBDEVFS_SUBMITURB, &urb); if (rc == -1) { perror("Error in SUBMITURB ioctl"); return 1; } rc = ioctl(fd, USBDEVFS_REAPURB, &ptr); if (rc == -1) { perror("Error in REAPURB ioctl"); return 1; } memset(&ds, 0, sizeof(ds)); ds.signr = SIGUSR2; ds.context = &ds; rc = ioctl(fd, USBDEVFS_DISCSIGNAL, &ds); if (rc == -1) { perror("Error in DISCSIGNAL ioctl"); return 1; } printf("Waiting for usb disconnect\n"); while (!done) { sleep(1); } close(fd); return 0; } Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Cc: Alan Stern Cc: Oliver Neukum Fixes: v2.3.39 Cc: stable@vger.kernel.org Acked-by: Alan Stern Signed-off-by: "Eric W. Biederman" --- drivers/usb/core/devio.c | 48 +++++++++++++++--------------- include/linux/sched/signal.h | 2 +- kernel/signal.c | 69 +++++++++++++++++++++++++++++++++++++++----- 3 files changed, 86 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index fa783531ee88..a02448105527 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -63,7 +63,7 @@ struct usb_dev_state { unsigned int discsignr; struct pid *disc_pid; const struct cred *cred; - void __user *disccontext; + sigval_t disccontext; unsigned long ifclaimed; u32 disabled_bulk_eps; bool privileges_dropped; @@ -90,6 +90,7 @@ struct async { unsigned int ifnum; void __user *userbuffer; void __user *userurb; + sigval_t userurb_sigval; struct urb *urb; struct usb_memory *usbm; unsigned int mem_usage; @@ -582,22 +583,19 @@ static void async_completed(struct urb *urb) { struct async *as = urb->context; struct usb_dev_state *ps = as->ps; - struct kernel_siginfo sinfo; struct pid *pid = NULL; const struct cred *cred = NULL; unsigned long flags; - int signr; + sigval_t addr; + int signr, errno; spin_lock_irqsave(&ps->lock, flags); list_move_tail(&as->asynclist, &ps->async_completed); as->status = urb->status; signr = as->signr; if (signr) { - clear_siginfo(&sinfo); - sinfo.si_signo = as->signr; - sinfo.si_errno = as->status; - sinfo.si_code = SI_ASYNCIO; - sinfo.si_addr = as->userurb; + errno = as->status; + addr = as->userurb_sigval; pid = get_pid(as->pid); cred = get_cred(as->cred); } @@ -615,7 +613,7 @@ static void async_completed(struct urb *urb) spin_unlock_irqrestore(&ps->lock, flags); if (signr) { - kill_pid_info_as_cred(sinfo.si_signo, &sinfo, pid, cred); + kill_pid_usb_asyncio(signr, errno, addr, pid, cred); put_pid(pid); put_cred(cred); } @@ -1427,7 +1425,7 @@ find_memory_area(struct usb_dev_state *ps, const struct usbdevfs_urb *uurb) static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb, struct usbdevfs_iso_packet_desc __user *iso_frame_desc, - void __user *arg) + void __user *arg, sigval_t userurb_sigval) { struct usbdevfs_iso_packet_desc *isopkt = NULL; struct usb_host_endpoint *ep; @@ -1727,6 +1725,7 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb isopkt = NULL; as->ps = ps; as->userurb = arg; + as->userurb_sigval = userurb_sigval; if (as->usbm) { unsigned long uurb_start = (unsigned long)uurb->buffer; @@ -1801,13 +1800,17 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb static int proc_submiturb(struct usb_dev_state *ps, void __user *arg) { struct usbdevfs_urb uurb; + sigval_t userurb_sigval; if (copy_from_user(&uurb, arg, sizeof(uurb))) return -EFAULT; + memset(&userurb_sigval, 0, sizeof(userurb_sigval)); + userurb_sigval.sival_ptr = arg; + return proc_do_submiturb(ps, &uurb, (((struct usbdevfs_urb __user *)arg)->iso_frame_desc), - arg); + arg, userurb_sigval); } static int proc_unlinkurb(struct usb_dev_state *ps, void __user *arg) @@ -1977,7 +1980,7 @@ static int proc_disconnectsignal_compat(struct usb_dev_state *ps, void __user *a if (copy_from_user(&ds, arg, sizeof(ds))) return -EFAULT; ps->discsignr = ds.signr; - ps->disccontext = compat_ptr(ds.context); + ps->disccontext.sival_int = ds.context; return 0; } @@ -2005,13 +2008,17 @@ static int get_urb32(struct usbdevfs_urb *kurb, static int proc_submiturb_compat(struct usb_dev_state *ps, void __user *arg) { struct usbdevfs_urb uurb; + sigval_t userurb_sigval; if (get_urb32(&uurb, (struct usbdevfs_urb32 __user *)arg)) return -EFAULT; + memset(&userurb_sigval, 0, sizeof(userurb_sigval)); + userurb_sigval.sival_int = ptr_to_compat(arg); + return proc_do_submiturb(ps, &uurb, ((struct usbdevfs_urb32 __user *)arg)->iso_frame_desc, - arg); + arg, userurb_sigval); } static int processcompl_compat(struct async *as, void __user * __user *arg) @@ -2092,7 +2099,7 @@ static int proc_disconnectsignal(struct usb_dev_state *ps, void __user *arg) if (copy_from_user(&ds, arg, sizeof(ds))) return -EFAULT; ps->discsignr = ds.signr; - ps->disccontext = ds.context; + ps->disccontext.sival_ptr = ds.context; return 0; } @@ -2614,22 +2621,15 @@ const struct file_operations usbdev_file_operations = { static void usbdev_remove(struct usb_device *udev) { struct usb_dev_state *ps; - struct kernel_siginfo sinfo; while (!list_empty(&udev->filelist)) { ps = list_entry(udev->filelist.next, struct usb_dev_state, list); destroy_all_async(ps); wake_up_all(&ps->wait); list_del_init(&ps->list); - if (ps->discsignr) { - clear_siginfo(&sinfo); - sinfo.si_signo = ps->discsignr; - sinfo.si_errno = EPIPE; - sinfo.si_code = SI_ASYNCIO; - sinfo.si_addr = ps->disccontext; - kill_pid_info_as_cred(ps->discsignr, &sinfo, - ps->disc_pid, ps->cred); - } + if (ps->discsignr) + kill_pid_usb_asyncio(ps->discsignr, EPIPE, ps->disccontext, + ps->disc_pid, ps->cred); } } diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 38a0f0785323..c68ca81db0a1 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -329,7 +329,7 @@ extern void force_sigsegv(int sig, struct task_struct *p); extern int force_sig_info(int, struct kernel_siginfo *, struct task_struct *); extern int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp); extern int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid); -extern int kill_pid_info_as_cred(int, struct kernel_siginfo *, struct pid *, +extern int kill_pid_usb_asyncio(int sig, int errno, sigval_t addr, struct pid *, const struct cred *); extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); diff --git a/kernel/signal.c b/kernel/signal.c index a1eb44dc9ff5..18040d6bd63a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1439,13 +1439,44 @@ static inline bool kill_as_cred_perm(const struct cred *cred, uid_eq(cred->uid, pcred->uid); } -/* like kill_pid_info(), but doesn't use uid/euid of "current" */ -int kill_pid_info_as_cred(int sig, struct kernel_siginfo *info, struct pid *pid, - const struct cred *cred) +/* + * The usb asyncio usage of siginfo is wrong. The glibc support + * for asyncio which uses SI_ASYNCIO assumes the layout is SIL_RT. + * AKA after the generic fields: + * kernel_pid_t si_pid; + * kernel_uid32_t si_uid; + * sigval_t si_value; + * + * Unfortunately when usb generates SI_ASYNCIO it assumes the layout + * after the generic fields is: + * void __user *si_addr; + * + * This is a practical problem when there is a 64bit big endian kernel + * and a 32bit userspace. As the 32bit address will encoded in the low + * 32bits of the pointer. Those low 32bits will be stored at higher + * address than appear in a 32 bit pointer. So userspace will not + * see the address it was expecting for it's completions. + * + * There is nothing in the encoding that can allow + * copy_siginfo_to_user32 to detect this confusion of formats, so + * handle this by requiring the caller of kill_pid_usb_asyncio to + * notice when this situration takes place and to store the 32bit + * pointer in sival_int, instead of sival_addr of the sigval_t addr + * parameter. + */ +int kill_pid_usb_asyncio(int sig, int errno, sigval_t addr, + struct pid *pid, const struct cred *cred) { - int ret = -EINVAL; + struct kernel_siginfo info; struct task_struct *p; unsigned long flags; + int ret = -EINVAL; + + clear_siginfo(&info); + info.si_signo = sig; + info.si_errno = errno; + info.si_code = SI_ASYNCIO; + *((sigval_t *)&info.si_pid) = addr; if (!valid_signal(sig)) return ret; @@ -1456,17 +1487,17 @@ int kill_pid_info_as_cred(int sig, struct kernel_siginfo *info, struct pid *pid, ret = -ESRCH; goto out_unlock; } - if (si_fromuser(info) && !kill_as_cred_perm(cred, p)) { + if (!kill_as_cred_perm(cred, p)) { ret = -EPERM; goto out_unlock; } - ret = security_task_kill(p, info, sig, cred); + ret = security_task_kill(p, &info, sig, cred); if (ret) goto out_unlock; if (sig) { if (lock_task_sighand(p, &flags)) { - ret = __send_signal(sig, info, p, PIDTYPE_TGID, 0); + ret = __send_signal(sig, &info, p, PIDTYPE_TGID, 0); unlock_task_sighand(p, &flags); } else ret = -ESRCH; @@ -1475,7 +1506,7 @@ out_unlock: rcu_read_unlock(); return ret; } -EXPORT_SYMBOL_GPL(kill_pid_info_as_cred); +EXPORT_SYMBOL_GPL(kill_pid_usb_asyncio); /* * kill_something_info() interprets pid in interesting ways just like kill(2). @@ -4474,6 +4505,28 @@ static inline void siginfo_buildtime_checks(void) CHECK_OFFSET(si_syscall); CHECK_OFFSET(si_arch); #undef CHECK_OFFSET + + /* usb asyncio */ + BUILD_BUG_ON(offsetof(struct siginfo, si_pid) != + offsetof(struct siginfo, si_addr)); + if (sizeof(int) == sizeof(void __user *)) { + BUILD_BUG_ON(sizeof_field(struct siginfo, si_pid) != + sizeof(void __user *)); + } else { + BUILD_BUG_ON((sizeof_field(struct siginfo, si_pid) + + sizeof_field(struct siginfo, si_uid)) != + sizeof(void __user *)); + BUILD_BUG_ON(offsetofend(struct siginfo, si_pid) != + offsetof(struct siginfo, si_uid)); + } +#ifdef CONFIG_COMPAT + BUILD_BUG_ON(offsetof(struct compat_siginfo, si_pid) != + offsetof(struct compat_siginfo, si_addr)); + BUILD_BUG_ON(sizeof_field(struct compat_siginfo, si_pid) != + sizeof(compat_uptr_t)); + BUILD_BUG_ON(sizeof_field(struct compat_siginfo, si_pid) != + sizeof_field(struct siginfo, si_pid)); +#endif } void __init signals_init(void) -- cgit v1.2.3 From 0db355d499f10a79b6a5161e77c7eba8f062bde4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 May 2019 15:00:25 -0700 Subject: ipv4/igmp: shrink struct ip_sf_list Removing two 4 bytes holes allows to use kmalloc-32 kmem cache instead of kmalloc-64 on 64bit kernels. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 9c94b2ea789c..6649cb78de4a 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -65,8 +65,8 @@ struct ip_mc_socklist { struct ip_sf_list { struct ip_sf_list *sf_next; - __be32 sf_inaddr; unsigned long sf_count[2]; /* include/exclude counts */ + __be32 sf_inaddr; unsigned char sf_gsresp; /* include in g & s response? */ unsigned char sf_oldin; /* change state */ unsigned char sf_crcount; /* retrans. left to send */ -- cgit v1.2.3 From c08e7e4c8a6f04e01d16117eb4a0077059ec2cd4 Mon Sep 17 00:00:00 2001 From: Guillaume La Roque Date: Tue, 14 May 2019 10:26:48 +0200 Subject: pinctrl: generic: add new 'drive-strength-microamp' property support Add drive-strength-microamp property support to allow drive strength in uA Signed-off-by: Guillaume La Roque Signed-off-by: Linus Walleij --- drivers/pinctrl/pinconf-generic.c | 2 ++ include/linux/pinctrl/pinconf-generic.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c index b4f7f8a458ea..d0cbdb1ad76a 100644 --- a/drivers/pinctrl/pinconf-generic.c +++ b/drivers/pinctrl/pinconf-generic.c @@ -39,6 +39,7 @@ static const struct pin_config_item conf_items[] = { PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_SOURCE, "output drive open source", NULL, false), PCONFDUMP(PIN_CONFIG_DRIVE_PUSH_PULL, "output drive push pull", NULL, false), PCONFDUMP(PIN_CONFIG_DRIVE_STRENGTH, "output drive strength", "mA", true), + PCONFDUMP(PIN_CONFIG_DRIVE_STRENGTH_UA, "output drive strength", "uA", true), PCONFDUMP(PIN_CONFIG_INPUT_DEBOUNCE, "input debounce", "usec", true), PCONFDUMP(PIN_CONFIG_INPUT_ENABLE, "input enabled", NULL, false), PCONFDUMP(PIN_CONFIG_INPUT_SCHMITT, "input schmitt trigger", NULL, false), @@ -167,6 +168,7 @@ static const struct pinconf_generic_params dt_params[] = { { "drive-open-source", PIN_CONFIG_DRIVE_OPEN_SOURCE, 0 }, { "drive-push-pull", PIN_CONFIG_DRIVE_PUSH_PULL, 0 }, { "drive-strength", PIN_CONFIG_DRIVE_STRENGTH, 0 }, + { "drive-strength-microamp", PIN_CONFIG_DRIVE_STRENGTH_UA, 0 }, { "input-debounce", PIN_CONFIG_INPUT_DEBOUNCE, 0 }, { "input-disable", PIN_CONFIG_INPUT_ENABLE, 0 }, { "input-enable", PIN_CONFIG_INPUT_ENABLE, 1 }, diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 6c0680641108..72d06d6a3099 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -55,6 +55,8 @@ * push-pull mode, the argument is ignored. * @PIN_CONFIG_DRIVE_STRENGTH: the pin will sink or source at most the current * passed as argument. The argument is in mA. + * @PIN_CONFIG_DRIVE_STRENGTH_UA: the pin will sink or source at most the current + * passed as argument. The argument is in uA. * @PIN_CONFIG_INPUT_DEBOUNCE: this will configure the pin to debounce mode, * which means it will wait for signals to settle when reading inputs. The * argument gives the debounce time in usecs. Setting the @@ -112,6 +114,7 @@ enum pin_config_param { PIN_CONFIG_DRIVE_OPEN_SOURCE, PIN_CONFIG_DRIVE_PUSH_PULL, PIN_CONFIG_DRIVE_STRENGTH, + PIN_CONFIG_DRIVE_STRENGTH_UA, PIN_CONFIG_INPUT_DEBOUNCE, PIN_CONFIG_INPUT_ENABLE, PIN_CONFIG_INPUT_SCHMITT, -- cgit v1.2.3 From 036f394dd77f8117346874151793ec38967d843f Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Wed, 22 May 2019 17:29:24 +0200 Subject: pinctrl: Enable device link creation for pin control A pin controller may want to create a link between itself and its clients to be sure of suspend/resume call ordering. Introduce link_consumers field in pinctrl_desc structure to let pinctrl core knows that controller expect to create a link. Signed-off-by: Benjamin Gaignard [Renamed create_link to link_consumers] Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 11 +++++++++++ include/linux/pinctrl/pinctrl.h | 5 +++++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index c6ff4d5fa482..d757c51d7114 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1216,6 +1216,15 @@ struct pinctrl_state *pinctrl_lookup_state(struct pinctrl *p, } EXPORT_SYMBOL_GPL(pinctrl_lookup_state); +static void pinctrl_link_add(struct pinctrl_dev *pctldev, + struct device *consumer) +{ + if (pctldev->desc->link_consumers) + device_link_add(consumer, pctldev->dev, + DL_FLAG_PM_RUNTIME | + DL_FLAG_AUTOREMOVE_CONSUMER); +} + /** * pinctrl_commit_state() - select/activate/program a pinctrl state to HW * @p: the pinctrl handle for the device that requests configuration @@ -1261,6 +1270,8 @@ static int pinctrl_commit_state(struct pinctrl *p, struct pinctrl_state *state) if (ret < 0) { goto unapply_new_state; } + + pinctrl_link_add(setting->pctldev, p->dev); } p->state = state; diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 8f5dbb84547a..2744113f1024 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -125,6 +125,10 @@ struct pinctrl_ops { * the hardware description * @custom_conf_items: Information how to print @params in debugfs, must be * the same size as the @custom_params, i.e. @num_custom_params + * @link_consumers: If true create a device link between pinctrl and its + * consumers (i.e. the devices requesting pin control states). This is + * sometimes necessary to ascertain the right suspend/resume order for + * example. */ struct pinctrl_desc { const char *name; @@ -139,6 +143,7 @@ struct pinctrl_desc { const struct pinconf_generic_params *custom_params; const struct pin_config_item *custom_conf_items; #endif + bool link_consumers; }; /* External interface to pin controller */ -- cgit v1.2.3 From 0e344d8c709fe01d882fc0fb5452bedfe5eba67a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Mon, 13 May 2019 13:58:47 -0400 Subject: cpu/topology: Export die_id Export die_id in cpu topology, for the benefit of hardware that has multiple-die/package. Signed-off-by: Len Brown Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Cc: linux-doc@vger.kernel.org Link: https://lkml.kernel.org/r/e7d1caaf4fbd24ee40db6d557ab28d7d83298900.1557769318.git.len.brown@intel.com --- Documentation/cputopology.txt | 15 ++++++++++++--- drivers/base/topology.c | 4 ++++ include/linux/topology.h | 3 +++ 3 files changed, 19 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt index cb61277e2308..2ff8a1e9a2db 100644 --- a/Documentation/cputopology.txt +++ b/Documentation/cputopology.txt @@ -12,6 +12,12 @@ physical_package_id: socket number, but the actual value is architecture and platform dependent. +die_id: + + the CPU die ID of cpuX. Typically it is the hardware platform's + identifier (rather than the kernel's). The actual value is + architecture and platform dependent. + core_id: the CPU core ID of cpuX. Typically it is the hardware platform's @@ -81,6 +87,7 @@ For an architecture to support this feature, it must define some of these macros in include/asm-XXX/topology.h:: #define topology_physical_package_id(cpu) + #define topology_die_id(cpu) #define topology_core_id(cpu) #define topology_book_id(cpu) #define topology_drawer_id(cpu) @@ -99,9 +106,11 @@ provides default definitions for any of the above macros that are not defined by include/asm-XXX/topology.h: 1) topology_physical_package_id: -1 -2) topology_core_id: 0 -3) topology_sibling_cpumask: just the given CPU -4) topology_core_cpumask: just the given CPU +2) topology_die_id: -1 +3) topology_core_id: 0 +4) topology_sibling_cpumask: just the given CPU +5) topology_core_cpumask: just the given CPU +6) topology_die_cpumask: just the given CPU For architectures that don't support books (CONFIG_SCHED_BOOK) there are no default definitions for topology_book_id() and topology_book_cpumask(). diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 5fd9f167ecc1..50352cf96f85 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -43,6 +43,9 @@ static ssize_t name##_list_show(struct device *dev, \ define_id_show_func(physical_package_id); static DEVICE_ATTR_RO(physical_package_id); +define_id_show_func(die_id); +static DEVICE_ATTR_RO(die_id); + define_id_show_func(core_id); static DEVICE_ATTR_RO(core_id); @@ -72,6 +75,7 @@ static DEVICE_ATTR_RO(drawer_siblings_list); static struct attribute *default_attrs[] = { &dev_attr_physical_package_id.attr, + &dev_attr_die_id.attr, &dev_attr_core_id.attr, &dev_attr_thread_siblings.attr, &dev_attr_thread_siblings_list.attr, diff --git a/include/linux/topology.h b/include/linux/topology.h index cb0775e1ee4b..5cc8595dd0e4 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -184,6 +184,9 @@ static inline int cpu_to_mem(int cpu) #ifndef topology_physical_package_id #define topology_physical_package_id(cpu) ((void)(cpu), -1) #endif +#ifndef topology_die_id +#define topology_die_id(cpu) ((void)(cpu), -1) +#endif #ifndef topology_core_id #define topology_core_id(cpu) ((void)(cpu), 0) #endif -- cgit v1.2.3 From 2e4c54dac7b360c3820399bdf06cde9134a4495b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Mon, 13 May 2019 13:58:56 -0400 Subject: topology: Create core_cpus and die_cpus sysfs attributes Create CPU topology sysfs attributes: "core_cpus" and "core_cpus_list" These attributes represent all of the logical CPUs that share the same core. These attriutes is synonymous with the existing "thread_siblings" and "thread_siblings_list" attribute, which will be deprecated. Create CPU topology sysfs attributes: "die_cpus" and "die_cpus_list". These attributes represent all of the logical CPUs that share the same die. Suggested-by: Brice Goglin Signed-off-by: Len Brown Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/071c23a298cd27ede6ed0b6460cae190d193364f.1557769318.git.len.brown@intel.com --- Documentation/cputopology.txt | 21 +++++++++++++++------ arch/x86/include/asm/smp.h | 1 + arch/x86/include/asm/topology.h | 1 + arch/x86/kernel/smpboot.c | 22 ++++++++++++++++++++++ arch/x86/xen/smp_pv.c | 1 + drivers/base/topology.c | 12 ++++++++++++ include/linux/topology.h | 3 +++ 7 files changed, 55 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt index 48af5c290e20..b90dafcc8237 100644 --- a/Documentation/cputopology.txt +++ b/Documentation/cputopology.txt @@ -36,15 +36,15 @@ drawer_id: identifier (rather than the kernel's). The actual value is architecture and platform dependent. -thread_siblings: +core_cpus: - internal kernel map of cpuX's hardware threads within the same - core as cpuX. + internal kernel map of CPUs within the same core. + (deprecated name: "thread_siblings") -thread_siblings_list: +core_cpus_list: - human-readable list of cpuX's hardware threads within the same - core as cpuX. + human-readable list of CPUs within the same core. + (deprecated name: "thread_siblings_list"); package_cpus: @@ -56,6 +56,14 @@ package_cpus_list: human-readable list of CPUs sharing the same physical_package_id. (deprecated name: "core_siblings_list") +die_cpus: + + internal kernel map of CPUs within the same die. + +die_cpus_list: + + human-readable list of CPUs within the same die. + book_siblings: internal kernel map of cpuX's hardware threads within the same @@ -93,6 +101,7 @@ these macros in include/asm-XXX/topology.h:: #define topology_drawer_id(cpu) #define topology_sibling_cpumask(cpu) #define topology_core_cpumask(cpu) + #define topology_die_cpumask(cpu) #define topology_book_cpumask(cpu) #define topology_drawer_cpumask(cpu) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index da545df207b2..b673a226ad6c 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -23,6 +23,7 @@ extern unsigned int num_processors; DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); +DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); /* cpus sharing the last level cache: */ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 9de16b4f6023..4b14d2318251 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -111,6 +111,7 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #ifdef CONFIG_SMP +#define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu)) #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a6e01b6c2709..1a19a5171949 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -91,6 +91,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); +/* representing HT, core, and die siblings of each logical CPU */ +DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); +EXPORT_PER_CPU_SYMBOL(cpu_die_map); + DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); /* Per CPU bogomips and other parameters */ @@ -509,6 +513,15 @@ static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) return false; } +static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) +{ + if ((c->phys_proc_id == o->phys_proc_id) && + (c->cpu_die_id == o->cpu_die_id)) + return true; + return false; +} + + #if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC) static inline int x86_sched_itmt_flags(void) { @@ -571,6 +584,7 @@ void set_cpu_sibling_map(int cpu) cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu)); cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); cpumask_set_cpu(cpu, topology_core_cpumask(cpu)); + cpumask_set_cpu(cpu, topology_die_cpumask(cpu)); c->booted_cores = 1; return; } @@ -619,6 +633,9 @@ void set_cpu_sibling_map(int cpu) } if (match_pkg(c, o) && !topology_same_node(c, o)) x86_has_numa_in_package = true; + + if ((i == cpu) || (has_mp && match_die(c, o))) + link_mask(topology_die_cpumask, cpu, i); } threads = cpumask_weight(topology_sibling_cpumask(cpu)); @@ -1223,6 +1240,7 @@ static __init void disable_smp(void) physid_set_mask_of_physid(0, &phys_cpu_present_map); cpumask_set_cpu(0, topology_sibling_cpumask(0)); cpumask_set_cpu(0, topology_core_cpumask(0)); + cpumask_set_cpu(0, topology_die_cpumask(0)); } /* @@ -1318,6 +1336,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) for_each_possible_cpu(i) { zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); + zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); } @@ -1538,6 +1557,8 @@ static void remove_siblinginfo(int cpu) cpu_data(sibling).booted_cores--; } + for_each_cpu(sibling, topology_die_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_die_cpumask(sibling)); for_each_cpu(sibling, topology_sibling_cpumask(cpu)) cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling)); for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) @@ -1545,6 +1566,7 @@ static void remove_siblinginfo(int cpu) cpumask_clear(cpu_llc_shared_mask(cpu)); cpumask_clear(topology_sibling_cpumask(cpu)); cpumask_clear(topology_core_cpumask(cpu)); + cpumask_clear(topology_die_cpumask(cpu)); c->cpu_core_id = 0; c->booted_cores = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 590fcf863006..77d81c1a63e9 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -251,6 +251,7 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus) for_each_possible_cpu(i) { zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); + zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); } set_cpu_sibling_map(0); diff --git a/drivers/base/topology.c b/drivers/base/topology.c index dc3c19b482f3..4e033d4cc0dc 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -53,10 +53,18 @@ define_siblings_show_func(thread_siblings, sibling_cpumask); static DEVICE_ATTR_RO(thread_siblings); static DEVICE_ATTR_RO(thread_siblings_list); +define_siblings_show_func(core_cpus, sibling_cpumask); +static DEVICE_ATTR_RO(core_cpus); +static DEVICE_ATTR_RO(core_cpus_list); + define_siblings_show_func(core_siblings, core_cpumask); static DEVICE_ATTR_RO(core_siblings); static DEVICE_ATTR_RO(core_siblings_list); +define_siblings_show_func(die_cpus, die_cpumask); +static DEVICE_ATTR_RO(die_cpus); +static DEVICE_ATTR_RO(die_cpus_list); + define_siblings_show_func(package_cpus, core_cpumask); static DEVICE_ATTR_RO(package_cpus); static DEVICE_ATTR_RO(package_cpus_list); @@ -83,8 +91,12 @@ static struct attribute *default_attrs[] = { &dev_attr_core_id.attr, &dev_attr_thread_siblings.attr, &dev_attr_thread_siblings_list.attr, + &dev_attr_core_cpus.attr, + &dev_attr_core_cpus_list.attr, &dev_attr_core_siblings.attr, &dev_attr_core_siblings_list.attr, + &dev_attr_die_cpus.attr, + &dev_attr_die_cpus_list.attr, &dev_attr_package_cpus.attr, &dev_attr_package_cpus_list.attr, #ifdef CONFIG_SCHED_BOOK diff --git a/include/linux/topology.h b/include/linux/topology.h index 5cc8595dd0e4..47a3e3c08036 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -196,6 +196,9 @@ static inline int cpu_to_mem(int cpu) #ifndef topology_core_cpumask #define topology_core_cpumask(cpu) cpumask_of(cpu) #endif +#ifndef topology_die_cpumask +#define topology_die_cpumask(cpu) cpumask_of(cpu) +#endif #ifdef CONFIG_SCHED_SMT static inline const struct cpumask *cpu_smt_mask(int cpu) -- cgit v1.2.3 From 924b5867e7bd6a6a98014f0517b747465b108011 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 15 May 2019 09:48:12 -0700 Subject: spi: Allow SPI devices to request the pumping thread be realtime Right now the only way to get the SPI pumping thread bumped up to realtime priority is for the controller to request it. However it may be that the controller works fine with the normal priority but communication to a particular SPI device on the bus needs realtime priority. Let's add a way for devices to request realtime priority when they set themselves up. NOTE: this will just affect the priority of transfers that end up on the SPI core's pumping thread. In many cases transfers happen in the context of the caller so if you need realtime priority for all transfers you should ensure the calling context is also realtime priority. Signed-off-by: Douglas Anderson Reviewed-by: Guenter Roeck Tested-by: Enric Balletbo i Serra Signed-off-by: Mark Brown --- drivers/spi/spi.c | 36 ++++++++++++++++++++++++++++++------ include/linux/spi/spi.h | 2 ++ 2 files changed, 32 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 5e75944ad5d1..18f70e4bbb31 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1364,10 +1364,32 @@ static void spi_pump_messages(struct kthread_work *work) __spi_pump_messages(ctlr, true); } -static int spi_init_queue(struct spi_controller *ctlr) +/** + * spi_set_thread_rt - set the controller to pump at realtime priority + * @ctlr: controller to boost priority of + * + * This can be called because the controller requested realtime priority + * (by setting the ->rt value before calling spi_register_controller()) or + * because a device on the bus said that its transfers needed realtime + * priority. + * + * NOTE: at the moment if any device on a bus says it needs realtime then + * the thread will be at realtime priority for all transfers on that + * controller. If this eventually becomes a problem we may see if we can + * find a way to boost the priority only temporarily during relevant + * transfers. + */ +static void spi_set_thread_rt(struct spi_controller *ctlr) { struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; + dev_info(&ctlr->dev, + "will run message pump with realtime priority\n"); + sched_setscheduler(ctlr->kworker_task, SCHED_FIFO, ¶m); +} + +static int spi_init_queue(struct spi_controller *ctlr) +{ ctlr->running = false; ctlr->busy = false; @@ -1387,11 +1409,8 @@ static int spi_init_queue(struct spi_controller *ctlr) * request and the scheduling of the message pump thread. Without this * setting the message pump thread will remain at default priority. */ - if (ctlr->rt) { - dev_info(&ctlr->dev, - "will run message pump with realtime priority\n"); - sched_setscheduler(ctlr->kworker_task, SCHED_FIFO, ¶m); - } + if (ctlr->rt) + spi_set_thread_rt(ctlr); return 0; } @@ -2982,6 +3001,11 @@ int spi_setup(struct spi_device *spi) spi_set_cs(spi, false); + if (spi->rt && !spi->controller->rt) { + spi->controller->rt = true; + spi_set_thread_rt(spi->controller); + } + dev_dbg(&spi->dev, "setup mode %d, %s%s%s%s%u bits/w, %u Hz max --> %d\n", (int) (spi->mode & (SPI_CPOL | SPI_CPHA)), (spi->mode & SPI_CS_HIGH) ? "cs_high, " : "", diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 053abd22ad31..15505c2485d6 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -109,6 +109,7 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats, * This may be changed by the device's driver, or left at the * default (0) indicating protocol words are eight bit bytes. * The spi_transfer.bits_per_word can override this for each transfer. + * @rt: Make the pump thread real time priority. * @irq: Negative, or the number passed to request_irq() to receive * interrupts from this device. * @controller_state: Controller's runtime state @@ -143,6 +144,7 @@ struct spi_device { u32 max_speed_hz; u8 chip_select; u8 bits_per_word; + bool rt; u32 mode; #define SPI_CPHA 0x01 /* clock phase */ #define SPI_CPOL 0x02 /* clock polarity */ -- cgit v1.2.3 From 1bd33bf0fe6d3012410db0302187199871b510a0 Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Thu, 23 May 2019 14:02:20 +0200 Subject: net: ll_temac: Prepare indirect register access for multicast support With .ndo_set_rx_mode/temac_set_multicast_list() being called in atomic context (holding addr_list_lock), and temac_set_multicast_list() needing to access temac indirect registers, the mutex used to synchronize indirect register is a no-no. Replace it with a spinlock, and avoid sleeping in temac_indirect_busywait(). To avoid excessive holding of the lock, which is now a spinlock, the temac_device_reset() function is changed to only hold the lock for short periods. With timeouts, it could be holding the spinlock for more than 2 seconds. Signed-off-by: Esben Haabendal Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/ll_temac.h | 5 +- drivers/net/ethernet/xilinx/ll_temac_main.c | 240 ++++++++++++++++++-------- drivers/net/ethernet/xilinx/ll_temac_mdio.c | 20 +-- include/linux/platform_data/xilinx-ll-temac.h | 3 +- 4 files changed, 179 insertions(+), 89 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/xilinx/ll_temac.h b/drivers/net/ethernet/xilinx/ll_temac.h index 1aeda084b8f1..276292bca334 100644 --- a/drivers/net/ethernet/xilinx/ll_temac.h +++ b/drivers/net/ethernet/xilinx/ll_temac.h @@ -361,7 +361,7 @@ struct temac_local { /* For synchronization of indirect register access. Must be * shared mutex between interfaces in same TEMAC block. */ - struct mutex *indirect_mutex; + spinlock_t *indirect_lock; u32 options; /* Current options word */ int last_link; unsigned int temac_features; @@ -388,8 +388,9 @@ struct temac_local { /* xilinx_temac.c */ int temac_indirect_busywait(struct temac_local *lp); u32 temac_indirect_in32(struct temac_local *lp, int reg); +u32 temac_indirect_in32_locked(struct temac_local *lp, int reg); void temac_indirect_out32(struct temac_local *lp, int reg, u32 value); - +void temac_indirect_out32_locked(struct temac_local *lp, int reg, u32 value); /* xilinx_temac_mdio.c */ int temac_mdio_setup(struct temac_local *lp, struct platform_device *pdev); diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 65fb549241b2..cc58bd8c12f6 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include "ll_temac.h" @@ -84,51 +85,118 @@ static void _temac_iow_le(struct temac_local *lp, int offset, u32 value) return iowrite32(value, lp->regs + offset); } +static bool hard_acs_rdy(struct temac_local *lp) +{ + return temac_ior(lp, XTE_RDY0_OFFSET) & XTE_RDY0_HARD_ACS_RDY_MASK; +} + +static bool hard_acs_rdy_or_timeout(struct temac_local *lp, ktime_t timeout) +{ + ktime_t cur = ktime_get(); + + return hard_acs_rdy(lp) || ktime_after(cur, timeout); +} + +/* Poll for maximum 20 ms. This is similar to the 2 jiffies @ 100 Hz + * that was used before, and should cover MDIO bus speed down to 3200 + * Hz. + */ +#define HARD_ACS_RDY_POLL_NS (20 * NSEC_PER_MSEC) + +/** + * temac_indirect_busywait - Wait for current indirect register access + * to complete. + */ int temac_indirect_busywait(struct temac_local *lp) { - unsigned long end = jiffies + 2; + ktime_t timeout = ktime_add_ns(ktime_get(), HARD_ACS_RDY_POLL_NS); - while (!(temac_ior(lp, XTE_RDY0_OFFSET) & XTE_RDY0_HARD_ACS_RDY_MASK)) { - if (time_before_eq(end, jiffies)) { - WARN_ON(1); - return -ETIMEDOUT; - } - usleep_range(500, 1000); - } - return 0; + spin_until_cond(hard_acs_rdy_or_timeout(lp, timeout)); + if (WARN_ON(!hard_acs_rdy(lp))) + return -ETIMEDOUT; + else + return 0; } /** - * temac_indirect_in32 - * - * lp->indirect_mutex must be held when calling this function + * temac_indirect_in32 - Indirect register read access. This function + * must be called without lp->indirect_lock being held. */ u32 temac_indirect_in32(struct temac_local *lp, int reg) { - u32 val; + unsigned long flags; + int val; + + spin_lock_irqsave(lp->indirect_lock, flags); + val = temac_indirect_in32_locked(lp, reg); + spin_unlock_irqrestore(lp->indirect_lock, flags); + return val; +} - if (temac_indirect_busywait(lp)) +/** + * temac_indirect_in32_locked - Indirect register read access. This + * function must be called with lp->indirect_lock being held. Use + * this together with spin_lock_irqsave/spin_lock_irqrestore to avoid + * repeated lock/unlock and to ensure uninterrupted access to indirect + * registers. + */ +u32 temac_indirect_in32_locked(struct temac_local *lp, int reg) +{ + /* This initial wait should normally not spin, as we always + * try to wait for indirect access to complete before + * releasing the indirect_lock. + */ + if (WARN_ON(temac_indirect_busywait(lp))) return -ETIMEDOUT; + /* Initiate read from indirect register */ temac_iow(lp, XTE_CTL0_OFFSET, reg); - if (temac_indirect_busywait(lp)) + /* Wait for indirect register access to complete. We really + * should not see timeouts, and could even end up causing + * problem for following indirect access, so let's make a bit + * of WARN noise. + */ + if (WARN_ON(temac_indirect_busywait(lp))) return -ETIMEDOUT; - val = temac_ior(lp, XTE_LSW0_OFFSET); - - return val; + /* Value is ready now */ + return temac_ior(lp, XTE_LSW0_OFFSET); } /** - * temac_indirect_out32 - * - * lp->indirect_mutex must be held when calling this function + * temac_indirect_out32 - Indirect register write access. This function + * must be called without lp->indirect_lock being held. */ void temac_indirect_out32(struct temac_local *lp, int reg, u32 value) { - if (temac_indirect_busywait(lp)) + unsigned long flags; + + spin_lock_irqsave(lp->indirect_lock, flags); + temac_indirect_out32_locked(lp, reg, value); + spin_unlock_irqrestore(lp->indirect_lock, flags); +} + +/** + * temac_indirect_out32_locked - Indirect register write access. This + * function must be called with lp->indirect_lock being held. Use + * this together with spin_lock_irqsave/spin_lock_irqrestore to avoid + * repeated lock/unlock and to ensure uninterrupted access to indirect + * registers. + */ +void temac_indirect_out32_locked(struct temac_local *lp, int reg, u32 value) +{ + /* As in temac_indirect_in32_locked(), we should normally not + * spin here. And if it happens, we actually end up silently + * ignoring the write request. Ouch. + */ + if (WARN_ON(temac_indirect_busywait(lp))) return; + /* Initiate write to indirect register */ temac_iow(lp, XTE_LSW0_OFFSET, value); temac_iow(lp, XTE_CTL0_OFFSET, CNTLREG_WRITE_ENABLE_MASK | reg); - temac_indirect_busywait(lp); + /* As in temac_indirect_in32_locked(), we should not see timeouts + * here. And if it happens, we continue before the write has + * completed. Not good. + */ + WARN_ON(temac_indirect_busywait(lp)); } /** @@ -344,20 +412,21 @@ out: static void temac_do_set_mac_address(struct net_device *ndev) { struct temac_local *lp = netdev_priv(ndev); + unsigned long flags; /* set up unicast MAC address filter set its mac address */ - mutex_lock(lp->indirect_mutex); - temac_indirect_out32(lp, XTE_UAW0_OFFSET, - (ndev->dev_addr[0]) | - (ndev->dev_addr[1] << 8) | - (ndev->dev_addr[2] << 16) | - (ndev->dev_addr[3] << 24)); + spin_lock_irqsave(lp->indirect_lock, flags); + temac_indirect_out32_locked(lp, XTE_UAW0_OFFSET, + (ndev->dev_addr[0]) | + (ndev->dev_addr[1] << 8) | + (ndev->dev_addr[2] << 16) | + (ndev->dev_addr[3] << 24)); /* There are reserved bits in EUAW1 * so don't affect them Set MAC bits [47:32] in EUAW1 */ - temac_indirect_out32(lp, XTE_UAW1_OFFSET, - (ndev->dev_addr[4] & 0x000000ff) | - (ndev->dev_addr[5] << 8)); - mutex_unlock(lp->indirect_mutex); + temac_indirect_out32_locked(lp, XTE_UAW1_OFFSET, + (ndev->dev_addr[4] & 0x000000ff) | + (ndev->dev_addr[5] << 8)); + spin_unlock_irqrestore(lp->indirect_lock, flags); } static int temac_init_mac_address(struct net_device *ndev, const void *address) @@ -383,42 +452,56 @@ static int temac_set_mac_address(struct net_device *ndev, void *p) static void temac_set_multicast_list(struct net_device *ndev) { struct temac_local *lp = netdev_priv(ndev); - u32 multi_addr_msw, multi_addr_lsw, val; + u32 multi_addr_msw, multi_addr_lsw; int i; + unsigned long flags; + bool promisc_mode_disabled = false; - mutex_lock(lp->indirect_mutex); - if (ndev->flags & (IFF_ALLMULTI | IFF_PROMISC) || - netdev_mc_count(ndev) > MULTICAST_CAM_TABLE_NUM) { + if (ndev->flags & (IFF_PROMISC | IFF_ALLMULTI) || + (netdev_mc_count(ndev) > MULTICAST_CAM_TABLE_NUM)) { temac_indirect_out32(lp, XTE_AFM_OFFSET, XTE_AFM_EPPRM_MASK); dev_info(&ndev->dev, "Promiscuous mode enabled.\n"); - } else if (!netdev_mc_empty(ndev)) { + return; + } + + spin_lock_irqsave(lp->indirect_lock, flags); + + if (!netdev_mc_empty(ndev)) { struct netdev_hw_addr *ha; i = 0; netdev_for_each_mc_addr(ha, ndev) { - if (i >= MULTICAST_CAM_TABLE_NUM) + if (WARN_ON(i >= MULTICAST_CAM_TABLE_NUM)) break; multi_addr_msw = ((ha->addr[3] << 24) | (ha->addr[2] << 16) | (ha->addr[1] << 8) | (ha->addr[0])); - temac_indirect_out32(lp, XTE_MAW0_OFFSET, - multi_addr_msw); + temac_indirect_out32_locked(lp, XTE_MAW0_OFFSET, + multi_addr_msw); multi_addr_lsw = ((ha->addr[5] << 8) | (ha->addr[4]) | (i << 16)); - temac_indirect_out32(lp, XTE_MAW1_OFFSET, - multi_addr_lsw); + temac_indirect_out32_locked(lp, XTE_MAW1_OFFSET, + multi_addr_lsw); i++; } } else { - val = temac_indirect_in32(lp, XTE_AFM_OFFSET); - temac_indirect_out32(lp, XTE_AFM_OFFSET, - val & ~XTE_AFM_EPPRM_MASK); - temac_indirect_out32(lp, XTE_MAW0_OFFSET, 0); - temac_indirect_out32(lp, XTE_MAW1_OFFSET, 0); - dev_info(&ndev->dev, "Promiscuous mode disabled.\n"); + temac_indirect_out32_locked(lp, XTE_MAW0_OFFSET, 0); + temac_indirect_out32_locked(lp, XTE_MAW1_OFFSET, i << 16); + } } - mutex_unlock(lp->indirect_mutex); + + /* Enable address filter block if currently disabled */ + if (temac_indirect_in32_locked(lp, XTE_AFM_OFFSET) + & XTE_AFM_EPPRM_MASK) { + temac_indirect_out32_locked(lp, XTE_AFM_OFFSET, 0); + promisc_mode_disabled = true; + } + + spin_unlock_irqrestore(lp->indirect_lock, flags); + + if (promisc_mode_disabled) + dev_info(&ndev->dev, "Promiscuous mode disabled.\n"); } static struct temac_option { @@ -509,17 +592,19 @@ static u32 temac_setoptions(struct net_device *ndev, u32 options) struct temac_local *lp = netdev_priv(ndev); struct temac_option *tp = &temac_options[0]; int reg; + unsigned long flags; - mutex_lock(lp->indirect_mutex); + spin_lock_irqsave(lp->indirect_lock, flags); while (tp->opt) { - reg = temac_indirect_in32(lp, tp->reg) & ~tp->m_or; - if (options & tp->opt) + reg = temac_indirect_in32_locked(lp, tp->reg) & ~tp->m_or; + if (options & tp->opt) { reg |= tp->m_or; - temac_indirect_out32(lp, tp->reg, reg); + temac_indirect_out32_locked(lp, tp->reg, reg); + } tp++; } + spin_unlock_irqrestore(lp->indirect_lock, flags); lp->options |= options; - mutex_unlock(lp->indirect_mutex); return 0; } @@ -530,6 +615,7 @@ static void temac_device_reset(struct net_device *ndev) struct temac_local *lp = netdev_priv(ndev); u32 timeout; u32 val; + unsigned long flags; /* Perform a software reset */ @@ -538,7 +624,6 @@ static void temac_device_reset(struct net_device *ndev) dev_dbg(&ndev->dev, "%s()\n", __func__); - mutex_lock(lp->indirect_mutex); /* Reset the receiver and wait for it to finish reset */ temac_indirect_out32(lp, XTE_RXC1_OFFSET, XTE_RXC1_RXRST_MASK); timeout = 1000; @@ -564,8 +649,11 @@ static void temac_device_reset(struct net_device *ndev) } /* Disable the receiver */ - val = temac_indirect_in32(lp, XTE_RXC1_OFFSET); - temac_indirect_out32(lp, XTE_RXC1_OFFSET, val & ~XTE_RXC1_RXEN_MASK); + spin_lock_irqsave(lp->indirect_lock, flags); + val = temac_indirect_in32_locked(lp, XTE_RXC1_OFFSET); + temac_indirect_out32_locked(lp, XTE_RXC1_OFFSET, + val & ~XTE_RXC1_RXEN_MASK); + spin_unlock_irqrestore(lp->indirect_lock, flags); /* Reset Local Link (DMA) */ lp->dma_out(lp, DMA_CONTROL_REG, DMA_CONTROL_RST); @@ -585,12 +673,12 @@ static void temac_device_reset(struct net_device *ndev) "temac_device_reset descriptor allocation failed\n"); } - temac_indirect_out32(lp, XTE_RXC0_OFFSET, 0); - temac_indirect_out32(lp, XTE_RXC1_OFFSET, 0); - temac_indirect_out32(lp, XTE_TXC_OFFSET, 0); - temac_indirect_out32(lp, XTE_FCC_OFFSET, XTE_FCC_RXFLO_MASK); - - mutex_unlock(lp->indirect_mutex); + spin_lock_irqsave(lp->indirect_lock, flags); + temac_indirect_out32_locked(lp, XTE_RXC0_OFFSET, 0); + temac_indirect_out32_locked(lp, XTE_RXC1_OFFSET, 0); + temac_indirect_out32_locked(lp, XTE_TXC_OFFSET, 0); + temac_indirect_out32_locked(lp, XTE_FCC_OFFSET, XTE_FCC_RXFLO_MASK); + spin_unlock_irqrestore(lp->indirect_lock, flags); /* Sync default options with HW * but leave receiver and transmitter disabled. */ @@ -614,13 +702,14 @@ static void temac_adjust_link(struct net_device *ndev) struct phy_device *phy = ndev->phydev; u32 mii_speed; int link_state; + unsigned long flags; /* hash together the state values to decide if something has changed */ link_state = phy->speed | (phy->duplex << 1) | phy->link; - mutex_lock(lp->indirect_mutex); if (lp->last_link != link_state) { - mii_speed = temac_indirect_in32(lp, XTE_EMCFG_OFFSET); + spin_lock_irqsave(lp->indirect_lock, flags); + mii_speed = temac_indirect_in32_locked(lp, XTE_EMCFG_OFFSET); mii_speed &= ~XTE_EMCFG_LINKSPD_MASK; switch (phy->speed) { @@ -630,11 +719,12 @@ static void temac_adjust_link(struct net_device *ndev) } /* Write new speed setting out to TEMAC */ - temac_indirect_out32(lp, XTE_EMCFG_OFFSET, mii_speed); + temac_indirect_out32_locked(lp, XTE_EMCFG_OFFSET, mii_speed); + spin_unlock_irqrestore(lp->indirect_lock, flags); + lp->last_link = link_state; phy_print_status(phy); } - mutex_unlock(lp->indirect_mutex); } #ifdef CONFIG_64BIT @@ -1096,17 +1186,17 @@ static int temac_probe(struct platform_device *pdev) /* Setup mutex for synchronization of indirect register access */ if (pdata) { - if (!pdata->indirect_mutex) { + if (!pdata->indirect_lock) { dev_err(&pdev->dev, - "indirect_mutex missing in platform_data\n"); + "indirect_lock missing in platform_data\n"); return -EINVAL; } - lp->indirect_mutex = pdata->indirect_mutex; + lp->indirect_lock = pdata->indirect_lock; } else { - lp->indirect_mutex = devm_kmalloc(&pdev->dev, - sizeof(*lp->indirect_mutex), - GFP_KERNEL); - mutex_init(lp->indirect_mutex); + lp->indirect_lock = devm_kmalloc(&pdev->dev, + sizeof(*lp->indirect_lock), + GFP_KERNEL); + spin_lock_init(lp->indirect_lock); } /* map device registers */ diff --git a/drivers/net/ethernet/xilinx/ll_temac_mdio.c b/drivers/net/ethernet/xilinx/ll_temac_mdio.c index a4667326f745..6fd2dea4e60f 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_mdio.c +++ b/drivers/net/ethernet/xilinx/ll_temac_mdio.c @@ -25,14 +25,15 @@ static int temac_mdio_read(struct mii_bus *bus, int phy_id, int reg) { struct temac_local *lp = bus->priv; u32 rc; + unsigned long flags; /* Write the PHY address to the MIIM Access Initiator register. * When the transfer completes, the PHY register value will appear * in the LSW0 register */ - mutex_lock(lp->indirect_mutex); + spin_lock_irqsave(lp->indirect_lock, flags); temac_iow(lp, XTE_LSW0_OFFSET, (phy_id << 5) | reg); - rc = temac_indirect_in32(lp, XTE_MIIMAI_OFFSET); - mutex_unlock(lp->indirect_mutex); + rc = temac_indirect_in32_locked(lp, XTE_MIIMAI_OFFSET); + spin_unlock_irqrestore(lp->indirect_lock, flags); dev_dbg(lp->dev, "temac_mdio_read(phy_id=%i, reg=%x) == %x\n", phy_id, reg, rc); @@ -43,6 +44,7 @@ static int temac_mdio_read(struct mii_bus *bus, int phy_id, int reg) static int temac_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val) { struct temac_local *lp = bus->priv; + unsigned long flags; dev_dbg(lp->dev, "temac_mdio_write(phy_id=%i, reg=%x, val=%x)\n", phy_id, reg, val); @@ -50,10 +52,10 @@ static int temac_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val) /* First write the desired value into the write data register * and then write the address into the access initiator register */ - mutex_lock(lp->indirect_mutex); - temac_indirect_out32(lp, XTE_MGTDR_OFFSET, val); - temac_indirect_out32(lp, XTE_MIIMAI_OFFSET, (phy_id << 5) | reg); - mutex_unlock(lp->indirect_mutex); + spin_lock_irqsave(lp->indirect_lock, flags); + temac_indirect_out32_locked(lp, XTE_MGTDR_OFFSET, val); + temac_indirect_out32_locked(lp, XTE_MIIMAI_OFFSET, (phy_id << 5) | reg); + spin_unlock_irqrestore(lp->indirect_lock, flags); return 0; } @@ -87,9 +89,7 @@ int temac_mdio_setup(struct temac_local *lp, struct platform_device *pdev) /* Enable the MDIO bus by asserting the enable bit and writing * in the clock config */ - mutex_lock(lp->indirect_mutex); temac_indirect_out32(lp, XTE_MC_OFFSET, 1 << 6 | clk_div); - mutex_unlock(lp->indirect_mutex); bus = devm_mdiobus_alloc(&pdev->dev); if (!bus) @@ -116,10 +116,8 @@ int temac_mdio_setup(struct temac_local *lp, struct platform_device *pdev) if (rc) return rc; - mutex_lock(lp->indirect_mutex); dev_dbg(lp->dev, "MDIO bus registered; MC:%x\n", temac_indirect_in32(lp, XTE_MC_OFFSET)); - mutex_unlock(lp->indirect_mutex); return 0; } diff --git a/include/linux/platform_data/xilinx-ll-temac.h b/include/linux/platform_data/xilinx-ll-temac.h index 368530f98176..f4a68136afa6 100644 --- a/include/linux/platform_data/xilinx-ll-temac.h +++ b/include/linux/platform_data/xilinx-ll-temac.h @@ -4,6 +4,7 @@ #include #include +#include struct ll_temac_platform_data { bool txcsum; /* Enable/disable TX checksum */ @@ -21,7 +22,7 @@ struct ll_temac_platform_data { * TEMAC IP block, the same mutex should be passed here, as * they share the same DCR bus bridge. */ - struct mutex *indirect_mutex; + spinlock_t *indirect_lock; /* DMA channel control setup */ u8 tx_irq_timeout; /* TX Interrupt Delay Time-out */ u8 tx_irq_count; /* TX Interrupt Coalescing Threshold Count */ -- cgit v1.2.3 From 9395da4efbd46661f0049d24d54d1cea63241fc9 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Wed, 22 May 2019 14:21:07 -0600 Subject: net: qualcomm: rmnet: Move common struct definitions to include Create if_rmnet.h and move the rmnet MAP packet structs to this common include file. To account for portablity, add little and big endian bitfield definitions similar to the ip & tcp headers. The definitions in the headers can now be re-used by the upcoming ipa driver series as well as qmi_wwan. Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h | 25 +---------- include/linux/if_rmnet.h | 55 +++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 24 deletions(-) create mode 100644 include/linux/if_rmnet.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h index 884f1f52dcc2..991d7e285736 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h @@ -12,6 +12,7 @@ #ifndef _RMNET_MAP_H_ #define _RMNET_MAP_H_ +#include struct rmnet_map_control_command { u8 command_name; @@ -39,30 +40,6 @@ enum rmnet_map_commands { RMNET_MAP_COMMAND_ENUM_LENGTH }; -struct rmnet_map_header { - u8 pad_len:6; - u8 reserved_bit:1; - u8 cd_bit:1; - u8 mux_id; - __be16 pkt_len; -} __aligned(1); - -struct rmnet_map_dl_csum_trailer { - u8 reserved1; - u8 valid:1; - u8 reserved2:7; - u16 csum_start_offset; - u16 csum_length; - __be16 csum_value; -} __aligned(1); - -struct rmnet_map_ul_csum_header { - __be16 csum_start_offset; - u16 csum_insert_offset:14; - u16 udp_ip4_ind:1; - u16 csum_enabled:1; -} __aligned(1); - #define RMNET_MAP_GET_MUX_ID(Y) (((struct rmnet_map_header *) \ (Y)->data)->mux_id) #define RMNET_MAP_GET_CD_BIT(Y) (((struct rmnet_map_header *) \ diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h new file mode 100644 index 000000000000..b4f5403383fc --- /dev/null +++ b/include/linux/if_rmnet.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (c) 2013-2019, The Linux Foundation. All rights reserved. + */ + +#ifndef _LINUX_IF_RMNET_H_ +#define _LINUX_IF_RMNET_H_ + +struct rmnet_map_header { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u8 pad_len:6; + u8 reserved_bit:1; + u8 cd_bit:1; +#elif defined (__BIG_ENDIAN_BITFIELD) + u8 cd_bit:1; + u8 reserved_bit:1; + u8 pad_len:6; +#else +#error "Please fix " +#endif + u8 mux_id; + __be16 pkt_len; +} __aligned(1); + +struct rmnet_map_dl_csum_trailer { + u8 reserved1; +#if defined(__LITTLE_ENDIAN_BITFIELD) + u8 valid:1; + u8 reserved2:7; +#elif defined (__BIG_ENDIAN_BITFIELD) + u8 reserved2:7; + u8 valid:1; +#else +#error "Please fix " +#endif + u16 csum_start_offset; + u16 csum_length; + __be16 csum_value; +} __aligned(1); + +struct rmnet_map_ul_csum_header { + __be16 csum_start_offset; +#if defined(__LITTLE_ENDIAN_BITFIELD) + u16 csum_insert_offset:14; + u16 udp_ip4_ind:1; + u16 csum_enabled:1; +#elif defined (__BIG_ENDIAN_BITFIELD) + u16 csum_enabled:1; + u16 udp_ip4_ind:1; + u16 csum_insert_offset:14; +#else +#error "Please fix " +#endif +} __aligned(1); + +#endif /* !(_LINUX_IF_RMNET_H_) */ -- cgit v1.2.3 From a8f500af0ccffc3d2aaf9018537981cb173865a1 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 21 May 2019 20:17:06 -0700 Subject: bpf: split explored_states split explored_states into prune_point boolean mark and link list of explored states. This removes STATE_LIST_MARK hack and allows marks to be separate from states. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 31 +++++++++++++------------------ 2 files changed, 14 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 1305ccbd8fe6..02bba09a0ea1 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -233,6 +233,7 @@ struct bpf_insn_aux_data { int sanitize_stack_off; /* stack slot to be cleared */ bool seen; /* this insn was processed by the verifier */ u8 alu_state; /* used in combination with alu_limit */ + bool prune_point; unsigned int orig_idx; /* original instruction index */ }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 736b5a0d4848..6a3e69ba891e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5436,7 +5436,6 @@ enum { BRANCH = 2, }; -#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L) static struct bpf_verifier_state_list **explored_state( struct bpf_verifier_env *env, int idx) @@ -5446,7 +5445,7 @@ static struct bpf_verifier_state_list **explored_state( static void init_explored_state(struct bpf_verifier_env *env, int idx) { - env->explored_states[idx] = STATE_LIST_MARK; + env->insn_aux_data[idx].prune_point = true; } /* t, w, e - match pseudo-code above: @@ -6018,10 +6017,7 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn, int i; sl = *explored_state(env, insn); - if (!sl) - return; - - while (sl != STATE_LIST_MARK) { + while (sl) { if (sl->state.curframe != cur->curframe) goto next; for (i = 0; i <= cur->curframe; i++) @@ -6376,18 +6372,18 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) struct bpf_verifier_state *cur = env->cur_state, *new; int i, j, err, states_cnt = 0; - pprev = explored_state(env, insn_idx); - sl = *pprev; - - if (!sl) + if (!env->insn_aux_data[insn_idx].prune_point) /* this 'insn_idx' instruction wasn't marked, so we will not * be doing state search here */ return 0; + pprev = explored_state(env, insn_idx); + sl = *pprev; + clean_live_states(env, insn_idx, cur); - while (sl != STATE_LIST_MARK) { + while (sl) { if (states_equal(env, &sl->state, cur)) { sl->hit_cnt++; /* reached equivalent register/stack state, @@ -8145,13 +8141,12 @@ static void free_states(struct bpf_verifier_env *env) for (i = 0; i < env->prog->len; i++) { sl = env->explored_states[i]; - if (sl) - while (sl != STATE_LIST_MARK) { - sln = sl->next; - free_verifier_state(&sl->state, false); - kfree(sl); - sl = sln; - } + while (sl) { + sln = sl->next; + free_verifier_state(&sl->state, false); + kfree(sl); + sl = sln; + } } kvfree(env->explored_states); -- cgit v1.2.3 From dc2a4ebc0b44a212fcf72242210e56aa17e7317b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 21 May 2019 20:17:07 -0700 Subject: bpf: convert explored_states to hash table All prune points inside a callee bpf function most likely will have different callsites. For example, if function foo() is called from two callsites the half of explored states in all prune points in foo() will be useless for subsequent walking of one of those callsites. Fortunately explored_states pruning heuristics keeps the number of states per prune point small, but walking these states is still a waste of cpu time when the callsite of the current state is different from the callsite of the explored state. To improve pruning logic convert explored_states into hash table and use simple insn_idx ^ callsite hash to select hash bucket. This optimization has no effect on programs without bpf2bpf calls and drastically improves programs with calls. In the later case it reduces total memory consumption in 1M scale tests by almost 3 times (peak_states drops from 5752 to 2016). Care should be taken when comparing the states for equivalency. Since the same hash bucket can now contain states with different indices the insn_idx has to be part of verifier_state and compared. Different hash table sizes and different hash functions were explored, but the results were not significantly better vs this patch. They can be improved in the future. Hit/miss heuristic is not counting index miscompare as a miss. Otherwise verifier stats become unstable when experimenting with different hash functions. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 23 ++++++++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 02bba09a0ea1..405b502283c5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -187,6 +187,7 @@ struct bpf_func_state { struct bpf_verifier_state { /* call stack tracking */ struct bpf_func_state *frame[MAX_CALL_FRAMES]; + u32 insn_idx; u32 curframe; u32 active_spin_lock; bool speculative; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6a3e69ba891e..550091c7a46a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5436,11 +5436,19 @@ enum { BRANCH = 2, }; +static u32 state_htab_size(struct bpf_verifier_env *env) +{ + return env->prog->len; +} + static struct bpf_verifier_state_list **explored_state( struct bpf_verifier_env *env, int idx) { - return &env->explored_states[idx]; + struct bpf_verifier_state *cur = env->cur_state; + struct bpf_func_state *state = cur->frame[cur->curframe]; + + return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)]; } static void init_explored_state(struct bpf_verifier_env *env, int idx) @@ -6018,7 +6026,8 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn, sl = *explored_state(env, insn); while (sl) { - if (sl->state.curframe != cur->curframe) + if (sl->state.insn_idx != insn || + sl->state.curframe != cur->curframe) goto next; for (i = 0; i <= cur->curframe; i++) if (sl->state.frame[i]->callsite != cur->frame[i]->callsite) @@ -6384,6 +6393,9 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) clean_live_states(env, insn_idx, cur); while (sl) { + states_cnt++; + if (sl->state.insn_idx != insn_idx) + goto next; if (states_equal(env, &sl->state, cur)) { sl->hit_cnt++; /* reached equivalent register/stack state, @@ -6401,7 +6413,6 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return err; return 1; } - states_cnt++; sl->miss_cnt++; /* heuristic to determine whether this state is beneficial * to keep checking from state equivalence point of view. @@ -6428,6 +6439,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) sl = *pprev; continue; } +next: pprev = &sl->next; sl = *pprev; } @@ -6459,6 +6471,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) kfree(new_sl); return err; } + new->insn_idx = insn_idx; new_sl->next = *explored_state(env, insn_idx); *explored_state(env, insn_idx) = new_sl; /* connect new state to parentage chain. Current frame needs all @@ -8138,7 +8151,7 @@ static void free_states(struct bpf_verifier_env *env) if (!env->explored_states) return; - for (i = 0; i < env->prog->len; i++) { + for (i = 0; i < state_htab_size(env); i++) { sl = env->explored_states[i]; while (sl) { @@ -8246,7 +8259,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, goto skip_full_check; } - env->explored_states = kvcalloc(env->prog->len, + env->explored_states = kvcalloc(state_htab_size(env), sizeof(struct bpf_verifier_state_list *), GFP_USER); ret = -ENOMEM; -- cgit v1.2.3 From 59fcdce425b7c947ccea03a16e393af9bb4d6262 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 23 May 2019 17:05:59 -0700 Subject: clk: Remove ifdef for COMMON_CLK in clk-provider.h This ifdef has been there since the beginning of this file, but it doesn't really seem to serve any purpose besides obfuscating the struct definitions and #defines here from compilation units that include it. Let's always expose these function prototypes and struct definitions so that code can inspect clk providers without needing to have CONFIG_COMMON_CLK enabled. Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index bb6118f79784..3bced2ec9f26 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -9,8 +9,6 @@ #include #include -#ifdef CONFIG_COMMON_CLK - /* * flags used across common struct clk. these flags should only affect the * top-level framework. custom flags for dealing with hardware specifics @@ -1019,5 +1017,4 @@ static inline int of_clk_detect_critical(struct device_node *np, int index, void clk_gate_restore_context(struct clk_hw *hw); -#endif /* CONFIG_COMMON_CLK */ #endif /* CLK_PROVIDER_H */ -- cgit v1.2.3 From 30d5a945743cd05ec5c847f2e38c2fbda5e00944 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 23 May 2019 17:11:57 -0700 Subject: clk: Unexport __clk_of_table This symbol doesn't need to be exported to clk providers anymore. Originally, it was hidden inside clk.c, but then OMAP needed to get access to it in commit 819b4861c18d ("CLK: ti: add init support for clock IP blocks"), but eventually that code also changed in commit c08ee14cc663 ("clk: ti: change clock init to use generic of_clk_init") and we were left with this exported. Move this back into clk.c so that it isn't exposed anymore. Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 1 + include/linux/clk-provider.h | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index aa51756fd4d6..b34e84bb8167 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -4038,6 +4038,7 @@ struct of_clk_provider { void *data; }; +extern struct of_device_id __clk_of_table; static const struct of_device_id __clk_of_table_sentinel __used __section(__clk_of_table_end); diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 3bced2ec9f26..9ba000e3a50d 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -865,8 +865,6 @@ static inline long divider_ro_round_rate(struct clk_hw *hw, unsigned long rate, */ unsigned long clk_hw_round_rate(struct clk_hw *hw, unsigned long rate); -struct of_device_id; - struct clk_onecell_data { struct clk **clks; unsigned int clk_num; @@ -877,8 +875,6 @@ struct clk_hw_onecell_data { struct clk_hw *hws[]; }; -extern struct of_device_id __clk_of_table; - #define CLK_OF_DECLARE(name, compat, fn) OF_DECLARE_1(clk, name, compat, fn) /* -- cgit v1.2.3 From 418a3ab1e7785799193c0f8628cd0f01c00a03ae Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Apr 2019 04:54:42 -0700 Subject: mm/balloon_compaction: List interfaces Introduce interfaces for ballooning enqueueing and dequeueing of a list of pages. These interfaces reduce the overhead of storing and restoring IRQs by batching the operations. In addition they do not panic if the list of pages is empty. Cc: Jason Wang Cc: linux-mm@kvack.org Cc: virtualization@lists.linux-foundation.org Acked-by: Michael S. Tsirkin Reviewed-by: Xavier Deguillard Signed-off-by: Nadav Amit Signed-off-by: Greg Kroah-Hartman --- include/linux/balloon_compaction.h | 4 ++ mm/balloon_compaction.c | 144 +++++++++++++++++++++++++++---------- 2 files changed, 110 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index f31521dcb09a..338aa27e4773 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -64,6 +64,10 @@ extern struct page *balloon_page_alloc(void); extern void balloon_page_enqueue(struct balloon_dev_info *b_dev_info, struct page *page); extern struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info); +extern size_t balloon_page_list_enqueue(struct balloon_dev_info *b_dev_info, + struct list_head *pages); +extern size_t balloon_page_list_dequeue(struct balloon_dev_info *b_dev_info, + struct list_head *pages, size_t n_req_pages); static inline void balloon_devinfo_init(struct balloon_dev_info *balloon) { diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index ef858d547e2d..b7bd72612c5a 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -10,6 +10,105 @@ #include #include +static void balloon_page_enqueue_one(struct balloon_dev_info *b_dev_info, + struct page *page) +{ + /* + * Block others from accessing the 'page' when we get around to + * establishing additional references. We should be the only one + * holding a reference to the 'page' at this point. If we are not, then + * memory corruption is possible and we should stop execution. + */ + BUG_ON(!trylock_page(page)); + list_del(&page->lru); + balloon_page_insert(b_dev_info, page); + unlock_page(page); + __count_vm_event(BALLOON_INFLATE); +} + +/** + * balloon_page_list_enqueue() - inserts a list of pages into the balloon page + * list. + * @b_dev_info: balloon device descriptor where we will insert a new page to + * @pages: pages to enqueue - allocated using balloon_page_alloc. + * + * Driver must call it to properly enqueue a balloon pages before definitively + * removing it from the guest system. + * + * Return: number of pages that were enqueued. + */ +size_t balloon_page_list_enqueue(struct balloon_dev_info *b_dev_info, + struct list_head *pages) +{ + struct page *page, *tmp; + unsigned long flags; + size_t n_pages = 0; + + spin_lock_irqsave(&b_dev_info->pages_lock, flags); + list_for_each_entry_safe(page, tmp, pages, lru) { + balloon_page_enqueue_one(b_dev_info, page); + n_pages++; + } + spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); + return n_pages; +} +EXPORT_SYMBOL_GPL(balloon_page_list_enqueue); + +/** + * balloon_page_list_dequeue() - removes pages from balloon's page list and + * returns a list of the pages. + * @b_dev_info: balloon device decriptor where we will grab a page from. + * @pages: pointer to the list of pages that would be returned to the caller. + * @n_req_pages: number of requested pages. + * + * Driver must call this function to properly de-allocate a previous enlisted + * balloon pages before definetively releasing it back to the guest system. + * This function tries to remove @n_req_pages from the ballooned pages and + * return them to the caller in the @pages list. + * + * Note that this function may fail to dequeue some pages temporarily empty due + * to compaction isolated pages. + * + * Return: number of pages that were added to the @pages list. + */ +size_t balloon_page_list_dequeue(struct balloon_dev_info *b_dev_info, + struct list_head *pages, size_t n_req_pages) +{ + struct page *page, *tmp; + unsigned long flags; + size_t n_pages = 0; + + spin_lock_irqsave(&b_dev_info->pages_lock, flags); + list_for_each_entry_safe(page, tmp, &b_dev_info->pages, lru) { + if (n_pages == n_req_pages) + break; + + /* + * Block others from accessing the 'page' while we get around to + * establishing additional references and preparing the 'page' + * to be released by the balloon driver. + */ + if (!trylock_page(page)) + continue; + + if (IS_ENABLED(CONFIG_BALLOON_COMPACTION) && + PageIsolated(page)) { + /* raced with isolation */ + unlock_page(page); + continue; + } + balloon_page_delete(page); + __count_vm_event(BALLOON_DEFLATE); + list_add(&page->lru, pages); + unlock_page(page); + n_pages++; + } + spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); + + return n_pages; +} +EXPORT_SYMBOL_GPL(balloon_page_list_dequeue); + /* * balloon_page_alloc - allocates a new page for insertion into the balloon * page list. @@ -43,17 +142,9 @@ void balloon_page_enqueue(struct balloon_dev_info *b_dev_info, { unsigned long flags; - /* - * Block others from accessing the 'page' when we get around to - * establishing additional references. We should be the only one - * holding a reference to the 'page' at this point. - */ - BUG_ON(!trylock_page(page)); spin_lock_irqsave(&b_dev_info->pages_lock, flags); - balloon_page_insert(b_dev_info, page); - __count_vm_event(BALLOON_INFLATE); + balloon_page_enqueue_one(b_dev_info, page); spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); - unlock_page(page); } EXPORT_SYMBOL_GPL(balloon_page_enqueue); @@ -70,36 +161,13 @@ EXPORT_SYMBOL_GPL(balloon_page_enqueue); */ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info) { - struct page *page, *tmp; unsigned long flags; - bool dequeued_page; + LIST_HEAD(pages); + int n_pages; - dequeued_page = false; - spin_lock_irqsave(&b_dev_info->pages_lock, flags); - list_for_each_entry_safe(page, tmp, &b_dev_info->pages, lru) { - /* - * Block others from accessing the 'page' while we get around - * establishing additional references and preparing the 'page' - * to be released by the balloon driver. - */ - if (trylock_page(page)) { -#ifdef CONFIG_BALLOON_COMPACTION - if (PageIsolated(page)) { - /* raced with isolation */ - unlock_page(page); - continue; - } -#endif - balloon_page_delete(page); - __count_vm_event(BALLOON_DEFLATE); - unlock_page(page); - dequeued_page = true; - break; - } - } - spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); + n_pages = balloon_page_list_dequeue(b_dev_info, &pages, 1); - if (!dequeued_page) { + if (n_pages != 1) { /* * If we are unable to dequeue a balloon page because the page * list is empty and there is no isolated pages, then something @@ -112,9 +180,9 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info) !b_dev_info->isolated_pages)) BUG(); spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); - page = NULL; + return NULL; } - return page; + return list_first_entry(&pages, struct page, lru); } EXPORT_SYMBOL_GPL(balloon_page_dequeue); -- cgit v1.2.3 From 4618d6719743b60f1da4b8112c4518ee46110b94 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 23 May 2019 20:06:49 +0200 Subject: net: phy: add interface mode PHY_INTERFACE_MODE_USXGMII Add support for interface mode PHY_INTERFACE_MODE_USXGMII. Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 073fb151b5a9..7180b1d1e5e3 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -103,6 +103,7 @@ typedef enum { PHY_INTERFACE_MODE_XAUI, /* 10GBASE-KR, XFI, SFI - single lane 10G Serdes */ PHY_INTERFACE_MODE_10GKR, + PHY_INTERFACE_MODE_USXGMII, PHY_INTERFACE_MODE_MAX, } phy_interface_t; @@ -178,6 +179,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "xaui"; case PHY_INTERFACE_MODE_10GKR: return "10gbase-kr"; + case PHY_INTERFACE_MODE_USXGMII: + return "usxgmii"; default: return "unknown"; } -- cgit v1.2.3 From 3fce8e1eb9945c2771360542b71ff717460ba4d7 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Mon, 6 May 2019 14:16:11 -0500 Subject: leds: TI LMU: Add common code for TI LMU devices Create a TI LMU common framework for TI LMU devices that share common features. Currently the runtime ramp and brightness setting have been identified as common features with common register settings. This work is derived from Milo Kims TI LMU MFD code. Signed-off-by: Dan Murphy Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- drivers/leds/Kconfig | 9 +++ drivers/leds/Makefile | 1 + drivers/leds/leds-ti-lmu-common.c | 156 +++++++++++++++++++++++++++++++++++++ include/linux/leds-ti-lmu-common.h | 47 +++++++++++ 4 files changed, 213 insertions(+) create mode 100644 drivers/leds/leds-ti-lmu-common.c create mode 100644 include/linux/leds-ti-lmu-common.h (limited to 'include/linux') diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 71be87bdb926..4e262696be19 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -783,6 +783,15 @@ config LEDS_NIC78BX To compile this driver as a module, choose M here: the module will be called leds-nic78bx. +config LEDS_TI_LMU_COMMON + tristate "LED driver for TI LMU" + depends on LEDS_CLASS + depends on REGMAP + help + Say Y to enable the LED driver for TI LMU devices. + This supports common features between the TI LM3532, LM3631, LM3632, + LM3633, LM3695 and LM3697. + comment "LED Triggers" source "drivers/leds/trigger/Kconfig" diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index 1e9702ebffee..84c49339a8e3 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -81,6 +81,7 @@ obj-$(CONFIG_LEDS_MT6323) += leds-mt6323.o obj-$(CONFIG_LEDS_LM3692X) += leds-lm3692x.o obj-$(CONFIG_LEDS_SC27XX_BLTC) += leds-sc27xx-bltc.o obj-$(CONFIG_LEDS_LM3601X) += leds-lm3601x.o +obj-$(CONFIG_LEDS_TI_LMU_COMMON) += leds-ti-lmu-common.o # LED SPI Drivers obj-$(CONFIG_LEDS_CR0014114) += leds-cr0014114.o diff --git a/drivers/leds/leds-ti-lmu-common.c b/drivers/leds/leds-ti-lmu-common.c new file mode 100644 index 000000000000..adc7293004f1 --- /dev/null +++ b/drivers/leds/leds-ti-lmu-common.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright 2015 Texas Instruments +// Copyright 2018 Sebastian Reichel +// Copyright 2018 Pavel Machek +// TI LMU LED common framework, based on previous work from +// Milo Kim + +#include +#include +#include + +#include + +const static int ramp_table[16] = {2048, 262000, 524000, 1049000, 2090000, + 4194000, 8389000, 16780000, 33550000, 41940000, + 50330000, 58720000, 67110000, 83880000, + 100660000, 117440000}; + +static int ti_lmu_common_update_brightness(struct ti_lmu_bank *lmu_bank, + int brightness) +{ + struct regmap *regmap = lmu_bank->regmap; + u8 reg, val; + int ret; + + /* + * Brightness register update + * + * 11 bit dimming: update LSB bits and write MSB byte. + * MSB brightness should be shifted. + * 8 bit dimming: write MSB byte. + */ + if (lmu_bank->max_brightness == MAX_BRIGHTNESS_11BIT) { + reg = lmu_bank->lsb_brightness_reg; + ret = regmap_update_bits(regmap, reg, + LMU_11BIT_LSB_MASK, + brightness); + if (ret) + return ret; + + val = brightness >> LMU_11BIT_MSB_SHIFT; + } else { + val = brightness; + } + + reg = lmu_bank->msb_brightness_reg; + + return regmap_write(regmap, reg, val); +} + +int ti_lmu_common_set_brightness(struct ti_lmu_bank *lmu_bank, int brightness) +{ + return ti_lmu_common_update_brightness(lmu_bank, brightness); +} +EXPORT_SYMBOL(ti_lmu_common_set_brightness); + +static int ti_lmu_common_convert_ramp_to_index(unsigned int usec) +{ + int size = ARRAY_SIZE(ramp_table); + int i; + + if (usec <= ramp_table[0]) + return 0; + + if (usec > ramp_table[size - 1]) + return size - 1; + + for (i = 1; i < size; i++) { + if (usec == ramp_table[i]) + return i; + + /* Find an approximate index by looking up the table */ + if (usec > ramp_table[i - 1] && usec < ramp_table[i]) { + if (usec - ramp_table[i - 1] < ramp_table[i] - usec) + return i - 1; + else + return i; + } + } + + return -EINVAL; +} + +int ti_lmu_common_set_ramp(struct ti_lmu_bank *lmu_bank) +{ + struct regmap *regmap = lmu_bank->regmap; + u8 ramp, ramp_up, ramp_down; + + if (lmu_bank->ramp_up_usec == 0 && lmu_bank->ramp_down_usec == 0) { + ramp_up = 0; + ramp_down = 0; + } else { + ramp_up = ti_lmu_common_convert_ramp_to_index(lmu_bank->ramp_up_usec); + ramp_down = ti_lmu_common_convert_ramp_to_index(lmu_bank->ramp_down_usec); + } + + if (ramp_up < 0 || ramp_down < 0) + return -EINVAL; + + ramp = (ramp_up << 4) | ramp_down; + + return regmap_write(regmap, lmu_bank->runtime_ramp_reg, ramp); + +} +EXPORT_SYMBOL(ti_lmu_common_set_ramp); + +int ti_lmu_common_get_ramp_params(struct device *dev, + struct fwnode_handle *child, + struct ti_lmu_bank *lmu_data) +{ + int ret; + + ret = fwnode_property_read_u32(child, "ramp-up-us", + &lmu_data->ramp_up_usec); + if (ret) + dev_warn(dev, "ramp-up-us property missing\n"); + + + ret = fwnode_property_read_u32(child, "ramp-down-us", + &lmu_data->ramp_down_usec); + if (ret) + dev_warn(dev, "ramp-down-us property missing\n"); + + return 0; +} +EXPORT_SYMBOL(ti_lmu_common_get_ramp_params); + +int ti_lmu_common_get_brt_res(struct device *dev, struct fwnode_handle *child, + struct ti_lmu_bank *lmu_data) +{ + int ret; + + ret = device_property_read_u32(dev, "ti,brightness-resolution", + &lmu_data->max_brightness); + if (ret) + ret = fwnode_property_read_u32(child, + "ti,brightness-resolution", + &lmu_data->max_brightness); + if (lmu_data->max_brightness <= 0) { + lmu_data->max_brightness = MAX_BRIGHTNESS_8BIT; + return ret; + } + + if (lmu_data->max_brightness > MAX_BRIGHTNESS_11BIT) + lmu_data->max_brightness = MAX_BRIGHTNESS_11BIT; + + + return 0; +} +EXPORT_SYMBOL(ti_lmu_common_get_brt_res); + +MODULE_DESCRIPTION("TI LMU common LED framework"); +MODULE_AUTHOR("Sebastian Reichel"); +MODULE_AUTHOR("Dan Murphy "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("ti-lmu-led-common"); diff --git a/include/linux/leds-ti-lmu-common.h b/include/linux/leds-ti-lmu-common.h new file mode 100644 index 000000000000..5eb111f38803 --- /dev/null +++ b/include/linux/leds-ti-lmu-common.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// TI LMU Common Core +// Copyright (C) 2018 Texas Instruments Incorporated - http://www.ti.com/ + +#ifndef _TI_LMU_COMMON_H_ +#define _TI_LMU_COMMON_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define LMU_11BIT_LSB_MASK (BIT(0) | BIT(1) | BIT(2)) +#define LMU_11BIT_MSB_SHIFT 3 + +#define MAX_BRIGHTNESS_8BIT 255 +#define MAX_BRIGHTNESS_11BIT 2047 + +struct ti_lmu_bank { + struct regmap *regmap; + + int max_brightness; + + u8 lsb_brightness_reg; + u8 msb_brightness_reg; + + u8 runtime_ramp_reg; + u32 ramp_up_usec; + u32 ramp_down_usec; +}; + +int ti_lmu_common_set_brightness(struct ti_lmu_bank *lmu_bank, int brightness); + +int ti_lmu_common_set_ramp(struct ti_lmu_bank *lmu_bank); + +int ti_lmu_common_get_ramp_params(struct device *dev, + struct fwnode_handle *child, + struct ti_lmu_bank *lmu_data); + +int ti_lmu_common_get_brt_res(struct device *dev, struct fwnode_handle *child, + struct ti_lmu_bank *lmu_data); + +#endif /* _TI_LMU_COMMON_H_ */ -- cgit v1.2.3 From b86b9ba55a2e0d1013db26084385d83dd7d0b475 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Mon, 6 May 2019 14:16:13 -0500 Subject: mfd: ti-lmu: Remove support for LM3697 Remove support for the LM3697 from the ti-lmu driver in favor of a dedicated LED driver. Signed-off-by: Dan Murphy Acked-by: Lee Jones Signed-off-by: Jacek Anaszewski --- drivers/mfd/Kconfig | 2 +- drivers/mfd/ti-lmu.c | 17 -------------- include/linux/mfd/ti-lmu-register.h | 44 ------------------------------------- include/linux/mfd/ti-lmu.h | 1 - 4 files changed, 1 insertion(+), 63 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 294d9567cc71..8933485b28e7 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1336,7 +1336,7 @@ config MFD_TI_LMU help Say yes here to enable support for TI LMU chips. - TI LMU MFD supports LM3532, LM3631, LM3632, LM3633, LM3695 and LM3697. + TI LMU MFD supports LM3532, LM3631, LM3632, LM3633, and LM3695. It consists of backlight, LED and regulator driver. It provides consistent device controls for lighting functions. diff --git a/drivers/mfd/ti-lmu.c b/drivers/mfd/ti-lmu.c index b06cb908d1aa..89b1c5b584af 100644 --- a/drivers/mfd/ti-lmu.c +++ b/drivers/mfd/ti-lmu.c @@ -111,20 +111,6 @@ static const struct mfd_cell lm3695_devices[] = { }, }; -static const struct mfd_cell lm3697_devices[] = { - { - .name = "ti-lmu-backlight", - .id = LM3697, - .of_compatible = "ti,lm3697-backlight", - }, - /* Monitoring driver for open/short circuit detection */ - { - .name = "ti-lmu-fault-monitor", - .id = LM3697, - .of_compatible = "ti,lm3697-fault-monitor", - }, -}; - #define TI_LMU_DATA(chip, max_reg) \ static const struct ti_lmu_data chip##_data = \ { \ @@ -137,7 +123,6 @@ TI_LMU_DATA(lm3631, LM3631_MAX_REG); TI_LMU_DATA(lm3632, LM3632_MAX_REG); TI_LMU_DATA(lm3633, LM3633_MAX_REG); TI_LMU_DATA(lm3695, LM3695_MAX_REG); -TI_LMU_DATA(lm3697, LM3697_MAX_REG); static int ti_lmu_probe(struct i2c_client *cl, const struct i2c_device_id *id) { @@ -206,7 +191,6 @@ static const struct of_device_id ti_lmu_of_match[] = { { .compatible = "ti,lm3632", .data = &lm3632_data }, { .compatible = "ti,lm3633", .data = &lm3633_data }, { .compatible = "ti,lm3695", .data = &lm3695_data }, - { .compatible = "ti,lm3697", .data = &lm3697_data }, { } }; MODULE_DEVICE_TABLE(of, ti_lmu_of_match); @@ -216,7 +200,6 @@ static const struct i2c_device_id ti_lmu_ids[] = { { "lm3632", LM3632 }, { "lm3633", LM3633 }, { "lm3695", LM3695 }, - { "lm3697", LM3697 }, { } }; MODULE_DEVICE_TABLE(i2c, ti_lmu_ids); diff --git a/include/linux/mfd/ti-lmu-register.h b/include/linux/mfd/ti-lmu-register.h index f09510561a55..76998b01764b 100644 --- a/include/linux/mfd/ti-lmu-register.h +++ b/include/linux/mfd/ti-lmu-register.h @@ -189,48 +189,4 @@ #define LM3695_REG_BRT_MSB 0x14 #define LM3695_MAX_REG 0x14 - -/* LM3697 */ -#define LM3697_REG_HVLED_OUTPUT_CFG 0x10 -#define LM3697_HVLED1_CFG_MASK BIT(0) -#define LM3697_HVLED2_CFG_MASK BIT(1) -#define LM3697_HVLED3_CFG_MASK BIT(2) -#define LM3697_HVLED1_CFG_SHIFT 0 -#define LM3697_HVLED2_CFG_SHIFT 1 -#define LM3697_HVLED3_CFG_SHIFT 2 - -#define LM3697_REG_BL0_RAMP 0x11 -#define LM3697_REG_BL1_RAMP 0x12 -#define LM3697_RAMPUP_MASK 0xF0 -#define LM3697_RAMPUP_SHIFT 4 -#define LM3697_RAMPDN_MASK 0x0F -#define LM3697_RAMPDN_SHIFT 0 - -#define LM3697_REG_RAMP_CONF 0x14 -#define LM3697_RAMP_MASK 0x0F -#define LM3697_RAMP_EACH 0x05 - -#define LM3697_REG_PWM_CFG 0x1C -#define LM3697_PWM_A_MASK BIT(0) -#define LM3697_PWM_B_MASK BIT(1) - -#define LM3697_REG_IMAX_A 0x17 -#define LM3697_REG_IMAX_B 0x18 - -#define LM3697_REG_FEEDBACK_ENABLE 0x19 - -#define LM3697_REG_BRT_A_LSB 0x20 -#define LM3697_REG_BRT_A_MSB 0x21 -#define LM3697_REG_BRT_B_LSB 0x22 -#define LM3697_REG_BRT_B_MSB 0x23 - -#define LM3697_REG_ENABLE 0x24 - -#define LM3697_REG_OPEN_FAULT_STATUS 0xB0 - -#define LM3697_REG_SHORT_FAULT_STATUS 0xB2 - -#define LM3697_REG_MONITOR_ENABLE 0xB4 - -#define LM3697_MAX_REG 0xB4 #endif diff --git a/include/linux/mfd/ti-lmu.h b/include/linux/mfd/ti-lmu.h index 7762c1bce55d..54e9d272e81c 100644 --- a/include/linux/mfd/ti-lmu.h +++ b/include/linux/mfd/ti-lmu.h @@ -26,7 +26,6 @@ enum ti_lmu_id { LM3632, LM3633, LM3695, - LM3697, LMU_MAX_ID, }; -- cgit v1.2.3 From 9b4d2b635bd0cf8dfc45223f66fd85792fd2dc7b Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 14 May 2019 13:40:52 -0700 Subject: of/fdt: Remove dead code and mark functions with __init Some functions in here are never called, and others are only called during __init. Remove the dead code and some dead exports for functions that don't exist (I'm looking at you of_fdt_get_string!). Mark some functions with __init so we can throw them away after we boot up and poke at the FDT blob too. Cc: Hsin-Yi Wang Signed-off-by: Stephen Boyd Signed-off-by: Rob Herring --- drivers/of/fdt.c | 37 +++++-------------------------------- include/linux/of_fdt.h | 11 ----------- 2 files changed, 5 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index de893c9616a1..918098c9f72a 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -38,7 +38,7 @@ * memory entries in the /memory node. This function may be called * any time after initial_boot_param is set. */ -void of_fdt_limit_memory(int limit) +void __init of_fdt_limit_memory(int limit) { int memory; int len; @@ -110,25 +110,6 @@ static int of_fdt_is_compatible(const void *blob, return 0; } -/** - * of_fdt_is_big_endian - Return true if given node needs BE MMIO accesses - * @blob: A device tree blob - * @node: node to test - * - * Returns true if the node has a "big-endian" property, or if the kernel - * was compiled for BE *and* the node has a "native-endian" property. - * Returns false otherwise. - */ -bool of_fdt_is_big_endian(const void *blob, unsigned long node) -{ - if (fdt_getprop(blob, node, "big-endian", NULL)) - return true; - if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) && - fdt_getprop(blob, node, "native-endian", NULL)) - return true; - return false; -} - static bool of_fdt_device_is_available(const void *blob, unsigned long node) { const char *status = fdt_getprop(blob, node, "status", NULL); @@ -145,8 +126,8 @@ static bool of_fdt_device_is_available(const void *blob, unsigned long node) /** * of_fdt_match - Return true if node matches a list of compatible values */ -int of_fdt_match(const void *blob, unsigned long node, - const char *const *compat) +static int __init of_fdt_match(const void *blob, unsigned long node, + const char *const *compat) { unsigned int tmp, score = 0; @@ -758,7 +739,7 @@ int __init of_scan_flat_dt_subnodes(unsigned long parent, * @return offset of the subnode, or -FDT_ERR_NOTFOUND if there is none */ -int of_get_flat_dt_subnode_by_name(unsigned long node, const char *uname) +int __init of_get_flat_dt_subnode_by_name(unsigned long node, const char *uname) { return fdt_subnode_offset(initial_boot_params, node, uname); } @@ -771,14 +752,6 @@ unsigned long __init of_get_flat_dt_root(void) return 0; } -/** - * of_get_flat_dt_size - Return the total size of the FDT - */ -int __init of_get_flat_dt_size(void) -{ - return fdt_totalsize(initial_boot_params); -} - /** * of_get_flat_dt_prop - Given a node in the flat blob, return the property ptr * @@ -804,7 +777,7 @@ int __init of_flat_dt_is_compatible(unsigned long node, const char *compat) /** * of_flat_dt_match - Return true if node matches a list of compatible values */ -int __init of_flat_dt_match(unsigned long node, const char *const *compat) +static int __init of_flat_dt_match(unsigned long node, const char *const *compat) { return of_fdt_match(initial_boot_params, node, compat); } diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index a713e5d156d8..acf820e88952 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -23,15 +23,6 @@ struct device_node; /* For scanning an arbitrary device-tree at any time */ -extern char *of_fdt_get_string(const void *blob, u32 offset); -extern void *of_fdt_get_property(const void *blob, - unsigned long node, - const char *name, - int *size); -extern bool of_fdt_is_big_endian(const void *blob, - unsigned long node); -extern int of_fdt_match(const void *blob, unsigned long node, - const char *const *compat); extern void *of_fdt_unflatten_tree(const unsigned long *blob, struct device_node *dad, struct device_node **mynodes); @@ -64,9 +55,7 @@ extern int of_get_flat_dt_subnode_by_name(unsigned long node, extern const void *of_get_flat_dt_prop(unsigned long node, const char *name, int *size); extern int of_flat_dt_is_compatible(unsigned long node, const char *name); -extern int of_flat_dt_match(unsigned long node, const char *const *matches); extern unsigned long of_get_flat_dt_root(void); -extern int of_get_flat_dt_size(void); extern uint32_t of_get_flat_dt_phandle(unsigned long node); extern int early_init_dt_scan_chosen(unsigned long node, const char *uname, -- cgit v1.2.3 From 5327ed3d44b754f5cc51d5b3f18e442eaebacff5 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 24 May 2019 23:25:12 +0100 Subject: bpf: verifier: mark verified-insn with sub-register zext flag eBPF ISA specification requires high 32-bit cleared when low 32-bit sub-register is written. This applies to destination register of ALU32 etc. JIT back-ends must guarantee this semantic when doing code-gen. x86_64 and AArch64 ISA has the same semantics, so the corresponding JIT back-end doesn't need to do extra work. However, 32-bit arches (arm, x86, nfp etc.) and some other 64-bit arches (PowerPC, SPARC etc) need to do explicit zero extension to meet this requirement, otherwise code like the following will fail. u64_value = (u64) u32_value ... other uses of u64_value This is because compiler could exploit the semantic described above and save those zero extensions for extending u32_value to u64_value, these JIT back-ends are expected to guarantee this through inserting extra zero extensions which however could be a significant increase on the code size. Some benchmarks show there could be ~40% sub-register writes out of total insns, meaning at least ~40% extra code-gen. One observation is these extra zero extensions are not always necessary. Take above code snippet for example, it is possible u32_value will never be casted into a u64, the value of high 32-bit of u32_value then could be ignored and extra zero extension could be eliminated. This patch implements this idea, insns defining sub-registers will be marked when the high 32-bit of the defined sub-register matters. For those unmarked insns, it is safe to eliminate high 32-bit clearnace for them. Algo: - Split read flags into READ32 and READ64. - Record index of insn that does sub-register write. Keep the index inside reg state and update it during verifier insn walking. - A full register read on a sub-register marks its definition insn as needing zero extension on dst register. A new sub-register write overrides the old one. - When propagating read64 during path pruning, also mark any insn defining a sub-register that is read in the pruned path as full-register. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 14 +++- kernel/bpf/verifier.c | 173 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 171 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 405b502283c5..704ed7971472 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -36,9 +36,11 @@ */ enum bpf_reg_liveness { REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */ - REG_LIVE_READ, /* reg was read, so we're sensitive to initial value */ - REG_LIVE_WRITTEN, /* reg was written first, screening off later reads */ - REG_LIVE_DONE = 4, /* liveness won't be updating this register anymore */ + REG_LIVE_READ32 = 0x1, /* reg was read, so we're sensitive to initial value */ + REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */ + REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64, + REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */ + REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */ }; struct bpf_reg_state { @@ -131,6 +133,11 @@ struct bpf_reg_state { * pointing to bpf_func_state. */ u32 frameno; + /* Tracks subreg definition. The stored value is the insn_idx of the + * writing insn. This is safe because subreg_def is used before any insn + * patching which only happens after main verification finished. + */ + s32 subreg_def; enum bpf_reg_liveness live; }; @@ -233,6 +240,7 @@ struct bpf_insn_aux_data { int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ int sanitize_stack_off; /* stack slot to be cleared */ bool seen; /* this insn was processed by the verifier */ + bool zext_dst; /* this insn zero extends dst reg */ u8 alu_state; /* used in combination with alu_limit */ bool prune_point; unsigned int orig_idx; /* original instruction index */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 550091c7a46a..f6b4c7148c3e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -982,6 +982,7 @@ static void mark_reg_not_init(struct bpf_verifier_env *env, __mark_reg_not_init(regs + regno); } +#define DEF_NOT_SUBREG (0) static void init_reg_state(struct bpf_verifier_env *env, struct bpf_func_state *state) { @@ -992,6 +993,7 @@ static void init_reg_state(struct bpf_verifier_env *env, mark_reg_not_init(env, regs, i); regs[i].live = REG_LIVE_NONE; regs[i].parent = NULL; + regs[i].subreg_def = DEF_NOT_SUBREG; } /* frame pointer */ @@ -1137,7 +1139,7 @@ next: */ static int mark_reg_read(struct bpf_verifier_env *env, const struct bpf_reg_state *state, - struct bpf_reg_state *parent) + struct bpf_reg_state *parent, u8 flag) { bool writes = parent == state->parent; /* Observe write marks */ int cnt = 0; @@ -1152,17 +1154,26 @@ static int mark_reg_read(struct bpf_verifier_env *env, parent->var_off.value, parent->off); return -EFAULT; } - if (parent->live & REG_LIVE_READ) + /* The first condition is more likely to be true than the + * second, checked it first. + */ + if ((parent->live & REG_LIVE_READ) == flag || + parent->live & REG_LIVE_READ64) /* The parentage chain never changes and * this parent was already marked as LIVE_READ. * There is no need to keep walking the chain again and * keep re-marking all parents as LIVE_READ. * This case happens when the same register is read * multiple times without writes into it in-between. + * Also, if parent has the stronger REG_LIVE_READ64 set, + * then no need to set the weak REG_LIVE_READ32. */ break; /* ... then we depend on parent's value */ - parent->live |= REG_LIVE_READ; + parent->live |= flag; + /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */ + if (flag == REG_LIVE_READ64) + parent->live &= ~REG_LIVE_READ32; state = parent; parent = state->parent; writes = true; @@ -1174,12 +1185,111 @@ static int mark_reg_read(struct bpf_verifier_env *env, return 0; } +/* This function is supposed to be used by the following 32-bit optimization + * code only. It returns TRUE if the source or destination register operates + * on 64-bit, otherwise return FALSE. + */ +static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn, + u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t) +{ + u8 code, class, op; + + code = insn->code; + class = BPF_CLASS(code); + op = BPF_OP(code); + if (class == BPF_JMP) { + /* BPF_EXIT for "main" will reach here. Return TRUE + * conservatively. + */ + if (op == BPF_EXIT) + return true; + if (op == BPF_CALL) { + /* BPF to BPF call will reach here because of marking + * caller saved clobber with DST_OP_NO_MARK for which we + * don't care the register def because they are anyway + * marked as NOT_INIT already. + */ + if (insn->src_reg == BPF_PSEUDO_CALL) + return false; + /* Helper call will reach here because of arg type + * check, conservatively return TRUE. + */ + if (t == SRC_OP) + return true; + + return false; + } + } + + if (class == BPF_ALU64 || class == BPF_JMP || + /* BPF_END always use BPF_ALU class. */ + (class == BPF_ALU && op == BPF_END && insn->imm == 64)) + return true; + + if (class == BPF_ALU || class == BPF_JMP32) + return false; + + if (class == BPF_LDX) { + if (t != SRC_OP) + return BPF_SIZE(code) == BPF_DW; + /* LDX source must be ptr. */ + return true; + } + + if (class == BPF_STX) { + if (reg->type != SCALAR_VALUE) + return true; + return BPF_SIZE(code) == BPF_DW; + } + + if (class == BPF_LD) { + u8 mode = BPF_MODE(code); + + /* LD_IMM64 */ + if (mode == BPF_IMM) + return true; + + /* Both LD_IND and LD_ABS return 32-bit data. */ + if (t != SRC_OP) + return false; + + /* Implicit ctx ptr. */ + if (regno == BPF_REG_6) + return true; + + /* Explicit source could be any width. */ + return true; + } + + if (class == BPF_ST) + /* The only source register for BPF_ST is a ptr. */ + return true; + + /* Conservatively return true at default. */ + return true; +} + +static void mark_insn_zext(struct bpf_verifier_env *env, + struct bpf_reg_state *reg) +{ + s32 def_idx = reg->subreg_def; + + if (def_idx == DEF_NOT_SUBREG) + return; + + env->insn_aux_data[def_idx - 1].zext_dst = true; + /* The dst will be zero extended, so won't be sub-register anymore. */ + reg->subreg_def = DEF_NOT_SUBREG; +} + static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, enum reg_arg_type t) { struct bpf_verifier_state *vstate = env->cur_state; struct bpf_func_state *state = vstate->frame[vstate->curframe]; + struct bpf_insn *insn = env->prog->insnsi + env->insn_idx; struct bpf_reg_state *reg, *regs = state->regs; + bool rw64; if (regno >= MAX_BPF_REG) { verbose(env, "R%d is invalid\n", regno); @@ -1187,6 +1297,7 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, } reg = ®s[regno]; + rw64 = is_reg64(env, insn, regno, reg, t); if (t == SRC_OP) { /* check whether register used as source operand can be read */ if (reg->type == NOT_INIT) { @@ -1197,7 +1308,11 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, if (regno == BPF_REG_FP) return 0; - return mark_reg_read(env, reg, reg->parent); + if (rw64) + mark_insn_zext(env, reg); + + return mark_reg_read(env, reg, reg->parent, + rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32); } else { /* check whether register used as dest operand can be written to */ if (regno == BPF_REG_FP) { @@ -1205,6 +1320,7 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, return -EACCES; } reg->live |= REG_LIVE_WRITTEN; + reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1; if (t == DST_OP) mark_reg_unknown(env, regs, regno); } @@ -1384,7 +1500,8 @@ static int check_stack_read(struct bpf_verifier_env *env, state->regs[value_regno].live |= REG_LIVE_WRITTEN; } mark_reg_read(env, ®_state->stack[spi].spilled_ptr, - reg_state->stack[spi].spilled_ptr.parent); + reg_state->stack[spi].spilled_ptr.parent, + REG_LIVE_READ64); return 0; } else { int zeros = 0; @@ -1401,7 +1518,8 @@ static int check_stack_read(struct bpf_verifier_env *env, return -EACCES; } mark_reg_read(env, ®_state->stack[spi].spilled_ptr, - reg_state->stack[spi].spilled_ptr.parent); + reg_state->stack[spi].spilled_ptr.parent, + REG_LIVE_READ64); if (value_regno >= 0) { if (zeros == size) { /* any size read into register is zero extended, @@ -2110,6 +2228,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn value_regno); if (reg_type_may_be_null(reg_type)) regs[value_regno].id = ++env->id_gen; + /* A load of ctx field could have different + * actual load size with the one encoded in the + * insn. When the dst is PTR, it is for sure not + * a sub-register. + */ + regs[value_regno].subreg_def = DEF_NOT_SUBREG; } regs[value_regno].type = reg_type; } @@ -2369,7 +2493,8 @@ mark: * the whole slot to be marked as 'read' */ mark_reg_read(env, &state->stack[spi].spilled_ptr, - state->stack[spi].spilled_ptr.parent); + state->stack[spi].spilled_ptr.parent, + REG_LIVE_READ64); } return update_stack_depth(env, state, min_off); } @@ -3333,6 +3458,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); } + /* helper call returns 64-bit value. */ + regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; + /* update return register (already marked as written above) */ if (fn->ret_type == RET_INTEGER) { /* sets type to SCALAR_VALUE */ @@ -4264,6 +4392,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) */ *dst_reg = *src_reg; dst_reg->live |= REG_LIVE_WRITTEN; + dst_reg->subreg_def = DEF_NOT_SUBREG; } else { /* R1 = (u32) R2 */ if (is_pointer_value(env, insn->src_reg)) { @@ -4274,6 +4403,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) } else if (src_reg->type == SCALAR_VALUE) { *dst_reg = *src_reg; dst_reg->live |= REG_LIVE_WRITTEN; + dst_reg->subreg_def = env->insn_idx + 1; } else { mark_reg_unknown(env, regs, insn->dst_reg); @@ -5353,6 +5483,8 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) * Already marked as written above. */ mark_reg_unknown(env, regs, BPF_REG_0); + /* ld_abs load up to 32-bit skb data. */ + regs[BPF_REG_0].subreg_def = env->insn_idx + 1; return 0; } @@ -6309,20 +6441,33 @@ static bool states_equal(struct bpf_verifier_env *env, return true; } +/* Return 0 if no propagation happened. Return negative error code if error + * happened. Otherwise, return the propagated bit. + */ static int propagate_liveness_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, struct bpf_reg_state *parent_reg) { + u8 parent_flag = parent_reg->live & REG_LIVE_READ; + u8 flag = reg->live & REG_LIVE_READ; int err; - if (parent_reg->live & REG_LIVE_READ || !(reg->live & REG_LIVE_READ)) + /* When comes here, read flags of PARENT_REG or REG could be any of + * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need + * of propagation if PARENT_REG has strongest REG_LIVE_READ64. + */ + if (parent_flag == REG_LIVE_READ64 || + /* Or if there is no read flag from REG. */ + !flag || + /* Or if the read flag from REG is the same as PARENT_REG. */ + parent_flag == flag) return 0; - err = mark_reg_read(env, reg, parent_reg); + err = mark_reg_read(env, reg, parent_reg, flag); if (err) return err; - return 0; + return flag; } /* A write screens off any subsequent reads; but write marks come from the @@ -6356,8 +6501,10 @@ static int propagate_liveness(struct bpf_verifier_env *env, for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) { err = propagate_liveness_reg(env, &state_reg[i], &parent_reg[i]); - if (err) + if (err < 0) return err; + if (err == REG_LIVE_READ64) + mark_insn_zext(env, &parent_reg[i]); } /* Propagate stack slots. */ @@ -6367,11 +6514,11 @@ static int propagate_liveness(struct bpf_verifier_env *env, state_reg = &state->stack[i].spilled_ptr; err = propagate_liveness_reg(env, state_reg, parent_reg); - if (err) + if (err < 0) return err; } } - return err; + return 0; } static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) -- cgit v1.2.3 From 7d134041a89610ae552501fc88652805addcdee4 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 24 May 2019 23:25:14 +0100 Subject: bpf: introduce new mov32 variant for doing explicit zero extension The encoding for this new variant is based on BPF_X format. "imm" field was 0 only, now it could be 1 which means doing zero extension unconditionally .code = BPF_ALU | BPF_MOV | BPF_X .dst_reg = DST .src_reg = SRC .imm = 1 We use this new form for doing zero extension for which verifier will guarantee SRC == DST. Implications on JIT back-ends when doing code-gen for BPF_ALU | BPF_MOV | BPF_X: 1. No change if hardware already does zero extension unconditionally for sub-register write. 2. Otherwise, when seeing imm == 1, just generate insns to clear high 32-bit. No need to generate insns for the move because when imm == 1, dst_reg is the same as src_reg at the moment. Interpreter doesn't need change as well. It is doing unconditionally zero extension for mov32 already. One helper macro BPF_ZEXT_REG is added to help creating zero extension insn using this new mov32 variant. One helper function insn_is_zext is added for checking one insn is an zero extension on dst. This will be widely used by a few JIT back-ends in later patches in this set. Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 7148bab96943..bb10ffb88452 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -160,6 +160,20 @@ struct ctl_table_header; .off = 0, \ .imm = IMM }) +/* Special form of mov32, used for doing explicit zero extension on dst. */ +#define BPF_ZEXT_REG(DST) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = DST, \ + .off = 0, \ + .imm = 1 }) + +static inline bool insn_is_zext(const struct bpf_insn *insn) +{ + return insn->code == (BPF_ALU | BPF_MOV | BPF_X) && insn->imm == 1; +} + /* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ #define BPF_LD_IMM64(DST, IMM) \ BPF_LD_IMM64_RAW(DST, 0, IMM) -- cgit v1.2.3 From a4b1d3c1ddf6cb441187b6c130a473c16a05a356 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 24 May 2019 23:25:15 +0100 Subject: bpf: verifier: insert zero extension according to analysis result After previous patches, verifier will mark a insn if it really needs zero extension on dst_reg. It is then for back-ends to decide how to use such information to eliminate unnecessary zero extension code-gen during JIT compilation. One approach is verifier insert explicit zero extension for those insns that need zero extension in a generic way, JIT back-ends then do not generate zero extension for sub-register write at default. However, only those back-ends which do not have hardware zero extension want this optimization. Back-ends like x86_64 and AArch64 have hardware zero extension support that the insertion should be disabled. This patch introduces new target hook "bpf_jit_needs_zext" which returns false at default, meaning verifier zero extension insertion is disabled at default. A back-end could override this hook to return true if it doesn't have hardware support and want verifier insert zero extension explicitly. Offload targets do not use this native target hook, instead, they could get the optimization results using bpf_prog_offload_ops.finalize. NOTE: arches could have diversified features, it is possible for one arch to have hardware zero extension support for some sub-register write insns but not for all. For example, PowerPC, SPARC have zero extended loads, but not for alu32. So when verifier zero extension insertion enabled, these JIT back-ends need to peephole insns to remove those zero extension inserted for insn that actually has hardware zero extension support. The peephole could be as simple as looking the next insn, if it is a special zero extension insn then it is safe to eliminate it if the current insn has hardware zero extension support. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + include/linux/filter.h | 1 + kernel/bpf/core.c | 9 +++++++++ kernel/bpf/verifier.c | 41 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 52 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4fb3aa2dc975..d98141edb74b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -370,6 +370,7 @@ struct bpf_prog_aux { u32 id; u32 func_cnt; /* used by non-func prog as the number of func progs */ u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ + bool verifier_zext; /* Zero extensions has been inserted by verifier. */ bool offload_requested; struct bpf_prog **func; void *jit_data; /* JIT specific data. arch dependent */ diff --git a/include/linux/filter.h b/include/linux/filter.h index bb10ffb88452..ba8b65270e0d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -825,6 +825,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); +bool bpf_jit_needs_zext(void); bool bpf_helper_changes_pkt_data(void *func); static inline bool bpf_dump_raw_ok(void) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 242a643af82f..3675b19ecb90 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2090,6 +2090,15 @@ bool __weak bpf_helper_changes_pkt_data(void *func) return false; } +/* Return TRUE if the JIT backend wants verifier to enable sub-register usage + * analysis code and wants explicit zero extension inserted by verifier. + * Otherwise, return FALSE. + */ +bool __weak bpf_jit_needs_zext(void) +{ + return false; +} + /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call * skb_copy_bits(), so provide a weak definition of it for NET-less config. */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a6af3166acae..d4394a84b9eb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7640,6 +7640,38 @@ static int opt_remove_nops(struct bpf_verifier_env *env) return 0; } +static int opt_subreg_zext_lo32(struct bpf_verifier_env *env) +{ + struct bpf_insn_aux_data *aux = env->insn_aux_data; + struct bpf_insn *insns = env->prog->insnsi; + int i, delta = 0, len = env->prog->len; + struct bpf_insn zext_patch[2]; + struct bpf_prog *new_prog; + + zext_patch[1] = BPF_ZEXT_REG(0); + for (i = 0; i < len; i++) { + int adj_idx = i + delta; + struct bpf_insn insn; + + if (!aux[adj_idx].zext_dst) + continue; + + insn = insns[adj_idx]; + zext_patch[0] = insn; + zext_patch[1].dst_reg = insn.dst_reg; + zext_patch[1].src_reg = insn.dst_reg; + new_prog = bpf_patch_insn_data(env, adj_idx, zext_patch, 2); + if (!new_prog) + return -ENOMEM; + env->prog = new_prog; + insns = new_prog->insnsi; + aux = env->insn_aux_data; + delta += 2; + } + + return 0; +} + /* convert load instructions that access fields of a context type into a * sequence of instructions that access fields of the underlying structure: * struct __sk_buff -> struct sk_buff @@ -8490,6 +8522,15 @@ skip_full_check: if (ret == 0) ret = fixup_bpf_calls(env); + /* do 32-bit optimization after insn patching has done so those patched + * insns could be handled correctly. + */ + if (ret == 0 && bpf_jit_needs_zext() && + !bpf_prog_is_dev_bound(env->prog->aux)) { + ret = opt_subreg_zext_lo32(env); + env->prog->aux->verifier_zext = !ret; + } + if (ret == 0) ret = fixup_call_args(env); -- cgit v1.2.3 From 23634ebc1d946f19eb112d4455c1d84948875e31 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 24 Mar 2019 15:25:51 -0700 Subject: rcu: Check for wakeup-safe conditions in rcu_read_unlock_special() When RCU core processing is offloaded from RCU_SOFTIRQ to the rcuc kthreads, a full and unconditional wakeup is required to initiate RCU core processing. In contrast, when RCU core processing is carried out by RCU_SOFTIRQ, a raise_softirq() suffices. Of course, there are situations where raise_softirq() does a full wakeup, but these do not occur with normal usage of rcu_read_unlock(). The reason that full wakeups can be problematic is that the scheduler sometimes invokes rcu_read_unlock() with its pi or rq locks held, which can of course result in deadlock in CONFIG_PREEMPT=y kernels when rcu_read_unlock() invokes the scheduler. Scheduler invocations can happen in the following situations: (1) The just-ended reader has been subjected to RCU priority boosting, in which case rcu_read_unlock() must deboost, (2) Interrupts were disabled across the call to rcu_read_unlock(), so the quiescent state must be deferred, requiring a wakeup of the rcuc kthread corresponding to the current CPU. Now, the scheduler may hold one of its locks across rcu_read_unlock() only if preemption has been disabled across the entire RCU read-side critical section, which in the days prior to RCU flavor consolidation meant that rcu_read_unlock() never needed to do wakeups. However, this is no longer the case for any but the first rcu_read_unlock() following a condition (e.g., preempted RCU reader) requiring special rcu_read_unlock() attention. For example, an RCU read-side critical section might be preempted, but preemption might be disabled across the rcu_read_unlock(). The rcu_read_unlock() must defer the quiescent state, and therefore leaves the task queued on its leaf rcu_node structure. If a scheduler interrupt occurs, the scheduler might well invoke rcu_read_unlock() with one of its locks held. However, the preempted task is still queued, so rcu_read_unlock() will attempt to defer the quiescent state once more. When RCU core processing is carried out by RCU_SOFTIRQ, this works just fine: The raise_softirq() function simply sets a bit in a per-CPU mask and the RCU core processing will be undertaken upon return from interrupt. Not so when RCU core processing is carried out by the rcuc kthread: In this case, the required wakeup can result in deadlock. The initial solution to this problem was to use set_tsk_need_resched() and set_preempt_need_resched() to force a future context switch, which allows rcu_preempt_note_context_switch() to report the deferred quiescent state to RCU's core processing. Unfortunately for expedited grace periods, there can be a significant delay between the call for a context switch and the actual context switch. This commit therefore introduces a ->deferred_qs flag to the task_struct structure's rcu_special structure. This flag is initially false, and is set to true by the first call to rcu_read_unlock() requiring special attention, then finally reset back to false when the quiescent state is finally reported. Then rcu_read_unlock() attempts full wakeups only when ->deferred_qs is false, that is, on the first rcu_read_unlock() requiring special attention. Note that a chain of RCU readers linked by some other sort of reader may find that a later rcu_read_unlock() is once again able to do a full wakeup, courtesy of an intervening preemption: rcu_read_lock(); /* preempted */ local_irq_disable(); rcu_read_unlock(); /* Can do full wakeup, sets ->deferred_qs. */ rcu_read_lock(); local_irq_enable(); preempt_disable() rcu_read_unlock(); /* Cannot do full wakeup, ->deferred_qs set. */ rcu_read_lock(); preempt_enable(); /* preempted, >deferred_qs reset. */ local_irq_disable(); rcu_read_unlock(); /* Can again do full wakeup, sets ->deferred_qs. */ Such linked RCU readers do not yet seem to appear in the Linux kernel, and it is probably best if they don't. However, RCU needs to handle them, and some variations on this theme could make even raise_softirq() unsafe due to the possibility of its doing a full wakeup. This commit therefore also avoids invoking raise_softirq() when the ->deferred_qs set flag is set. Signed-off-by: Paul E. McKenney Cc: Sebastian Andrzej Siewior --- include/linux/sched.h | 2 +- kernel/rcu/tree_plugin.h | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 11837410690f..942a44c1b8eb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -565,7 +565,7 @@ union rcu_special { u8 blocked; u8 need_qs; u8 exp_hint; /* Hint for performance. */ - u8 pad; /* No garbage from compiler! */ + u8 deferred_qs; } b; /* Bits. */ u32 s; /* Set of bits. */ }; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 21611862e083..75110ea75d01 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -455,6 +455,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) local_irq_restore(flags); return; } + t->rcu_read_unlock_special.b.deferred_qs = false; if (special.b.need_qs) { rcu_qs(); t->rcu_read_unlock_special.b.need_qs = false; @@ -605,16 +606,24 @@ static void rcu_read_unlock_special(struct task_struct *t) local_irq_save(flags); irqs_were_disabled = irqs_disabled_flags(flags); if (preempt_bh_were_disabled || irqs_were_disabled) { - WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false); - /* Need to defer quiescent state until everything is enabled. */ - if (irqs_were_disabled && use_softirq) { - /* Enabling irqs does not reschedule, so... */ + t->rcu_read_unlock_special.b.exp_hint = false; + // Need to defer quiescent state until everything is enabled. + if (irqs_were_disabled && use_softirq && + (in_irq() || !t->rcu_read_unlock_special.b.deferred_qs)) { + // Using softirq, safe to awaken, and we get + // no help from enabling irqs, unlike bh/preempt. raise_softirq_irqoff(RCU_SOFTIRQ); + } else if (irqs_were_disabled && !use_softirq && + !t->rcu_read_unlock_special.b.deferred_qs) { + // Safe to awaken and we get no help from enabling + // irqs, unlike bh/preempt. + invoke_rcu_core(); } else { - /* Enabling BH or preempt does reschedule, so... */ + // Enabling BH or preempt does reschedule, so... set_tsk_need_resched(current); set_preempt_need_resched(); } + t->rcu_read_unlock_special.b.deferred_qs = true; local_irq_restore(flags); return; } -- cgit v1.2.3 From 71d8d1531e0904e08adf1540e191bd707dfd73da Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Tue, 26 Mar 2019 15:24:08 -0400 Subject: lockdep: Add assertion to check if in an interrupt In rcu_rrupt_from_idle, we want to check if it is called from within an interrupt, but want to do such checking only for debug builds. lockdep already tracks when we enter an interrupt. Let us expose it as an assertion macro so it can be used to assert this. Suggested-by: Steven Rostedt Cc: kernel-team@android.com Cc: rcu@vger.kernel.org Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/lockdep.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 6e2377e6c1d6..e8eef38b2213 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -632,11 +632,18 @@ do { \ "IRQs not disabled as expected\n"); \ } while (0) +#define lockdep_assert_in_irq() do { \ + WARN_ONCE(debug_locks && !current->lockdep_recursion && \ + !current->hardirq_context, \ + "Not in hardirq as expected\n"); \ + } while (0) + #else # define might_lock(lock) do { } while (0) # define might_lock_read(lock) do { } while (0) # define lockdep_assert_irqs_enabled() do { } while (0) # define lockdep_assert_irqs_disabled() do { } while (0) +# define lockdep_assert_in_irq() do { } while (0) #endif #ifdef CONFIG_LOCKDEP -- cgit v1.2.3 From 1f58bb18f6f28d1df0b7144d90bc90ee5672416d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 May 2019 13:44:57 +0100 Subject: mount_pseudo(): drop 'name' argument, switch to d_make_root() Once upon a time we used to set ->d_name of e.g. pipefs root so that d_path() on pipes would work. These days it's completely pointless - dentries of pipes are not even connected to pipefs root. However, mount_pseudo() had set the root dentry name (passed as the second argument) and callers kept inventing names to pass to it. Including those that didn't *have* any non-root dentries to start with... All of that had been pointless for about 8 years now; it's time to get rid of that cargo-culting... Signed-off-by: Al Viro --- arch/ia64/kernel/perfmon.c | 2 +- drivers/dax/super.c | 2 +- drivers/gpu/drm/drm_drv.c | 6 +----- drivers/misc/cxl/api.c | 3 +-- drivers/scsi/cxlflash/ocxl_hw.c | 3 +-- drivers/virtio/virtio_balloon.c | 3 +-- fs/aio.c | 3 +-- fs/anon_inodes.c | 4 ++-- fs/block_dev.c | 2 +- fs/btrfs/tests/btrfs-tests.c | 2 +- fs/libfs.c | 12 +++--------- fs/nsfs.c | 2 +- fs/pipe.c | 2 +- include/linux/fs.h | 6 +++--- mm/z3fold.c | 2 +- mm/zsmalloc.c | 2 +- net/socket.c | 2 +- 17 files changed, 22 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 7a969f4c3534..a30da6f2c28e 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -602,7 +602,7 @@ static const struct dentry_operations pfmfs_dentry_operations; static struct dentry * pfmfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "pfm:", NULL, &pfmfs_dentry_operations, + return mount_pseudo(fs_type, NULL, &pfmfs_dentry_operations, PFMFS_MAGIC); } diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 35f051efaf35..f83814eea5ad 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -440,7 +440,7 @@ static const struct super_operations dax_sops = { static struct dentry *dax_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "dax:", &dax_sops, NULL, DAXFS_MAGIC); + return mount_pseudo(fs_type, &dax_sops, NULL, DAXFS_MAGIC); } static struct file_system_type dax_fs_type = { diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 8b44ac9a92ae..48365c62a190 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -535,11 +535,7 @@ static struct vfsmount *drm_fs_mnt; static struct dentry *drm_fs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, - "drm:", - NULL, - NULL, - 0x010203ff); + return mount_pseudo(fs_type, NULL, NULL, 0x010203ff); } static struct file_system_type drm_fs_type = { diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index a59c7af79873..1f2b0535a8cf 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -40,8 +40,7 @@ static struct vfsmount *cxl_vfs_mount; static struct dentry *cxl_fs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "cxl:", NULL, NULL, - CXL_PSEUDO_FS_MAGIC); + return mount_pseudo(fs_type, NULL, NULL, CXL_PSEUDO_FS_MAGIC); } static struct file_system_type cxl_fs_type = { diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c index 31cfdf2c8c30..38e1fbd2b406 100644 --- a/drivers/scsi/cxlflash/ocxl_hw.c +++ b/drivers/scsi/cxlflash/ocxl_hw.c @@ -48,8 +48,7 @@ static struct dentry *ocxlflash_fs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "ocxlflash:", NULL, NULL, - OCXLFLASH_FS_MAGIC); + return mount_pseudo(fs_type, NULL, NULL, OCXLFLASH_FS_MAGIC); } static struct file_system_type ocxlflash_fs_type = { diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 554d1a98d193..62bafc4f2662 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -761,8 +761,7 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, static struct dentry *balloon_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "balloon-kvm:", NULL, NULL, - BALLOON_KVM_MAGIC); + return mount_pseudo(fs_type, NULL, NULL, BALLOON_KVM_MAGIC); } static struct file_system_type balloon_fs = { diff --git a/fs/aio.c b/fs/aio.c index 3490d1fa0e16..09bc35fa6810 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -252,8 +252,7 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) static struct dentry *aio_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, NULL, - AIO_RING_MAGIC); + struct dentry *root = mount_pseudo(fs_type, NULL, NULL, AIO_RING_MAGIC); if (!IS_ERR(root)) root->d_sb->s_iflags |= SB_I_NOEXEC; diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 91262c34b797..644d0837aafe 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -41,8 +41,8 @@ static const struct dentry_operations anon_inodefs_dentry_operations = { static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "anon_inode:", NULL, - &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC); + return mount_pseudo(fs_type, NULL, &anon_inodefs_dentry_operations, + ANON_INODE_FS_MAGIC); } static struct file_system_type anon_inode_fs_type = { diff --git a/fs/block_dev.c b/fs/block_dev.c index 0f7552a87d54..3143da7b0998 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -837,7 +837,7 @@ static struct dentry *bd_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { struct dentry *dent; - dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC); + dent = mount_pseudo(fs_type, &bdev_sops, NULL, BDEVFS_MAGIC); if (!IS_ERR(dent)) dent->d_sb->s_iflags |= SB_I_CGROUPWB; return dent; diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 9238fd4f1734..6da54323eaf8 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -36,7 +36,7 @@ static struct dentry *btrfs_test_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "btrfs_test:", &btrfs_test_super_ops, + return mount_pseudo(fs_type, &btrfs_test_super_ops, NULL, BTRFS_TEST_MAGIC); } diff --git a/fs/libfs.c b/fs/libfs.c index 4b59b1816efb..030e545f586e 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -239,14 +239,12 @@ static const struct super_operations simple_super_operations = { * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that * will never be mountable) */ -struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name, +struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, const struct super_operations *ops, const struct xattr_handler **xattr, const struct dentry_operations *dops, unsigned long magic) { struct super_block *s; - struct dentry *dentry; struct inode *root; - struct qstr d_name = QSTR_INIT(name, strlen(name)); s = sget_userns(fs_type, NULL, set_anon_super, SB_KERNMOUNT|SB_NOUSER, &init_user_ns, NULL); @@ -271,13 +269,9 @@ struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name, root->i_ino = 1; root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; root->i_atime = root->i_mtime = root->i_ctime = current_time(root); - dentry = __d_alloc(s, &d_name); - if (!dentry) { - iput(root); + s->s_root = d_make_root(root); + if (!s->s_root) goto Enomem; - } - d_instantiate(dentry, root); - s->s_root = dentry; s->s_d_op = dops; s->s_flags |= SB_ACTIVE; return dget(s->s_root); diff --git a/fs/nsfs.c b/fs/nsfs.c index e3bf08c5af41..b3c49ddc0f85 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -261,7 +261,7 @@ static const struct super_operations nsfs_ops = { static struct dentry *nsfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "nsfs:", &nsfs_ops, + return mount_pseudo(fs_type, &nsfs_ops, &ns_dentry_operations, NSFS_MAGIC); } static struct file_system_type nsfs = { diff --git a/fs/pipe.c b/fs/pipe.c index 41065901106b..99a023730e6f 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1185,7 +1185,7 @@ static const struct super_operations pipefs_ops = { static struct dentry *pipefs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "pipe:", &pipefs_ops, + return mount_pseudo(fs_type, &pipefs_ops, &pipefs_dentry_operations, PIPEFS_MAGIC); } diff --git a/include/linux/fs.h b/include/linux/fs.h index f7fdfe93e25d..b06251dd429f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2257,18 +2257,18 @@ struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), int flags, void *data); -extern struct dentry *mount_pseudo_xattr(struct file_system_type *, char *, +extern struct dentry *mount_pseudo_xattr(struct file_system_type *, const struct super_operations *ops, const struct xattr_handler **xattr, const struct dentry_operations *dops, unsigned long); static inline struct dentry * -mount_pseudo(struct file_system_type *fs_type, char *name, +mount_pseudo(struct file_system_type *fs_type, const struct super_operations *ops, const struct dentry_operations *dops, unsigned long magic) { - return mount_pseudo_xattr(fs_type, name, ops, NULL, dops, magic); + return mount_pseudo_xattr(fs_type, ops, NULL, dops, magic); } /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ diff --git a/mm/z3fold.c b/mm/z3fold.c index 0b14daf930a8..abeb5bcbea57 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -242,7 +242,7 @@ static inline void free_handle(unsigned long handle) static struct dentry *z3fold_do_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "z3fold:", NULL, NULL, 0x33); + return mount_pseudo(fs_type, NULL, NULL, 0x33); } static struct file_system_type z3fold_fs = { diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index d9f831f63625..ef230be8c03e 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1817,7 +1817,7 @@ static void lock_zspage(struct zspage *zspage) static struct dentry *zs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "zsmalloc:", NULL, NULL, ZSMALLOC_MAGIC); + return mount_pseudo(fs_type, NULL, NULL, ZSMALLOC_MAGIC); } static struct file_system_type zsmalloc_fs = { diff --git a/net/socket.c b/net/socket.c index 472fbefa5d9b..c86679584eed 100644 --- a/net/socket.c +++ b/net/socket.c @@ -362,7 +362,7 @@ static const struct xattr_handler *sockfs_xattr_handlers[] = { static struct dentry *sockfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops, + return mount_pseudo_xattr(fs_type, &sockfs_ops, sockfs_xattr_handlers, &sockfs_dentry_operations, SOCKFS_MAGIC); } -- cgit v1.2.3 From bb7b6b2bbdb827e68cd506c8f5e3ba13215cccb2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Mar 2019 16:38:28 +0000 Subject: vfs: Kill mount_ns() Kill mount_ns() as it has been replaced by vfs_get_super() in the new mount API. Signed-off-by: David Howells cc: linux-fsdevel@vger.kernel.org Signed-off-by: Al Viro --- fs/super.c | 38 -------------------------------------- include/linux/fs.h | 3 --- 2 files changed, 41 deletions(-) (limited to 'include/linux') diff --git a/fs/super.c b/fs/super.c index 3ba91d70c2a8..6919f5c728f0 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1143,44 +1143,6 @@ void kill_litter_super(struct super_block *sb) } EXPORT_SYMBOL(kill_litter_super); -static int ns_test_super(struct super_block *sb, void *data) -{ - return sb->s_fs_info == data; -} - -static int ns_set_super(struct super_block *sb, void *data) -{ - sb->s_fs_info = data; - return set_anon_super(sb, NULL); -} - -struct dentry *mount_ns(struct file_system_type *fs_type, - int flags, void *data, void *ns, struct user_namespace *user_ns, - int (*fill_super)(struct super_block *, void *, int)) -{ - struct super_block *sb; - - sb = sget_userns(fs_type, ns_test_super, ns_set_super, flags, - user_ns, ns); - if (IS_ERR(sb)) - return ERR_CAST(sb); - - if (!sb->s_root) { - int err; - err = fill_super(sb, data, flags & SB_SILENT ? 1 : 0); - if (err) { - deactivate_locked_super(sb); - return ERR_PTR(err); - } - - sb->s_flags |= SB_ACTIVE; - } - - return dget(sb->s_root); -} - -EXPORT_SYMBOL(mount_ns); - int set_anon_super_fc(struct super_block *sb, struct fs_context *fc) { return set_anon_super(sb, NULL); diff --git a/include/linux/fs.h b/include/linux/fs.h index b06251dd429f..790342cf4df9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2206,9 +2206,6 @@ struct file_system_type { #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) -extern struct dentry *mount_ns(struct file_system_type *fs_type, - int flags, void *data, void *ns, struct user_namespace *user_ns, - int (*fill_super)(struct super_block *, void *, int)); #ifdef CONFIG_BLOCK extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, -- cgit v1.2.3 From c80fa7c8301c10ad10d997b9e86b4aeac5923b3e Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Mar 2019 16:38:23 +0000 Subject: vfs: Provide sb->s_iflags settings in fs_context struct Provide a field in the fs_context struct through which bits in the sb->s_iflags superblock field can be set. Signed-off-by: David Howells cc: linux-fsdevel@vger.kernel.org --- fs/super.c | 1 + include/linux/fs_context.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/fs/super.c b/fs/super.c index 72b4a5afcfd6..f836b67abffe 100644 --- a/fs/super.c +++ b/fs/super.c @@ -540,6 +540,7 @@ retry: } fc->s_fs_info = NULL; s->s_type = fc->fs_type; + s->s_iflags |= fc->s_iflags; strlcpy(s->s_id, s->s_type->name, sizeof(s->s_id)); list_add_tail(&s->s_list, &super_blocks); hlist_add_head(&s->s_instances, &s->s_type->fs_supers); diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 1f966670c8dc..c995b852ba40 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -103,6 +103,7 @@ struct fs_context { void *s_fs_info; /* Proposed s_fs_info */ unsigned int sb_flags; /* Proposed superblock flags (SB_*) */ unsigned int sb_flags_mask; /* Superblock flags that were changed */ + unsigned int s_iflags; /* OR'd with sb->s_iflags */ unsigned int lsm_flags; /* Information flags from the fs to the LSM */ enum fs_context_purpose purpose:8; enum fs_context_phase phase:8; /* The phase the context is in */ -- cgit v1.2.3 From 31d6d5ce53400d6dc58e29ddd8dc184b3ba89d66 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Mar 2019 16:38:23 +0000 Subject: vfs: Provide a mount_pseudo-replacement for the new mount API Provide a function, init_pseudo(), that provides a common infrastructure for converting pseudo-filesystems that can never be mountable. [AV: once all users of mount_pseudo_xattr() get converted, it will be folded into pseudo_fs_get_tree()] Signed-off-by: David Howells cc: linux-fsdevel@vger.kernel.org --- fs/libfs.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pseudo_fs.h | 16 ++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 include/linux/pseudo_fs.h (limited to 'include/linux') diff --git a/fs/libfs.c b/fs/libfs.c index 030e545f586e..edef70d35438 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -16,6 +16,8 @@ #include #include #include /* sync_mapping_buffers */ +#include +#include #include @@ -235,6 +237,50 @@ static const struct super_operations simple_super_operations = { .statfs = simple_statfs, }; +static int pseudo_fs_get_tree(struct fs_context *fc) +{ + struct pseudo_fs_context *ctx = fc->fs_private; + struct dentry *root; + + root = mount_pseudo_xattr(fc->fs_type, + ctx->ops, ctx->xattr, + ctx->dops, ctx->magic); + if (IS_ERR(root)) + return PTR_ERR(root); + + fc->root = root; + return 0; +} + +static void pseudo_fs_free(struct fs_context *fc) +{ + kfree(fc->fs_private); +} + +static const struct fs_context_operations pseudo_fs_context_ops = { + .free = pseudo_fs_free, + .get_tree = pseudo_fs_get_tree, +}; + +/* + * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that + * will never be mountable) + */ +struct pseudo_fs_context *init_pseudo(struct fs_context *fc, + unsigned long magic) +{ + struct pseudo_fs_context *ctx; + + ctx = kzalloc(sizeof(struct pseudo_fs_context), GFP_KERNEL); + if (likely(ctx)) { + ctx->magic = magic; + fc->fs_private = ctx; + fc->ops = &pseudo_fs_context_ops; + } + return ctx; +} +EXPORT_SYMBOL(init_pseudo); + /* * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that * will never be mountable) diff --git a/include/linux/pseudo_fs.h b/include/linux/pseudo_fs.h new file mode 100644 index 000000000000..eceda1d1407a --- /dev/null +++ b/include/linux/pseudo_fs.h @@ -0,0 +1,16 @@ +#ifndef __LINUX_PSEUDO_FS__ +#define __LINUX_PSEUDO_FS__ + +#include + +struct pseudo_fs_context { + const struct super_operations *ops; + const struct xattr_handler **xattr; + const struct dentry_operations *dops; + unsigned long magic; +}; + +struct pseudo_fs_context *init_pseudo(struct fs_context *fc, + unsigned long magic); + +#endif -- cgit v1.2.3 From 8d9e46d80777b484f8f0945c317ad618224d7811 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 11 May 2019 11:43:59 -0400 Subject: fold mount_pseudo_xattr() into pseudo_fs_get_tree() ... now that all other callers are gone Signed-off-by: Al Viro --- fs/libfs.c | 88 +++++++++++++++++++++--------------------------------- include/linux/fs.h | 13 -------- 2 files changed, 34 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/fs/libfs.c b/fs/libfs.c index edef70d35438..7df3c9a85f6b 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -240,16 +240,43 @@ static const struct super_operations simple_super_operations = { static int pseudo_fs_get_tree(struct fs_context *fc) { struct pseudo_fs_context *ctx = fc->fs_private; - struct dentry *root; + struct super_block *s; + struct inode *root; - root = mount_pseudo_xattr(fc->fs_type, - ctx->ops, ctx->xattr, - ctx->dops, ctx->magic); - if (IS_ERR(root)) - return PTR_ERR(root); + s = sget_userns(fc->fs_type, NULL, set_anon_super, SB_KERNMOUNT|SB_NOUSER, + &init_user_ns, NULL); + if (IS_ERR(s)) + return PTR_ERR(s); - fc->root = root; + s->s_maxbytes = MAX_LFS_FILESIZE; + s->s_blocksize = PAGE_SIZE; + s->s_blocksize_bits = PAGE_SHIFT; + s->s_magic = ctx->magic; + s->s_op = ctx->ops ?: &simple_super_operations; + s->s_xattr = ctx->xattr; + s->s_time_gran = 1; + root = new_inode(s); + if (!root) + goto Enomem; + /* + * since this is the first inode, make it number 1. New inodes created + * after this must take care not to collide with it (by passing + * max_reserved of 1 to iunique). + */ + root->i_ino = 1; + root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; + root->i_atime = root->i_mtime = root->i_ctime = current_time(root); + s->s_root = d_make_root(root); + if (!s->s_root) + goto Enomem; + s->s_d_op = ctx->dops; + s->s_flags |= SB_ACTIVE; + fc->root = dget(s->s_root); return 0; + +Enomem: + deactivate_locked_super(s); + return -ENOMEM; } static void pseudo_fs_free(struct fs_context *fc) @@ -281,53 +308,6 @@ struct pseudo_fs_context *init_pseudo(struct fs_context *fc, } EXPORT_SYMBOL(init_pseudo); -/* - * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that - * will never be mountable) - */ -struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, - const struct super_operations *ops, const struct xattr_handler **xattr, - const struct dentry_operations *dops, unsigned long magic) -{ - struct super_block *s; - struct inode *root; - - s = sget_userns(fs_type, NULL, set_anon_super, SB_KERNMOUNT|SB_NOUSER, - &init_user_ns, NULL); - if (IS_ERR(s)) - return ERR_CAST(s); - - s->s_maxbytes = MAX_LFS_FILESIZE; - s->s_blocksize = PAGE_SIZE; - s->s_blocksize_bits = PAGE_SHIFT; - s->s_magic = magic; - s->s_op = ops ? ops : &simple_super_operations; - s->s_xattr = xattr; - s->s_time_gran = 1; - root = new_inode(s); - if (!root) - goto Enomem; - /* - * since this is the first inode, make it number 1. New inodes created - * after this must take care not to collide with it (by passing - * max_reserved of 1 to iunique). - */ - root->i_ino = 1; - root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; - root->i_atime = root->i_mtime = root->i_ctime = current_time(root); - s->s_root = d_make_root(root); - if (!s->s_root) - goto Enomem; - s->s_d_op = dops; - s->s_flags |= SB_ACTIVE; - return dget(s->s_root); - -Enomem: - deactivate_locked_super(s); - return ERR_PTR(-ENOMEM); -} -EXPORT_SYMBOL(mount_pseudo_xattr); - int simple_open(struct inode *inode, struct file *file) { if (inode->i_private) diff --git a/include/linux/fs.h b/include/linux/fs.h index 790342cf4df9..d625acabbfcf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2254,19 +2254,6 @@ struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), int flags, void *data); -extern struct dentry *mount_pseudo_xattr(struct file_system_type *, - const struct super_operations *ops, - const struct xattr_handler **xattr, - const struct dentry_operations *dops, - unsigned long); - -static inline struct dentry * -mount_pseudo(struct file_system_type *fs_type, - const struct super_operations *ops, - const struct dentry_operations *dops, unsigned long magic) -{ - return mount_pseudo_xattr(fs_type, ops, NULL, dops, magic); -} /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ #define fops_get(fops) \ -- cgit v1.2.3 From 023d066a0d0a87696c04b0de2ceae53063d0b655 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Mar 2019 16:38:28 +0000 Subject: vfs: Kill sget_userns() Kill sget_userns(), folding it into sget() as that's the only remaining user. Signed-off-by: David Howells cc: linux-fsdevel@vger.kernel.org --- fs/super.c | 54 ++++++++++++++++-------------------------------------- include/linux/fs.h | 5 ----- 2 files changed, 16 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/fs/super.c b/fs/super.c index f836b67abffe..ca2302501d32 100644 --- a/fs/super.c +++ b/fs/super.c @@ -563,24 +563,31 @@ share_extant_sb: EXPORT_SYMBOL(sget_fc); /** - * sget_userns - find or create a superblock - * @type: filesystem type superblock should belong to - * @test: comparison callback - * @set: setup callback - * @flags: mount flags - * @user_ns: User namespace for the super_block - * @data: argument to each of them + * sget - find or create a superblock + * @type: filesystem type superblock should belong to + * @test: comparison callback + * @set: setup callback + * @flags: mount flags + * @data: argument to each of them */ -struct super_block *sget_userns(struct file_system_type *type, +struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), - int flags, struct user_namespace *user_ns, + int flags, void *data) { + struct user_namespace *user_ns = current_user_ns(); struct super_block *s = NULL; struct super_block *old; int err; + /* We don't yet pass the user namespace of the parent + * mount through to here so always use &init_user_ns + * until that changes. + */ + if (flags & SB_SUBMOUNT) + user_ns = &init_user_ns; + retry: spin_lock(&sb_lock); if (test) { @@ -621,35 +628,6 @@ retry: register_shrinker_prepared(&s->s_shrink); return s; } - -EXPORT_SYMBOL(sget_userns); - -/** - * sget - find or create a superblock - * @type: filesystem type superblock should belong to - * @test: comparison callback - * @set: setup callback - * @flags: mount flags - * @data: argument to each of them - */ -struct super_block *sget(struct file_system_type *type, - int (*test)(struct super_block *,void *), - int (*set)(struct super_block *,void *), - int flags, - void *data) -{ - struct user_namespace *user_ns = current_user_ns(); - - /* We don't yet pass the user namespace of the parent - * mount through to here so always use &init_user_ns - * until that changes. - */ - if (flags & SB_SUBMOUNT) - user_ns = &init_user_ns; - - return sget_userns(type, test, set, flags, user_ns, data); -} - EXPORT_SYMBOL(sget); void drop_super(struct super_block *sb) diff --git a/include/linux/fs.h b/include/linux/fs.h index d625acabbfcf..71421856ff2c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2245,11 +2245,6 @@ void free_anon_bdev(dev_t); struct super_block *sget_fc(struct fs_context *fc, int (*test)(struct super_block *, struct fs_context *), int (*set)(struct super_block *, struct fs_context *)); -struct super_block *sget_userns(struct file_system_type *type, - int (*test)(struct super_block *,void *), - int (*set)(struct super_block *,void *), - int flags, struct user_namespace *user_ns, - void *data); struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), -- cgit v1.2.3 From 7375dca1647fa978310f2d706ddbff537f72110b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 20 May 2019 09:26:24 -0400 Subject: ftrace: Make enable and update parameters bool when applicable The code modification functions have "enable" and "update" variables that are sometimes "int" but used as "bool". Remove the ambiguity and make them "bool" when they are only used for true or false values. Link: http://lkml.kernel.org/r/e1429923d9eda92a3cf5ee9e33c7eacce539781d.1558115654.git.naveen.n.rao@linux.vnet.ibm.com Reported-by: "Naveen N. Rao" Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 4 ++-- kernel/trace/ftrace.c | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 25e2995d4a4c..8a8cb3c401b2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -427,8 +427,8 @@ struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter); iter = ftrace_rec_iter_next(iter)) -int ftrace_update_record(struct dyn_ftrace *rec, int enable); -int ftrace_test_record(struct dyn_ftrace *rec, int enable); +int ftrace_update_record(struct dyn_ftrace *rec, bool enable); +int ftrace_test_record(struct dyn_ftrace *rec, bool enable); void ftrace_run_stop_machine(int command); unsigned long ftrace_location(unsigned long ip); unsigned long ftrace_location_range(unsigned long start, unsigned long end); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a12aff849c04..4f2c26bebe2a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1768,7 +1768,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, count++; /* Must match FTRACE_UPDATE_CALLS in ftrace_modify_all_code() */ - update |= ftrace_test_record(rec, 1) != FTRACE_UPDATE_IGNORE; + update |= ftrace_test_record(rec, true) != FTRACE_UPDATE_IGNORE; /* Shortcut, if we handled all records, we are done. */ if (!all && count == hash->count) @@ -2047,7 +2047,7 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec) } } -static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) +static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update) { unsigned long flag = 0UL; @@ -2146,28 +2146,28 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) /** * ftrace_update_record, set a record that now is tracing or not * @rec: the record to update - * @enable: set to 1 if the record is tracing, zero to force disable + * @enable: set to true if the record is tracing, false to force disable * * The records that represent all functions that can be traced need * to be updated when tracing has been enabled. */ -int ftrace_update_record(struct dyn_ftrace *rec, int enable) +int ftrace_update_record(struct dyn_ftrace *rec, bool enable) { - return ftrace_check_record(rec, enable, 1); + return ftrace_check_record(rec, enable, true); } /** * ftrace_test_record, check if the record has been enabled or not * @rec: the record to test - * @enable: set to 1 to check if enabled, 0 if it is disabled + * @enable: set to true to check if enabled, false if it is disabled * * The arch code may need to test if a record is already set to * tracing to determine how to modify the function code that it * represents. */ -int ftrace_test_record(struct dyn_ftrace *rec, int enable) +int ftrace_test_record(struct dyn_ftrace *rec, bool enable) { - return ftrace_check_record(rec, enable, 0); + return ftrace_check_record(rec, enable, false); } static struct ftrace_ops * @@ -2356,7 +2356,7 @@ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec) } static int -__ftrace_replace_code(struct dyn_ftrace *rec, int enable) +__ftrace_replace_code(struct dyn_ftrace *rec, bool enable) { unsigned long ftrace_old_addr; unsigned long ftrace_addr; @@ -2395,7 +2395,7 @@ void __weak ftrace_replace_code(int mod_flags) { struct dyn_ftrace *rec; struct ftrace_page *pg; - int enable = mod_flags & FTRACE_MODIFY_ENABLE_FL; + bool enable = mod_flags & FTRACE_MODIFY_ENABLE_FL; int schedulable = mod_flags & FTRACE_MODIFY_MAY_SLEEP_FL; int failed; -- cgit v1.2.3 From 2d8d8fac3b4eab035dcd0068e1f5a746a697fbb3 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 15 May 2019 14:38:06 +0900 Subject: x86/uaccess: Allow access_ok() in irq context if pagefault_disabled WARN_ON_IN_IRQ() assumes that the access_ok() and following user memory access can sleep. But this assumption is not always correct; when the pagefault is disabled, following memory access will just returns -EFAULT and never sleep. Add pagefault_disabled() check in WARN_ON_ONCE() so that it can ignore the case we call it with disabling pagefault. For this purpose, this modified pagefault_disabled() as an inline function. Link: http://lkml.kernel.org/r/155789868664.26965.7932665824135793317.stgit@devnote2 Acked-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/x86/include/asm/uaccess.h | 4 +++- include/linux/uaccess.h | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index c82abd6e4ca3..9c4435307ff8 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -66,7 +66,9 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un }) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP -# define WARN_ON_IN_IRQ() WARN_ON_ONCE(!in_task()) +static inline bool pagefault_disabled(void); +# define WARN_ON_IN_IRQ() \ + WARN_ON_ONCE(!in_task() && !pagefault_disabled()) #else # define WARN_ON_IN_IRQ() #endif diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 2b70130af585..5a43ef7db492 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -203,7 +203,10 @@ static inline void pagefault_enable(void) /* * Is the pagefault handler disabled? If so, user access methods will not sleep. */ -#define pagefault_disabled() (current->pagefault_disabled != 0) +static inline bool pagefault_disabled(void) +{ + return current->pagefault_disabled != 0; +} /* * The pagefault handler is in general disabled by pagefault_disable() or -- cgit v1.2.3 From 3d7081822f7f9eab867d9bcc8fd635208ec438e0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 15 May 2019 14:38:18 +0900 Subject: uaccess: Add non-pagefault user-space read functions Add probe_user_read(), strncpy_from_unsafe_user() and strnlen_unsafe_user() which allows caller to access user-space in IRQ context. Current probe_kernel_read() and strncpy_from_unsafe() are not available for user-space memory, because it sets KERNEL_DS while accessing data. On some arch, user address space and kernel address space can be co-exist, but others can not. In that case, setting KERNEL_DS means given address is treated as a kernel address space. Also strnlen_user() is only available from user context since it can sleep if pagefault is enabled. To access user-space memory without pagefault, we need these new functions which sets USER_DS while accessing the data. Link: http://lkml.kernel.org/r/155789869802.26965.4940338412595759063.stgit@devnote2 Acked-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/uaccess.h | 14 ++++++ mm/maccess.c | 122 +++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 130 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 5a43ef7db492..9c435c3f2105 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -242,6 +242,17 @@ static inline unsigned long __copy_from_user_inatomic_nocache(void *to, extern long probe_kernel_read(void *dst, const void *src, size_t size); extern long __probe_kernel_read(void *dst, const void *src, size_t size); +/* + * probe_user_read(): safely attempt to read from a location in user space + * @dst: pointer to the buffer that shall take the data + * @src: address to read from + * @size: size of the data chunk + * + * Safely read from address @src to the buffer at @dst. If a kernel fault + * happens, handle that and return -EFAULT. + */ +extern long probe_user_read(void *dst, const void __user *src, size_t size); + /* * probe_kernel_write(): safely attempt to write to a location * @dst: address to write to @@ -255,6 +266,9 @@ extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size); extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); +extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr, + long count); +extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count); /** * probe_kernel_address(): safely attempt to read from a location diff --git a/mm/maccess.c b/mm/maccess.c index ec00be51a24f..19c8c3dc14df 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -5,8 +5,20 @@ #include #include +static __always_inline long +probe_read_common(void *dst, const void __user *src, size_t size) +{ + long ret; + + pagefault_disable(); + ret = __copy_from_user_inatomic(dst, src, size); + pagefault_enable(); + + return ret ? -EFAULT : 0; +} + /** - * probe_kernel_read(): safely attempt to read from a location + * probe_kernel_read(): safely attempt to read from a kernel-space location * @dst: pointer to the buffer that shall take the data * @src: address to read from * @size: size of the data chunk @@ -29,16 +41,40 @@ long __probe_kernel_read(void *dst, const void *src, size_t size) mm_segment_t old_fs = get_fs(); set_fs(KERNEL_DS); - pagefault_disable(); - ret = __copy_from_user_inatomic(dst, - (__force const void __user *)src, size); - pagefault_enable(); + ret = probe_read_common(dst, (__force const void __user *)src, size); set_fs(old_fs); - return ret ? -EFAULT : 0; + return ret; } EXPORT_SYMBOL_GPL(probe_kernel_read); +/** + * probe_user_read(): safely attempt to read from a user-space location + * @dst: pointer to the buffer that shall take the data + * @src: address to read from. This must be a user address. + * @size: size of the data chunk + * + * Safely read from user address @src to the buffer at @dst. If a kernel fault + * happens, handle that and return -EFAULT. + */ + +long __weak probe_user_read(void *dst, const void __user *src, size_t size) + __attribute__((alias("__probe_user_read"))); + +long __probe_user_read(void *dst, const void __user *src, size_t size) +{ + long ret = -EFAULT; + mm_segment_t old_fs = get_fs(); + + set_fs(USER_DS); + if (access_ok(src, size)) + ret = probe_read_common(dst, src, size); + set_fs(old_fs); + + return ret; +} +EXPORT_SYMBOL_GPL(probe_user_read); + /** * probe_kernel_write(): safely attempt to write to a location * @dst: address to write to @@ -66,6 +102,7 @@ long __probe_kernel_write(void *dst, const void *src, size_t size) } EXPORT_SYMBOL_GPL(probe_kernel_write); + /** * strncpy_from_unsafe: - Copy a NUL terminated string from unsafe address. * @dst: Destination address, in kernel space. This buffer must be at @@ -105,3 +142,76 @@ long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count) return ret ? -EFAULT : src - unsafe_addr; } + +/** + * strncpy_from_unsafe_user: - Copy a NUL terminated string from unsafe user + * address. + * @dst: Destination address, in kernel space. This buffer must be at + * least @count bytes long. + * @unsafe_addr: Unsafe user address. + * @count: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from unsafe user address to kernel buffer. + * + * On success, returns the length of the string INCLUDING the trailing NUL. + * + * If access fails, returns -EFAULT (some data may have been copied + * and the trailing NUL added). + * + * If @count is smaller than the length of the string, copies @count-1 bytes, + * sets the last byte of @dst buffer to NUL and returns @count. + */ +long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr, + long count) +{ + mm_segment_t old_fs = get_fs(); + long ret; + + if (unlikely(count <= 0)) + return 0; + + set_fs(USER_DS); + pagefault_disable(); + ret = strncpy_from_user(dst, unsafe_addr, count); + pagefault_enable(); + set_fs(old_fs); + + if (ret >= count) { + ret = count; + dst[ret - 1] = '\0'; + } else if (ret > 0) { + ret++; + } + + return ret; +} + +/** + * strnlen_unsafe_user: - Get the size of a user string INCLUDING final NUL. + * @unsafe_addr: The string to measure. + * @count: Maximum count (including NUL) + * + * Get the size of a NUL-terminated string in user space without pagefault. + * + * Returns the size of the string INCLUDING the terminating NUL. + * + * If the string is too long, returns a number larger than @count. User + * has to check the return value against "> count". + * On exception (or invalid count), returns 0. + * + * Unlike strnlen_user, this can be used from IRQ handler etc. because + * it disables pagefaults. + */ +long strnlen_unsafe_user(const void __user *unsafe_addr, long count) +{ + mm_segment_t old_fs = get_fs(); + int ret; + + set_fs(USER_DS); + pagefault_disable(); + ret = strnlen_user(unsafe_addr, count); + pagefault_enable(); + set_fs(old_fs); + + return ret; +} -- cgit v1.2.3 From 87a90956eeab260a469a51897bfda27b28adf67d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 22 May 2019 17:27:44 +0900 Subject: uaccess: Add a prototype of non-static __probe_user_read() Declare a prototype of non-static __probe_user_read() as same as __probe_kernel_read() at uaccess.h. Reported-by: kbuild test robot Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/uaccess.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 9c435c3f2105..34a038563d97 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -252,6 +252,7 @@ extern long __probe_kernel_read(void *dst, const void *src, size_t size); * happens, handle that and return -EFAULT. */ extern long probe_user_read(void *dst, const void __user *src, size_t size); +extern long __probe_user_read(void *dst, const void __user *src, size_t size); /* * probe_kernel_write(): safely attempt to write to a location -- cgit v1.2.3 From 08eb1fb0f77b0036568d2228f3425f2595d671bb Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 26 May 2019 15:22:22 +0300 Subject: qed*: Change hwfn used for sb initialization When initializing status blocks use the affined hwfn instead of the leading one for RDMA / Storage Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/infiniband/hw/qedr/main.c | 3 +- drivers/net/ethernet/qlogic/qed/qed_main.c | 47 ++++++++++++++++------------ drivers/net/ethernet/qlogic/qede/qede_main.c | 3 +- include/linux/qed/qed_if.h | 10 +++++- 4 files changed, 40 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 083c2c00a8e9..806b3d0e57d8 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -312,7 +312,8 @@ static void qedr_free_mem_sb(struct qedr_dev *dev, struct qed_sb_info *sb_info, int sb_id) { if (sb_info->sb_virt) { - dev->ops->common->sb_release(dev->cdev, sb_info, sb_id); + dev->ops->common->sb_release(dev->cdev, sb_info, sb_id, + QED_SB_TYPE_CNQ); dma_free_coherent(&dev->pdev->dev, sizeof(*sb_info->sb_virt), (void *)sb_info->sb_virt, sb_info->sb_phys); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 6de23b56b294..7f19fefe0d79 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1301,26 +1301,21 @@ static u32 qed_sb_init(struct qed_dev *cdev, { struct qed_hwfn *p_hwfn; struct qed_ptt *p_ptt; - int hwfn_index; u16 rel_sb_id; - u8 n_hwfns; u32 rc; - /* RoCE uses single engine and CMT uses two engines. When using both - * we force only a single engine. Storage uses only engine 0 too. - */ - if (type == QED_SB_TYPE_L2_QUEUE) - n_hwfns = cdev->num_hwfns; - else - n_hwfns = 1; - - hwfn_index = sb_id % n_hwfns; - p_hwfn = &cdev->hwfns[hwfn_index]; - rel_sb_id = sb_id / n_hwfns; + /* RoCE/Storage use a single engine in CMT mode while L2 uses both */ + if (type == QED_SB_TYPE_L2_QUEUE) { + p_hwfn = &cdev->hwfns[sb_id % cdev->num_hwfns]; + rel_sb_id = sb_id / cdev->num_hwfns; + } else { + p_hwfn = QED_AFFIN_HWFN(cdev); + rel_sb_id = sb_id; + } DP_VERBOSE(cdev, NETIF_MSG_INTR, "hwfn [%d] <--[init]-- SB %04x [0x%04x upper]\n", - hwfn_index, rel_sb_id, sb_id); + IS_LEAD_HWFN(p_hwfn) ? 0 : 1, rel_sb_id, sb_id); if (IS_PF(p_hwfn->cdev)) { p_ptt = qed_ptt_acquire(p_hwfn); @@ -1339,20 +1334,26 @@ static u32 qed_sb_init(struct qed_dev *cdev, } static u32 qed_sb_release(struct qed_dev *cdev, - struct qed_sb_info *sb_info, u16 sb_id) + struct qed_sb_info *sb_info, + u16 sb_id, + enum qed_sb_type type) { struct qed_hwfn *p_hwfn; - int hwfn_index; u16 rel_sb_id; u32 rc; - hwfn_index = sb_id % cdev->num_hwfns; - p_hwfn = &cdev->hwfns[hwfn_index]; - rel_sb_id = sb_id / cdev->num_hwfns; + /* RoCE/Storage use a single engine in CMT mode while L2 uses both */ + if (type == QED_SB_TYPE_L2_QUEUE) { + p_hwfn = &cdev->hwfns[sb_id % cdev->num_hwfns]; + rel_sb_id = sb_id / cdev->num_hwfns; + } else { + p_hwfn = QED_AFFIN_HWFN(cdev); + rel_sb_id = sb_id; + } DP_VERBOSE(cdev, NETIF_MSG_INTR, "hwfn [%d] <--[init]-- SB %04x [0x%04x upper]\n", - hwfn_index, rel_sb_id, sb_id); + IS_LEAD_HWFN(p_hwfn) ? 0 : 1, rel_sb_id, sb_id); rc = qed_int_sb_release(p_hwfn, sb_info, rel_sb_id); @@ -2372,6 +2373,11 @@ static int qed_read_module_eeprom(struct qed_dev *cdev, char *buf, return rc; } +static u8 qed_get_affin_hwfn_idx(struct qed_dev *cdev) +{ + return QED_AFFIN_HWFN_IDX(cdev); +} + static struct qed_selftest_ops qed_selftest_ops_pass = { .selftest_memory = &qed_selftest_memory, .selftest_interrupt = &qed_selftest_interrupt, @@ -2419,6 +2425,7 @@ const struct qed_common_ops qed_common_ops_pass = { .db_recovery_add = &qed_db_recovery_add, .db_recovery_del = &qed_db_recovery_del, .read_module_eeprom = &qed_read_module_eeprom, + .get_affin_hwfn_idx = &qed_get_affin_hwfn_idx, }; void qed_get_protocol_stats(struct qed_dev *cdev, diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 02a97c659e29..a9684a881f2a 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1306,7 +1306,8 @@ static void qede_free_mem_sb(struct qede_dev *edev, struct qed_sb_info *sb_info, u16 sb_id) { if (sb_info->sb_virt) { - edev->ops->common->sb_release(edev->cdev, sb_info, sb_id); + edev->ops->common->sb_release(edev->cdev, sb_info, sb_id, + QED_SB_TYPE_L2_QUEUE); dma_free_coherent(&edev->pdev->dev, sizeof(*sb_info->sb_virt), (void *)sb_info->sb_virt, sb_info->sb_phys); memset(sb_info, 0, sizeof(*sb_info)); diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 48841e5dab90..eef02e64b422 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -907,7 +907,8 @@ struct qed_common_ops { u32 (*sb_release)(struct qed_dev *cdev, struct qed_sb_info *sb_info, - u16 sb_id); + u16 sb_id, + enum qed_sb_type type); void (*simd_handler_config)(struct qed_dev *cdev, void *token, @@ -1123,6 +1124,13 @@ struct qed_common_ops { */ int (*read_module_eeprom)(struct qed_dev *cdev, char *buf, u8 dev_addr, u32 offset, u32 len); + +/** + * @brief get_affin_hwfn_idx + * + * @param cdev + */ + u8 (*get_affin_hwfn_idx)(struct qed_dev *cdev); }; #define MASK_FIELD(_name, _value) \ -- cgit v1.2.3 From 3576e99e08217f291290ac62431c7e330ac111c4 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 26 May 2019 15:22:27 +0300 Subject: qed*: Add iWARP 100g support Add iWARP engine affinity setting for supporting iWARP over 100g. iWARP cannot be distinguished by the LLH from L2, hence the engine division will affect L2 as well. For this reason we add a parameter to devlink to determine the engine division. Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/infiniband/hw/qedr/main.c | 13 +++++++++++++ drivers/net/ethernet/qlogic/qed/qed_rdma.c | 31 ++++++++++++++++++++++++++++++ include/linux/qed/qed_rdma_if.h | 2 ++ 3 files changed, 46 insertions(+) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 055a63144480..5ebf3c53b3fb 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -871,7 +871,16 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev, dev->user_dpm_enabled = dev_info.user_dpm_enabled; dev->rdma_type = dev_info.rdma_type; dev->num_hwfns = dev_info.common.num_hwfns; + + if (IS_IWARP(dev) && QEDR_IS_CMT(dev)) { + rc = dev->ops->iwarp_set_engine_affin(cdev, false); + if (rc) { + DP_ERR(dev, "iWARP is disabled over a 100g device Enabling it may impact L2 performance. To enable it run devlink dev param set name iwarp_cmt value true cmode runtime\n"); + goto init_err; + } + } dev->affin_hwfn_idx = dev->ops->common->get_affin_hwfn_idx(cdev); + dev->rdma_ctx = dev->ops->rdma_get_rdma_ctx(cdev); dev->num_cnq = dev->ops->rdma_get_min_cnq_msix(cdev); @@ -932,6 +941,10 @@ static void qedr_remove(struct qedr_dev *dev) qedr_stop_hw(dev); qedr_sync_free_irqs(dev); qedr_free_resources(dev); + + if (IS_IWARP(dev) && QEDR_IS_CMT(dev)) + dev->ops->iwarp_set_engine_affin(dev->cdev, true); + ib_dealloc_device(&dev->ibdev); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index e4d63359864e..f900fde448db 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -1916,6 +1916,36 @@ static int qed_roce_ll2_set_mac_filter(struct qed_dev *cdev, return rc; } +static int qed_iwarp_set_engine_affin(struct qed_dev *cdev, bool b_reset) +{ + enum qed_eng eng; + u8 ppfid = 0; + int rc; + + /* Make sure iwarp cmt mode is enabled before setting affinity */ + if (!cdev->iwarp_cmt) + return -EINVAL; + + if (b_reset) + eng = QED_BOTH_ENG; + else + eng = cdev->l2_affin_hint ? QED_ENG1 : QED_ENG0; + + rc = qed_llh_set_ppfid_affinity(cdev, ppfid, eng); + if (rc) { + DP_NOTICE(cdev, + "Failed to set the engine affinity of ppfid %d\n", + ppfid); + return rc; + } + + DP_VERBOSE(cdev, (QED_MSG_RDMA | QED_MSG_SP), + "LLH: Set the engine affinity of non-RoCE packets as %d\n", + eng); + + return 0; +} + static const struct qed_rdma_ops qed_rdma_ops_pass = { .common = &qed_common_ops_pass, .fill_dev_info = &qed_fill_rdma_dev_info, @@ -1955,6 +1985,7 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = { .ll2_set_fragment_of_tx_packet = &qed_ll2_set_fragment_of_tx_packet, .ll2_set_mac_filter = &qed_roce_ll2_set_mac_filter, .ll2_get_stats = &qed_ll2_get_stats, + .iwarp_set_engine_affin = &qed_iwarp_set_engine_affin, .iwarp_connect = &qed_iwarp_connect, .iwarp_create_listen = &qed_iwarp_create_listen, .iwarp_destroy_listen = &qed_iwarp_destroy_listen, diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index d15f8e4815e3..898f595ea3d6 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -670,6 +670,8 @@ struct qed_rdma_ops { int (*ll2_set_mac_filter)(struct qed_dev *cdev, u8 *old_mac_address, u8 *new_mac_address); + int (*iwarp_set_engine_affin)(struct qed_dev *cdev, bool b_reset); + int (*iwarp_connect)(void *rdma_cxt, struct qed_iwarp_connect_in *iparams, struct qed_iwarp_connect_out *oparams); -- cgit v1.2.3 From 8242c59315b7b40ac97e1274d715665569992ff4 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 22 May 2019 14:47:19 -0500 Subject: soundwire: add port-related definitions Somehow previous header files did not include definition for sink/source, flow and grouping. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 53 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 35662d9c2c62..69ae680a5a21 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -41,6 +41,31 @@ struct sdw_slave; #define SDW_DAI_ID_RANGE_START 100 #define SDW_DAI_ID_RANGE_END 200 +enum { + SDW_PORT_DIRN_SINK = 0, + SDW_PORT_DIRN_SOURCE, + SDW_PORT_DIRN_MAX, +}; + +/* + * constants for flow control, ports and transport + * + * these are bit masks as devices can have multiple capabilities + */ + +/* + * flow modes for SDW port. These can be isochronous, tx controlled, + * rx controlled or async + */ +#define SDW_PORT_FLOW_MODE_ISOCH 0 +#define SDW_PORT_FLOW_MODE_TX_CNTRL BIT(0) +#define SDW_PORT_FLOW_MODE_RX_CNTRL BIT(1) +#define SDW_PORT_FLOW_MODE_ASYNC GENMASK(1, 0) + +/* sample packaging for block. It can be per port or per channel */ +#define SDW_BLOCK_PACKG_PER_PORT BIT(0) +#define SDW_BLOCK_PACKG_PER_CH BIT(1) + /** * enum sdw_slave_status - Slave status * @SDW_SLAVE_UNATTACHED: Slave is not attached with the bus. @@ -76,6 +101,14 @@ enum sdw_command_response { SDW_CMD_FAIL_OTHER = 4, }; +/* block group count enum */ +enum sdw_dpn_grouping { + SDW_BLK_GRP_CNT_1 = 0, + SDW_BLK_GRP_CNT_2 = 1, + SDW_BLK_GRP_CNT_3 = 2, + SDW_BLK_GRP_CNT_4 = 3, +}; + /** * enum sdw_stream_type: data stream type * @@ -100,6 +133,26 @@ enum sdw_data_direction { SDW_DATA_DIR_TX = 1, }; +/** + * enum sdw_port_data_mode: Data Port mode + * + * @SDW_PORT_DATA_MODE_NORMAL: Normal data mode where audio data is received + * and transmitted. + * @SDW_PORT_DATA_MODE_STATIC_1: Simple test mode which uses static value of + * logic 1. The encoding will result in signal transitions at every bitslot + * owned by this Port + * @SDW_PORT_DATA_MODE_STATIC_0: Simple test mode which uses static value of + * logic 0. The encoding will result in no signal transitions + * @SDW_PORT_DATA_MODE_PRBS: Test mode which uses a PRBS generator to produce + * a pseudo random data pattern that is transferred + */ +enum sdw_port_data_mode { + SDW_PORT_DATA_MODE_NORMAL = 0, + SDW_PORT_DATA_MODE_STATIC_1 = 1, + SDW_PORT_DATA_MODE_STATIC_0 = 2, + SDW_PORT_DATA_MODE_PRBS = 3, +}; + /* * SDW properties, defined in MIPI DisCo spec v1.0 */ -- cgit v1.2.3 From 53f3c097375c94e87b41227ebdff83c4f4e3af41 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 22 May 2019 14:47:20 -0500 Subject: soundwire: remove master data port properties The SoundWire and DisCo specifications do not define Master data ports or related properties. Data ports are only defined for Slave devices, so remove the unused member in properties. Credits: this patch is based on an earlier internal contribution by Vinod Koul, Sanyog Kale, Shreyas Nc and Hardik Shah. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 69ae680a5a21..831a370eaedd 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -377,7 +377,6 @@ struct sdw_slave_prop { * @dynamic_frame: Dynamic frame supported * @err_threshold: Number of times that software may retry sending a single * command - * @dpn_prop: Data Port N properties */ struct sdw_master_prop { u32 revision; @@ -393,7 +392,6 @@ struct sdw_master_prop { u32 default_col; bool dynamic_frame; u32 err_threshold; - struct sdw_dpn_prop *dpn_prop; }; int sdw_master_read_prop(struct sdw_bus *bus); -- cgit v1.2.3 From 446701d1d161f04aa107903bfe7ec14e1f6cd17f Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 22 May 2019 14:47:21 -0500 Subject: soundwire: mipi-disco: remove master_count property for masters The master_count is only defined for a Controller or a Slave in the MIPI DisCo for SoundWire document. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 831a370eaedd..14376d8458c3 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -364,7 +364,6 @@ struct sdw_slave_prop { /** * struct sdw_master_prop - Master properties * @revision: MIPI spec version of the implementation - * @master_count: Number of masters * @clk_stop_mode: Bitmap for Clock Stop modes supported * @max_freq: Maximum Bus clock frequency, in Hz * @num_clk_gears: Number of clock gears supported @@ -380,7 +379,6 @@ struct sdw_slave_prop { */ struct sdw_master_prop { u32 revision; - u32 master_count; enum sdw_clk_stop_mode clk_stop_mode; u32 max_freq; u32 num_clk_gears; -- cgit v1.2.3 From 3424305b8be456a8e23c951b8c9aebad0c765ff7 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 22 May 2019 14:47:22 -0500 Subject: soundwire: rename 'freq' fields Rename all fields with 'freq' as 'clk_freq' to follow the MIPI specification and avoid confusion between bus clock and audio clocks. No functionality change. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Vinod Koul --- drivers/soundwire/bus.c | 4 ++-- drivers/soundwire/intel.c | 11 ++++++----- drivers/soundwire/mipi_disco.c | 23 ++++++++++++----------- drivers/soundwire/stream.c | 2 +- include/linux/soundwire/sdw.h | 12 ++++++------ 5 files changed, 27 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index aac35fc3cf22..96e42df8f458 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -87,7 +87,7 @@ int sdw_add_bus_master(struct sdw_bus *bus) /* * Initialize clock values based on Master properties. The max - * frequency is read from max_freq property. Current assumption + * frequency is read from max_clk_freq property. Current assumption * is that the bus will start at highest clock frequency when * powered on. * @@ -95,7 +95,7 @@ int sdw_add_bus_master(struct sdw_bus *bus) * to start with bank 0 (Table 40 of Spec) */ prop = &bus->prop; - bus->params.max_dr_freq = prop->max_freq * SDW_DOUBLE_RATE_FACTOR; + bus->params.max_dr_freq = prop->max_clk_freq * SDW_DOUBLE_RATE_FACTOR; bus->params.curr_dr_freq = bus->params.max_dr_freq; bus->params.curr_bank = SDW_BANK0; bus->params.next_bank = SDW_BANK1; diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index 31336b0271b0..4ac141730b13 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -796,13 +796,14 @@ static int intel_prop_read(struct sdw_bus *bus) sdw_master_read_prop(bus); /* BIOS is not giving some values correctly. So, lets override them */ - bus->prop.num_freq = 1; - bus->prop.freq = devm_kcalloc(bus->dev, bus->prop.num_freq, - sizeof(*bus->prop.freq), GFP_KERNEL); - if (!bus->prop.freq) + bus->prop.num_clk_freq = 1; + bus->prop.clk_freq = devm_kcalloc(bus->dev, bus->prop.num_clk_freq, + sizeof(*bus->prop.clk_freq), + GFP_KERNEL); + if (!bus->prop.clk_freq) return -ENOMEM; - bus->prop.freq[0] = bus->prop.max_freq; + bus->prop.clk_freq[0] = bus->prop.max_clk_freq; bus->prop.err_threshold = 5; return 0; diff --git a/drivers/soundwire/mipi_disco.c b/drivers/soundwire/mipi_disco.c index 6df68584c963..b1770af43fa8 100644 --- a/drivers/soundwire/mipi_disco.c +++ b/drivers/soundwire/mipi_disco.c @@ -58,31 +58,32 @@ int sdw_master_read_prop(struct sdw_bus *bus) fwnode_property_read_u32(link, "mipi-sdw-max-clock-frequency", - &prop->max_freq); + &prop->max_clk_freq); nval = fwnode_property_read_u32_array(link, "mipi-sdw-clock-frequencies-supported", NULL, 0); if (nval > 0) { - prop->num_freq = nval; - prop->freq = devm_kcalloc(bus->dev, prop->num_freq, - sizeof(*prop->freq), GFP_KERNEL); - if (!prop->freq) + prop->num_clk_freq = nval; + prop->clk_freq = devm_kcalloc(bus->dev, prop->num_clk_freq, + sizeof(*prop->clk_freq), + GFP_KERNEL); + if (!prop->clk_freq) return -ENOMEM; fwnode_property_read_u32_array(link, "mipi-sdw-clock-frequencies-supported", - prop->freq, prop->num_freq); + prop->clk_freq, prop->num_clk_freq); } /* * Check the frequencies supported. If FW doesn't provide max * freq, then populate here by checking values. */ - if (!prop->max_freq && prop->freq) { - prop->max_freq = prop->freq[0]; - for (i = 1; i < prop->num_freq; i++) { - if (prop->freq[i] > prop->max_freq) - prop->max_freq = prop->freq[i]; + if (!prop->max_clk_freq && prop->clk_freq) { + prop->max_clk_freq = prop->clk_freq[0]; + for (i = 1; i < prop->num_clk_freq; i++) { + if (prop->clk_freq[i] > prop->max_clk_freq) + prop->max_clk_freq = prop->clk_freq[i]; } } diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index d01060dbee96..89edc897b8eb 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -1474,7 +1474,7 @@ static int _sdw_prepare_stream(struct sdw_stream_runtime *stream) memcpy(¶ms, &bus->params, sizeof(params)); /* TODO: Support Asynchronous mode */ - if ((prop->max_freq % stream->params.rate) != 0) { + if ((prop->max_clk_freq % stream->params.rate) != 0) { dev_err(bus->dev, "Async mode not supported\n"); return -EINVAL; } diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 14376d8458c3..c6ded0d7a9f2 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -365,11 +365,11 @@ struct sdw_slave_prop { * struct sdw_master_prop - Master properties * @revision: MIPI spec version of the implementation * @clk_stop_mode: Bitmap for Clock Stop modes supported - * @max_freq: Maximum Bus clock frequency, in Hz + * @max_clk_freq: Maximum Bus clock frequency, in Hz * @num_clk_gears: Number of clock gears supported * @clk_gears: Clock gears supported - * @num_freq: Number of clock frequencies supported, in Hz - * @freq: Clock frequencies supported, in Hz + * @num_clk_freq: Number of clock frequencies supported, in Hz + * @clk_freq: Clock frequencies supported, in Hz * @default_frame_rate: Controller default Frame rate, in Hz * @default_row: Number of rows * @default_col: Number of columns @@ -380,11 +380,11 @@ struct sdw_slave_prop { struct sdw_master_prop { u32 revision; enum sdw_clk_stop_mode clk_stop_mode; - u32 max_freq; + u32 max_clk_freq; u32 num_clk_gears; u32 *clk_gears; - u32 num_freq; - u32 *freq; + u32 num_clk_freq; + u32 *clk_freq; u32 default_frame_rate; u32 default_row; u32 default_col; -- cgit v1.2.3 From 53d2e9c3773478d00d1851cc048d7ef9e60c7b6d Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 22 May 2019 14:47:23 -0500 Subject: soundwire: mipi-disco: fix clock stop modes Fix support for clock_stop_mode0 and 1. The existing code uses a bitmask between enums, one of which being zero. Or-ing with zero is not very useful in general...Fix by or-ing with a BIT dependent on the enum value. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Vinod Koul --- drivers/soundwire/mipi_disco.c | 4 ++-- include/linux/soundwire/sdw.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/soundwire/mipi_disco.c b/drivers/soundwire/mipi_disco.c index b1770af43fa8..efb87ee0e7fc 100644 --- a/drivers/soundwire/mipi_disco.c +++ b/drivers/soundwire/mipi_disco.c @@ -50,11 +50,11 @@ int sdw_master_read_prop(struct sdw_bus *bus) if (fwnode_property_read_bool(link, "mipi-sdw-clock-stop-mode0-supported")) - prop->clk_stop_mode = SDW_CLK_STOP_MODE0; + prop->clk_stop_modes |= BIT(SDW_CLK_STOP_MODE0); if (fwnode_property_read_bool(link, "mipi-sdw-clock-stop-mode1-supported")) - prop->clk_stop_mode |= SDW_CLK_STOP_MODE1; + prop->clk_stop_modes |= BIT(SDW_CLK_STOP_MODE1); fwnode_property_read_u32(link, "mipi-sdw-max-clock-frequency", diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index c6ded0d7a9f2..0e3fdd03e589 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -364,7 +364,7 @@ struct sdw_slave_prop { /** * struct sdw_master_prop - Master properties * @revision: MIPI spec version of the implementation - * @clk_stop_mode: Bitmap for Clock Stop modes supported + * @clk_stop_modes: Bitmap, bit N set when clock-stop-modeN supported * @max_clk_freq: Maximum Bus clock frequency, in Hz * @num_clk_gears: Number of clock gears supported * @clk_gears: Clock gears supported @@ -379,7 +379,7 @@ struct sdw_slave_prop { */ struct sdw_master_prop { u32 revision; - enum sdw_clk_stop_mode clk_stop_mode; + u32 clk_stop_modes; u32 max_clk_freq; u32 num_clk_gears; u32 *clk_gears; -- cgit v1.2.3 From f6686a7d026dc00cecca2d2cefb7f75bb2f84801 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 22 May 2019 14:47:24 -0500 Subject: soundwire: clarify comment The MIPI DisCo spec refers to dynamic frame shape, not to dynamic shape. Clarify. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 0e3fdd03e589..b7efa819d425 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -373,7 +373,7 @@ struct sdw_slave_prop { * @default_frame_rate: Controller default Frame rate, in Hz * @default_row: Number of rows * @default_col: Number of columns - * @dynamic_frame: Dynamic frame supported + * @dynamic_frame: Dynamic frame shape supported * @err_threshold: Number of times that software may retry sending a single * command */ -- cgit v1.2.3 From 8acbbfec280f1ee72ebeec407e39aa0d1b879b59 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 22 May 2019 14:47:25 -0500 Subject: soundwire: rename/clarify MIPI DisCo properties The existing definitions are ambiguous and possibly misleading. For DP0, 'flow-control' is only relevant for the BRA protocol and should not be confused with async modes explicitly not supported for DP0, add prefix to follow MIPI DisCo definition The use of 'device_interrupts' is also questionable. The MIPI SoundWire spec defines Slave-, DP0- and DPN-level implementation-defined interrupts. Using the 'device' prefix in the last two cases is misleading, not only is the term 'device' overloaded but these properties are only valid at the DP0 and DPn levels. Rename to follow the MIPI definitions, no need to be creative here. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Vinod Koul --- drivers/soundwire/bus.c | 2 +- drivers/soundwire/mipi_disco.c | 6 +++--- drivers/soundwire/stream.c | 6 +++--- include/linux/soundwire/sdw.h | 13 +++++++------ 4 files changed, 14 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index 96e42df8f458..fe745830a261 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -648,7 +648,7 @@ static int sdw_initialize_slave(struct sdw_slave *slave) return 0; /* Enable DP0 interrupts */ - val = prop->dp0_prop->device_interrupts; + val = prop->dp0_prop->imp_def_interrupts; val |= SDW_DP0_INT_PORT_READY | SDW_DP0_INT_BRA_FAILURE; ret = sdw_update(slave, SDW_DP0_INTMASK, val, val); diff --git a/drivers/soundwire/mipi_disco.c b/drivers/soundwire/mipi_disco.c index efb87ee0e7fc..79fee1b21ab6 100644 --- a/drivers/soundwire/mipi_disco.c +++ b/drivers/soundwire/mipi_disco.c @@ -150,13 +150,13 @@ static int sdw_slave_read_dp0(struct sdw_slave *slave, dp0->words, dp0->num_words); } - dp0->flow_controlled = fwnode_property_read_bool(port, + dp0->BRA_flow_controlled = fwnode_property_read_bool(port, "mipi-sdw-bra-flow-controlled"); dp0->simple_ch_prep_sm = fwnode_property_read_bool(port, "mipi-sdw-simplified-channel-prepare-sm"); - dp0->device_interrupts = fwnode_property_read_bool(port, + dp0->imp_def_interrupts = fwnode_property_read_bool(port, "mipi-sdw-imp-def-dp0-interrupts-supported"); return 0; @@ -225,7 +225,7 @@ static int sdw_slave_read_dpn(struct sdw_slave *slave, fwnode_property_read_u32(node, "mipi-sdw-imp-def-dpn-interrupts-supported", - &dpn[i].device_interrupts); + &dpn[i].imp_def_interrupts); fwnode_property_read_u32(node, "mipi-sdw-min-channel-number", &dpn[i].min_ch); diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index 89edc897b8eb..ce9cb7fa4724 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -439,7 +439,7 @@ static int sdw_prep_deprep_slave_ports(struct sdw_bus *bus, prep_ch.bank = bus->params.next_bank; - if (dpn_prop->device_interrupts || !dpn_prop->simple_ch_prep_sm) + if (dpn_prop->imp_def_interrupts || !dpn_prop->simple_ch_prep_sm) intr = true; /* @@ -449,7 +449,7 @@ static int sdw_prep_deprep_slave_ports(struct sdw_bus *bus, */ if (prep && intr) { ret = sdw_configure_dpn_intr(s_rt->slave, p_rt->num, prep, - dpn_prop->device_interrupts); + dpn_prop->imp_def_interrupts); if (ret < 0) return ret; } @@ -493,7 +493,7 @@ static int sdw_prep_deprep_slave_ports(struct sdw_bus *bus, /* Disable interrupt after Port de-prepare */ if (!prep && intr) ret = sdw_configure_dpn_intr(s_rt->slave, p_rt->num, prep, - dpn_prop->device_interrupts); + dpn_prop->imp_def_interrupts); return ret; } diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index b7efa819d425..bea46bd8b6ce 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -206,10 +206,11 @@ enum sdw_clk_stop_mode { * (inclusive) * @num_words: number of wordlengths supported * @words: wordlengths supported - * @flow_controlled: Slave implementation results in an OK_NotReady + * @BRA_flow_controlled: Slave implementation results in an OK_NotReady * response * @simple_ch_prep_sm: If channel prepare sequence is required - * @device_interrupts: If implementation-defined interrupts are supported + * @imp_def_interrupts: If set, each bit corresponds to support for + * implementation-defined interrupts * * The wordlengths are specified by Spec as max, min AND number of * discrete values, implementation can define based on the wordlengths they @@ -220,9 +221,9 @@ struct sdw_dp0_prop { u32 min_word; u32 num_words; u32 *words; - bool flow_controlled; + bool BRA_flow_controlled; bool simple_ch_prep_sm; - bool device_interrupts; + bool imp_def_interrupts; }; /** @@ -272,7 +273,7 @@ struct sdw_dpn_audio_mode { * @simple_ch_prep_sm: If the port supports simplified channel prepare state * machine * @ch_prep_timeout: Port-specific timeout value, in milliseconds - * @device_interrupts: If set, each bit corresponds to support for + * @imp_def_interrupts: If set, each bit corresponds to support for * implementation-defined interrupts * @max_ch: Maximum channels supported * @min_ch: Minimum channels supported @@ -297,7 +298,7 @@ struct sdw_dpn_prop { u32 max_grouping; bool simple_ch_prep_sm; u32 ch_prep_timeout; - u32 device_interrupts; + u32 imp_def_interrupts; u32 max_ch; u32 min_ch; u32 num_ch; -- cgit v1.2.3 From f5151311c3f37f6edc85b2253ccf6d3e2a4c4c26 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 20 May 2019 19:32:14 +0800 Subject: dmaengine: Add matching device node validation in __dma_request_channel() When user try to request one DMA channel by __dma_request_channel(), it won't validate if it is the correct DMA device to request, that will lead each DMA engine driver to validate the correct device node in their filter function if it is necessary. Thus we can add the matching device node validation in the DMA engine core, to remove all of device node validation in the drivers. Tested-by: Peter Ujfalusi Signed-off-by: Baolin Wang Signed-off-by: Vinod Koul --- drivers/dma/dmaengine.c | 10 ++++++++-- drivers/dma/of-dma.c | 4 ++-- include/linux/dmaengine.h | 12 ++++++++---- 3 files changed, 18 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 3a11b1092e80..610080c629bb 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -641,11 +641,13 @@ EXPORT_SYMBOL_GPL(dma_get_any_slave_channel); * @mask: capabilities that the channel must satisfy * @fn: optional callback to disposition available channels * @fn_param: opaque parameter to pass to dma_filter_fn + * @np: device node to look for DMA channels * * Returns pointer to appropriate DMA channel on success or NULL. */ struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask, - dma_filter_fn fn, void *fn_param) + dma_filter_fn fn, void *fn_param, + struct device_node *np) { struct dma_device *device, *_d; struct dma_chan *chan = NULL; @@ -653,6 +655,10 @@ struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask, /* Find a channel */ mutex_lock(&dma_list_mutex); list_for_each_entry_safe(device, _d, &dma_device_list, global_node) { + /* Finds a DMA controller with matching device node */ + if (np && device->dev->of_node && np != device->dev->of_node) + continue; + chan = find_candidate(device, mask, fn, fn_param); if (!IS_ERR(chan)) break; @@ -769,7 +775,7 @@ struct dma_chan *dma_request_chan_by_mask(const dma_cap_mask_t *mask) if (!mask) return ERR_PTR(-ENODEV); - chan = __dma_request_channel(mask, NULL, NULL); + chan = __dma_request_channel(mask, NULL, NULL, NULL); if (!chan) { mutex_lock(&dma_list_mutex); if (list_empty(&dma_device_list)) diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c index 91fd395c90c4..6b43d04da05d 100644 --- a/drivers/dma/of-dma.c +++ b/drivers/dma/of-dma.c @@ -316,8 +316,8 @@ struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec, if (count != 1) return NULL; - return dma_request_channel(info->dma_cap, info->filter_fn, - &dma_spec->args[0]); + return __dma_request_channel(&info->dma_cap, info->filter_fn, + &dma_spec->args[0], dma_spec->np); } EXPORT_SYMBOL_GPL(of_dma_simple_xlate); diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index d49ec5c31944..504085b2bf21 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -1314,7 +1314,8 @@ enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie); enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx); void dma_issue_pending_all(void); struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask, - dma_filter_fn fn, void *fn_param); + dma_filter_fn fn, void *fn_param, + struct device_node *np); struct dma_chan *dma_request_slave_channel(struct device *dev, const char *name); struct dma_chan *dma_request_chan(struct device *dev, const char *name); @@ -1339,7 +1340,9 @@ static inline void dma_issue_pending_all(void) { } static inline struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask, - dma_filter_fn fn, void *fn_param) + dma_filter_fn fn, + void *fn_param, + struct device_node *np) { return NULL; } @@ -1411,7 +1414,8 @@ void dma_async_device_unregister(struct dma_device *device); void dma_run_dependencies(struct dma_async_tx_descriptor *tx); struct dma_chan *dma_get_slave_channel(struct dma_chan *chan); struct dma_chan *dma_get_any_slave_channel(struct dma_device *device); -#define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y) +#define dma_request_channel(mask, x, y) \ + __dma_request_channel(&(mask), x, y, NULL) #define dma_request_slave_channel_compat(mask, x, y, dev, name) \ __dma_request_slave_channel_compat(&(mask), x, y, dev, name) @@ -1429,6 +1433,6 @@ static inline struct dma_chan if (!fn || !fn_param) return NULL; - return __dma_request_channel(mask, fn, fn_param); + return __dma_request_channel(mask, fn, fn_param, NULL); } #endif /* DMAENGINE_H */ -- cgit v1.2.3 From 990c0b53bf6599a9c9c7df1529dde681dee6cf64 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 20 May 2019 19:32:16 +0800 Subject: dmaengine: imx-sdma: Let the core do the device node validation Let the DMA engine core do the device node validation instead of drivers. Signed-off-by: Baolin Wang Signed-off-by: Vinod Koul --- drivers/dma/imx-sdma.c | 9 ++------- include/linux/platform_data/dma-imx.h | 1 - 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 99d9f431ae2c..ca296f0849ef 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1934,16 +1934,11 @@ disable_clk_ipg: static bool sdma_filter_fn(struct dma_chan *chan, void *fn_param) { struct sdma_channel *sdmac = to_sdma_chan(chan); - struct sdma_engine *sdma = sdmac->sdma; struct imx_dma_data *data = fn_param; if (!imx_dma_is_general_purpose(chan)) return false; - /* return false if it's not the right device */ - if (sdma->dev->of_node != data->of_node) - return false; - sdmac->data = *data; chan->private = &sdmac->data; @@ -1971,9 +1966,9 @@ static struct dma_chan *sdma_xlate(struct of_phandle_args *dma_spec, * be set to sdmac->event_id1. */ data.dma_request2 = 0; - data.of_node = ofdma->of_node; - return dma_request_channel(mask, sdma_filter_fn, &data); + return __dma_request_channel(&mask, sdma_filter_fn, &data, + ofdma->of_node); } static int sdma_probe(struct platform_device *pdev) diff --git a/include/linux/platform_data/dma-imx.h b/include/linux/platform_data/dma-imx.h index 9daea8d42a10..7d964e787299 100644 --- a/include/linux/platform_data/dma-imx.h +++ b/include/linux/platform_data/dma-imx.h @@ -55,7 +55,6 @@ struct imx_dma_data { int dma_request2; /* secondary DMA request line */ enum sdma_peripheral_type peripheral_type; int priority; - struct device_node *of_node; }; static inline int imx_dma_is_ipu(struct dma_chan *chan) -- cgit v1.2.3 From d27ac2e02bf256d4e824e7c1e1e1afa2b96cefcc Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 27 May 2019 09:55:16 +0300 Subject: include: fpga: adi-axi-common.h: add common regs & defs header The AXI HDL cores provided for Analog Devices reference designs all share some common base registers (e.g. version register at address 0x00). To reduce duplication for this, a common header is added to define these registers as well as bitfields & macros to work with these registers. Signed-off-by: Alexandru Ardelean Signed-off-by: Vinod Koul --- include/linux/fpga/adi-axi-common.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/linux/fpga/adi-axi-common.h (limited to 'include/linux') diff --git a/include/linux/fpga/adi-axi-common.h b/include/linux/fpga/adi-axi-common.h new file mode 100644 index 000000000000..7fc95d5c95bb --- /dev/null +++ b/include/linux/fpga/adi-axi-common.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Analog Devices AXI common registers & definitions + * + * Copyright 2019 Analog Devices Inc. + * + * https://wiki.analog.com/resources/fpga/docs/axi_ip + * https://wiki.analog.com/resources/fpga/docs/hdl/regmap + */ + +#ifndef ADI_AXI_COMMON_H_ +#define ADI_AXI_COMMON_H_ + +#define ADI_AXI_REG_VERSION 0x0000 + +#define ADI_AXI_PCORE_VER(major, minor, patch) \ + (((major) << 16) | ((minor) << 8) | (patch)) + +#endif /* ADI_AXI_COMMON_H_ */ -- cgit v1.2.3 From cdd3a2499d30695730b22dc025c00b9b28884c6b Mon Sep 17 00:00:00 2001 From: Sai Praneeth Prakhya Date: Fri, 24 May 2019 16:40:16 -0700 Subject: iommu/vt-d: Introduce macros useful for dumping DMAR table A scalable mode DMAR table walk would involve looking at bits in each stage of walk, like, 1. Is PASID enabled in the context entry? 2. What's the size of PASID directory? 3. Is the PASID directory entry present? 4. Is the PASID table entry present? 5. Number of PASID table entries? Hence, add these macros that will later be used during this walk. Apart from adding new macros, move existing macros (like pasid_pde_is_present(), get_pasid_table_from_pde() and pasid_supported()) to appropriate header files so that they could be reused. Cc: Joerg Roedel Cc: Ashok Raj Cc: Lu Baolu Cc: Sohil Mehta Cc: David Woodhouse Cc: Jacob Pan Cc: Andy Shevchenko Reviewed-by: Lu Baolu Reviewed-by: Andy Shevchenko Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 6 +----- drivers/iommu/intel-pasid.c | 17 ----------------- drivers/iommu/intel-pasid.h | 26 ++++++++++++++++++++++++++ include/linux/intel-iommu.h | 6 ++++++ 4 files changed, 33 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 1b7ad80c0537..9722c2ffe428 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -357,6 +357,7 @@ int dmar_disabled = 0; int dmar_disabled = 1; #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/ +int intel_iommu_sm; int intel_iommu_enabled = 0; EXPORT_SYMBOL_GPL(intel_iommu_enabled); @@ -364,17 +365,12 @@ static int dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; -static int intel_iommu_sm; static int iommu_identity_mapping; #define IDENTMAP_ALL 1 #define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA 4 -#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) -#define pasid_supported(iommu) (sm_supported(iommu) && \ - ecap_pasid((iommu)->ecap)) - int intel_iommu_gfx_mapped; EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index 2fefeafda437..6895a23b2157 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -169,23 +169,6 @@ attach_out: return 0; } -/* Get PRESENT bit of a PASID directory entry. */ -static inline bool -pasid_pde_is_present(struct pasid_dir_entry *pde) -{ - return READ_ONCE(pde->val) & PASID_PTE_PRESENT; -} - -/* Get PASID table from a PASID directory entry. */ -static inline struct pasid_entry * -get_pasid_table_from_pde(struct pasid_dir_entry *pde) -{ - if (!pasid_pde_is_present(pde)) - return NULL; - - return phys_to_virt(READ_ONCE(pde->val) & PDE_PFN_MASK); -} - void intel_pasid_free_table(struct device *dev) { struct device_domain_info *info; diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 23537b3f34e3..fc8cd8f17de1 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -18,6 +18,10 @@ #define PDE_PFN_MASK PAGE_MASK #define PASID_PDE_SHIFT 6 #define MAX_NR_PASID_BITS 20 +#define PASID_TBL_ENTRIES BIT(PASID_PDE_SHIFT) + +#define is_pasid_enabled(entry) (((entry)->lo >> 3) & 0x1) +#define get_pasid_dir_size(entry) (1 << ((((entry)->lo >> 9) & 0x7) + 7)) /* * Domain ID reserved for pasid entries programmed for first-level @@ -49,6 +53,28 @@ struct pasid_table { struct list_head dev; /* device list */ }; +/* Get PRESENT bit of a PASID directory entry. */ +static inline bool pasid_pde_is_present(struct pasid_dir_entry *pde) +{ + return READ_ONCE(pde->val) & PASID_PTE_PRESENT; +} + +/* Get PASID table from a PASID directory entry. */ +static inline struct pasid_entry * +get_pasid_table_from_pde(struct pasid_dir_entry *pde) +{ + if (!pasid_pde_is_present(pde)) + return NULL; + + return phys_to_virt(READ_ONCE(pde->val) & PDE_PFN_MASK); +} + +/* Get PRESENT bit of a PASID table entry. */ +static inline bool pasid_pte_is_present(struct pasid_entry *pte) +{ + return READ_ONCE(pte->val[0]) & PASID_PTE_PRESENT; +} + extern u32 intel_pasid_max_id; int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp); void intel_pasid_free_id(int pasid); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 6925a18a5ca3..4140726867a9 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -447,6 +447,12 @@ enum { #define VTD_FLAG_TRANS_PRE_ENABLED (1 << 0) #define VTD_FLAG_IRQ_REMAP_PRE_ENABLED (1 << 1) +extern int intel_iommu_sm; + +#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) +#define pasid_supported(iommu) (sm_supported(iommu) && \ + ecap_pasid((iommu)->ecap)) + struct pasid_entry; struct pasid_state_entry; struct page_req_dsc; -- cgit v1.2.3 From cb44c9a0ab21a9ae4dfcabac1ed8e38aa872d1af Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 21 May 2019 10:03:48 -0500 Subject: signal: Remove task parameter from force_sigsegv The function force_sigsegv is always called on the current task so passing in current is redundant and not passing in current makes this fact obvious. This also makes it clear force_sigsegv always calls force_sig on the current task. Signed-off-by: "Eric W. Biederman" --- arch/arc/kernel/process.c | 2 +- arch/ia64/kernel/signal.c | 6 +++--- arch/nios2/kernel/signal.c | 2 +- arch/sparc/kernel/signal32.c | 4 ++-- arch/sparc/kernel/signal_64.c | 2 +- arch/um/kernel/skas/mmu.c | 2 +- arch/um/kernel/trap.c | 2 +- arch/unicore32/kernel/signal.c | 2 +- fs/exec.c | 2 +- include/linux/sched/signal.h | 2 +- kernel/rseq.c | 2 +- kernel/signal.c | 6 ++++-- 12 files changed, 18 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 641c364fc232..725e556678a4 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -313,7 +313,7 @@ int elf_check_arch(const struct elf32_hdr *x) eflags = x->e_flags; if ((eflags & EF_ARC_OSABI_MSK) != EF_ARC_OSABI_CURRENT) { pr_err("ABI mismatch - you need newer toolchain\n"); - force_sigsegv(SIGSEGV, current); + force_sigsegv(SIGSEGV); return 0; } diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 6062fd14e34e..518cceb5d4af 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -257,7 +257,7 @@ setup_frame(struct ksignal *ksig, sigset_t *set, struct sigscratch *scr) */ check_sp = (new_sp - sizeof(*frame)) & -STACK_ALIGN; if (!likely(on_sig_stack(check_sp))) { - force_sigsegv(ksig->sig, current); + force_sigsegv(ksig->sig); return 1; } } @@ -265,7 +265,7 @@ setup_frame(struct ksignal *ksig, sigset_t *set, struct sigscratch *scr) frame = (void __user *) ((new_sp - sizeof(*frame)) & -STACK_ALIGN); if (!access_ok(frame, sizeof(*frame))) { - force_sigsegv(ksig->sig, current); + force_sigsegv(ksig->sig); return 1; } @@ -282,7 +282,7 @@ setup_frame(struct ksignal *ksig, sigset_t *set, struct sigscratch *scr) err |= setup_sigcontext(&frame->sc, set, scr); if (unlikely(err)) { - force_sigsegv(ksig->sig, current); + force_sigsegv(ksig->sig); return 1; } diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c index 4a81876b6086..9bf38531b189 100644 --- a/arch/nios2/kernel/signal.c +++ b/arch/nios2/kernel/signal.c @@ -211,7 +211,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, return 0; give_sigsegv: - force_sigsegv(ksig->sig, current); + force_sigsegv(ksig->sig); return -EFAULT; } diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index e800ce13cc6e..fb431d47a532 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -375,7 +375,7 @@ static int setup_frame32(struct ksignal *ksig, struct pt_regs *regs, pr_info("%s[%d] bad frame in setup_frame32: %08lx TPC %08lx O7 %08lx\n", current->comm, current->pid, (unsigned long)sf, regs->tpc, regs->u_regs[UREG_I7]); - force_sigsegv(ksig->sig, current); + force_sigsegv(ksig->sig); return -EINVAL; } @@ -509,7 +509,7 @@ static int setup_rt_frame32(struct ksignal *ksig, struct pt_regs *regs, pr_info("%s[%d] bad frame in setup_rt_frame32: %08lx TPC %08lx O7 %08lx\n", current->comm, current->pid, (unsigned long)sf, regs->tpc, regs->u_regs[UREG_I7]); - force_sigsegv(ksig->sig, current); + force_sigsegv(ksig->sig); return -EINVAL; } diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index ca70787efd8e..9d50190cf312 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -374,7 +374,7 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) pr_info("%s[%d] bad frame in setup_rt_frame: %016lx TPC %016lx O7 %016lx\n", current->comm, current->pid, (unsigned long)sf, regs->tpc, regs->u_regs[UREG_I7]); - force_sigsegv(ksig->sig, current); + force_sigsegv(ksig->sig); return -EINVAL; } diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 7a1f2a936fd1..29e7f5f9f188 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -119,7 +119,7 @@ void uml_setup_stubs(struct mm_struct *mm) return; out: - force_sigsegv(SIGSEGV, current); + force_sigsegv(SIGSEGV); } void arch_exit_mmap(struct mm_struct *mm) diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 0e8b6158f224..646059402ab3 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -169,7 +169,7 @@ static void bad_segv(struct faultinfo fi, unsigned long ip) void fatal_sigsegv(void) { - force_sigsegv(SIGSEGV, current); + force_sigsegv(SIGSEGV); do_signal(¤t->thread.regs); /* * This is to tell gcc that we're not returning - do_signal diff --git a/arch/unicore32/kernel/signal.c b/arch/unicore32/kernel/signal.c index 63be04809d40..75f27dc68bd0 100644 --- a/arch/unicore32/kernel/signal.c +++ b/arch/unicore32/kernel/signal.c @@ -386,7 +386,7 @@ static void do_signal(struct pt_regs *regs, int syscall) regs->UCreg_pc = KERN_RESTART_CODE; } else { regs->UCreg_sp += 4; - force_sigsegv(0, current); + force_sigsegv(0); } } if (regs->UCreg_00 == -ERESTARTNOHAND || diff --git a/fs/exec.c b/fs/exec.c index d88584ebf07f..f5568e45d521 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1662,7 +1662,7 @@ int search_binary_handler(struct linux_binprm *bprm) if (retval < 0 && !bprm->mm) { /* we got to flush_old_exec() and failed after it */ read_unlock(&binfmt_lock); - force_sigsegv(SIGSEGV, current); + force_sigsegv(SIGSEGV); return retval; } if (retval != -ENOEXEC || !bprm->file) { diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index c68ca81db0a1..8af3101da782 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -325,7 +325,7 @@ int force_sig_pkuerr(void __user *addr, u32 pkey); int force_sig_ptrace_errno_trap(int errno, void __user *addr); extern int send_sig_info(int, struct kernel_siginfo *, struct task_struct *); -extern void force_sigsegv(int sig, struct task_struct *p); +extern void force_sigsegv(int sig); extern int force_sig_info(int, struct kernel_siginfo *, struct task_struct *); extern int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp); extern int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid); diff --git a/kernel/rseq.c b/kernel/rseq.c index 9424ee90589e..e1aa3ebee291 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -277,7 +277,7 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) error: sig = ksig ? ksig->sig : 0; - force_sigsegv(sig, t); + force_sigsegv(sig); } #ifdef CONFIG_DEBUG_RSEQ diff --git a/kernel/signal.c b/kernel/signal.c index 39a3eca5ce22..f7669d240ce4 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1607,8 +1607,10 @@ EXPORT_SYMBOL(force_sig); * the problem was already a SIGSEGV, we'll want to * make sure we don't even try to deliver the signal.. */ -void force_sigsegv(int sig, struct task_struct *p) +void force_sigsegv(int sig) { + struct task_struct *p = current; + if (sig == SIGSEGV) { unsigned long flags; spin_lock_irqsave(&p->sighand->siglock, flags); @@ -2717,7 +2719,7 @@ static void signal_delivered(struct ksignal *ksig, int stepping) void signal_setup_done(int failed, struct ksignal *ksig, int stepping) { if (failed) - force_sigsegv(ksig->sig, current); + force_sigsegv(ksig->sig); else signal_delivered(ksig, stepping); } -- cgit v1.2.3 From 3cf5d076fb4d48979f382bc9452765bf8b79e740 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 23 May 2019 10:17:27 -0500 Subject: signal: Remove task parameter from force_sig All of the remaining callers pass current into force_sig so remove the task parameter to make this obvious and to make misuse more difficult in the future. This also makes it clear force_sig passes current into force_sig_info. Signed-off-by: "Eric W. Biederman" --- arch/alpha/kernel/signal.c | 4 ++-- arch/arc/kernel/process.c | 2 +- arch/arc/kernel/signal.c | 2 +- arch/arm/kernel/signal.c | 4 ++-- arch/arm64/kernel/traps.c | 2 +- arch/c6x/kernel/signal.c | 2 +- arch/csky/kernel/signal.c | 4 +--- arch/h8300/kernel/ptrace_h.c | 4 ++-- arch/h8300/kernel/ptrace_s.c | 2 +- arch/h8300/kernel/signal.c | 2 +- arch/hexagon/kernel/signal.c | 2 +- arch/hexagon/kernel/traps.c | 10 ++++----- arch/ia64/kernel/signal.c | 2 +- arch/ia64/kernel/traps.c | 6 +++--- arch/m68k/kernel/signal.c | 4 ++-- arch/m68k/kernel/traps.c | 16 +++++++-------- arch/microblaze/kernel/signal.c | 2 +- arch/mips/kernel/branch.c | 18 ++++++++--------- arch/mips/kernel/kprobes.c | 2 +- arch/mips/kernel/signal.c | 8 ++++---- arch/mips/kernel/signal_n32.c | 4 ++-- arch/mips/kernel/signal_o32.c | 8 ++++---- arch/mips/kernel/traps.c | 36 ++++++++++++++++----------------- arch/mips/kernel/unaligned.c | 20 +++++++++--------- arch/mips/sgi-ip22/ip22-berr.c | 2 +- arch/mips/sgi-ip22/ip28-berr.c | 2 +- arch/mips/sgi-ip27/ip27-berr.c | 2 +- arch/mips/sgi-ip32/ip32-berr.c | 2 +- arch/nds32/kernel/signal.c | 2 +- arch/nds32/kernel/traps.c | 6 +++--- arch/nios2/kernel/signal.c | 2 +- arch/openrisc/kernel/signal.c | 2 +- arch/openrisc/kernel/traps.c | 4 ++-- arch/parisc/kernel/signal.c | 2 +- arch/powerpc/kernel/signal_32.c | 6 +++--- arch/powerpc/kernel/signal_64.c | 2 +- arch/powerpc/platforms/cell/spufs/run.c | 2 +- arch/riscv/kernel/signal.c | 2 +- arch/s390/kernel/compat_signal.c | 4 ++-- arch/s390/kernel/signal.c | 4 ++-- arch/sh/kernel/cpu/sh2a/fpu.c | 2 +- arch/sh/kernel/cpu/sh4/fpu.c | 2 +- arch/sh/kernel/cpu/sh5/fpu.c | 4 +--- arch/sh/kernel/ptrace_64.c | 4 ++-- arch/sh/kernel/signal_32.c | 4 ++-- arch/sh/kernel/signal_64.c | 4 ++-- arch/sh/kernel/traps.c | 4 ++-- arch/sh/kernel/traps_32.c | 8 +++----- arch/sh/kernel/traps_64.c | 2 +- arch/sparc/kernel/process_64.c | 2 +- arch/sparc/kernel/signal32.c | 4 ++-- arch/sparc/kernel/signal_32.c | 4 ++-- arch/sparc/kernel/signal_64.c | 6 +++--- arch/sparc/kernel/traps_64.c | 2 +- arch/sparc/mm/fault_32.c | 2 +- arch/um/kernel/exec.c | 2 +- arch/um/kernel/tlb.c | 4 ++-- arch/um/kernel/trap.c | 2 +- arch/unicore32/kernel/signal.c | 2 +- arch/x86/entry/vsyscall/vsyscall_64.c | 2 +- arch/x86/kernel/cpu/mce/core.c | 2 +- arch/x86/kernel/signal.c | 2 +- arch/x86/kernel/traps.c | 4 ++-- arch/x86/kernel/uprobes.c | 2 +- arch/x86/kernel/vm86_32.c | 2 +- arch/x86/mm/mpx.c | 2 +- arch/x86/um/signal.c | 4 ++-- arch/xtensa/kernel/signal.c | 2 +- arch/xtensa/kernel/traps.c | 6 +++--- drivers/misc/lkdtm/bugs.c | 2 +- include/linux/sched/signal.h | 2 +- include/linux/syscalls.h | 2 +- kernel/events/uprobes.c | 4 ++-- kernel/rseq.c | 2 +- kernel/signal.c | 6 +++--- security/safesetid/lsm.c | 4 ++-- 76 files changed, 160 insertions(+), 166 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index 33e904a05881..a813020d2f11 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -225,7 +225,7 @@ do_sigreturn(struct sigcontext __user *sc) return; give_sigsegv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } asmlinkage void @@ -253,7 +253,7 @@ do_rt_sigreturn(struct rt_sigframe __user *frame) return; give_sigsegv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 725e556678a4..deee16d5c03f 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -100,7 +100,7 @@ fault: goto again; fail: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return ret; } diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 1bfb7de696bd..547c8f0cdc3a 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -197,7 +197,7 @@ SYSCALL_DEFINE0(rt_sigreturn) return regs->r0; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index be5edfdde558..3870e0588d53 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -250,7 +250,7 @@ asmlinkage int sys_sigreturn(struct pt_regs *regs) return regs->ARM_r0; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } @@ -283,7 +283,7 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs) return regs->ARM_r0; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index e45d5b440fb1..64abe8450780 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -257,7 +257,7 @@ void arm64_force_sig_fault(int signo, int code, void __user *addr, { arm64_show_signal(signo, str); if (signo == SIGKILL) - force_sig(SIGKILL, current); + force_sig(SIGKILL); else force_sig_fault(signo, code, addr, current); } diff --git a/arch/c6x/kernel/signal.c b/arch/c6x/kernel/signal.c index 33b9f69c38f7..775de34b233a 100644 --- a/arch/c6x/kernel/signal.c +++ b/arch/c6x/kernel/signal.c @@ -93,7 +93,7 @@ asmlinkage int do_rt_sigreturn(struct pt_regs *regs) return regs->a4; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index 04a43cfd4e09..7c09adeb58bb 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -61,7 +61,6 @@ SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; - struct task_struct *task; sigset_t set; /* Always make any pending restarted system calls return -EINTR */ @@ -86,8 +85,7 @@ SYSCALL_DEFINE0(rt_sigreturn) return regs->a0; badframe: - task = current; - force_sig(SIGSEGV, task); + force_sig(SIGSEGV); return 0; } diff --git a/arch/h8300/kernel/ptrace_h.c b/arch/h8300/kernel/ptrace_h.c index f5ff3b794c85..15db45a03b04 100644 --- a/arch/h8300/kernel/ptrace_h.c +++ b/arch/h8300/kernel/ptrace_h.c @@ -250,7 +250,7 @@ asmlinkage void trace_trap(unsigned long bp) { if ((unsigned long)current->thread.breakinfo.addr == bp) { user_disable_single_step(current); - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); } else - force_sig(SIGILL, current); + force_sig(SIGILL); } diff --git a/arch/h8300/kernel/ptrace_s.c b/arch/h8300/kernel/ptrace_s.c index c0af930052c0..ee21f37b7ed4 100644 --- a/arch/h8300/kernel/ptrace_s.c +++ b/arch/h8300/kernel/ptrace_s.c @@ -40,5 +40,5 @@ void user_enable_single_step(struct task_struct *child) asmlinkage void trace_trap(unsigned long bp) { (void)bp; - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); } diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index e0f2b708e5d9..ef7489b7c459 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -126,7 +126,7 @@ asmlinkage int sys_rt_sigreturn(void) return er0; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c index 31e2cf95f189..0433fcbb496c 100644 --- a/arch/hexagon/kernel/signal.c +++ b/arch/hexagon/kernel/signal.c @@ -265,6 +265,6 @@ asmlinkage int sys_rt_sigreturn(void) return regs->r00; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 91ee04842c22..e634414361df 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -252,7 +252,7 @@ int die_if_kernel(char *str, struct pt_regs *regs, long err) static void misaligned_instruction(struct pt_regs *regs) { die_if_kernel("Misaligned Instruction", regs, 0); - force_sig(SIGBUS, current); + force_sig(SIGBUS); } /* @@ -263,19 +263,19 @@ static void misaligned_instruction(struct pt_regs *regs) static void misaligned_data_load(struct pt_regs *regs) { die_if_kernel("Misaligned Data Load", regs, 0); - force_sig(SIGBUS, current); + force_sig(SIGBUS); } static void misaligned_data_store(struct pt_regs *regs) { die_if_kernel("Misaligned Data Store", regs, 0); - force_sig(SIGBUS, current); + force_sig(SIGBUS); } static void illegal_instruction(struct pt_regs *regs) { die_if_kernel("Illegal Instruction", regs, 0); - force_sig(SIGILL, current); + force_sig(SIGILL); } /* @@ -285,7 +285,7 @@ static void illegal_instruction(struct pt_regs *regs) static void precise_bus_error(struct pt_regs *regs) { die_if_kernel("Precise Bus Error", regs, 0); - force_sig(SIGBUS, current); + force_sig(SIGBUS); } /* diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 518cceb5d4af..e5044aed9452 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -152,7 +152,7 @@ ia64_rt_sigreturn (struct sigscratch *scr) return retval; give_sigsegv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return retval; } diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 85d8616ac4f6..0a3adbfebc2a 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -589,14 +589,14 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n"); printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n", iip, ifa, isr); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; case 46: printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n"); printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n", iip, ifa, isr, iim); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; case 47: @@ -608,5 +608,5 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, break; } if (!die_if_kernel(buf, ®s, error)) - force_sig(SIGILL, current); + force_sig(SIGILL); } diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index 87e7f3639839..05610e6924c1 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -803,7 +803,7 @@ asmlinkage int do_sigreturn(struct pt_regs *regs, struct switch_stack *sw) return regs->d0; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } @@ -825,7 +825,7 @@ asmlinkage int do_rt_sigreturn(struct pt_regs *regs, struct switch_stack *sw) return regs->d0; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index b2fd000b9285..2b6e143abd73 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -431,7 +431,7 @@ static inline void bus_error030 (struct frame *fp) pr_err("BAD KERNEL BUSERR\n"); die_if_kernel("Oops", &fp->ptregs,0); - force_sig(SIGKILL, current); + force_sig(SIGKILL); return; } } else { @@ -463,7 +463,7 @@ static inline void bus_error030 (struct frame *fp) !(ssw & RW) ? "write" : "read", addr, fp->ptregs.pc); die_if_kernel ("Oops", &fp->ptregs, buserr_type); - force_sig (SIGBUS, current); + force_sig (SIGBUS); return; } @@ -493,7 +493,7 @@ static inline void bus_error030 (struct frame *fp) do_page_fault (&fp->ptregs, addr, 0); } else { pr_debug("protection fault on insn access (segv).\n"); - force_sig (SIGSEGV, current); + force_sig (SIGSEGV); } } #else @@ -571,7 +571,7 @@ static inline void bus_error030 (struct frame *fp) !(ssw & RW) ? "write" : "read", addr, fp->ptregs.pc); die_if_kernel("Oops",&fp->ptregs,mmusr); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; } else { #if 0 @@ -598,7 +598,7 @@ static inline void bus_error030 (struct frame *fp) #endif pr_debug("Unknown SIGSEGV - 1\n"); die_if_kernel("Oops",&fp->ptregs,mmusr); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; } @@ -621,7 +621,7 @@ static inline void bus_error030 (struct frame *fp) buserr: pr_err("BAD KERNEL BUSERR\n"); die_if_kernel("Oops",&fp->ptregs,0); - force_sig(SIGKILL, current); + force_sig(SIGKILL); return; } @@ -660,7 +660,7 @@ static inline void bus_error030 (struct frame *fp) addr, fp->ptregs.pc); pr_debug("Unknown SIGSEGV - 2\n"); die_if_kernel("Oops",&fp->ptregs,mmusr); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; } @@ -804,7 +804,7 @@ asmlinkage void buserr_c(struct frame *fp) default: die_if_kernel("bad frame format",&fp->ptregs,0); pr_debug("Unknown SIGSEGV - 4\n"); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } } diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c index 0685696349bb..cdd4feb279c5 100644 --- a/arch/microblaze/kernel/signal.c +++ b/arch/microblaze/kernel/signal.c @@ -108,7 +108,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) return rval; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index 180ad081afcf..1db29957a931 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -32,7 +32,7 @@ int __isa_exception_epc(struct pt_regs *regs) /* Calculate exception PC in branch delay slot. */ if (__get_user(inst, (u16 __user *) msk_isa16_mode(epc))) { /* This should never happen because delay slot was checked. */ - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return epc; } if (cpu_has_mips16) { @@ -305,7 +305,7 @@ int __microMIPS_compute_return_epc(struct pt_regs *regs) return 0; sigsegv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return -EFAULT; } @@ -328,7 +328,7 @@ int __MIPS16e_compute_return_epc(struct pt_regs *regs) /* Read the instruction. */ addr = (u16 __user *)msk_isa16_mode(epc); if (__get_user(inst.full, addr)) { - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return -EFAULT; } @@ -343,7 +343,7 @@ int __MIPS16e_compute_return_epc(struct pt_regs *regs) case MIPS16e_jal_op: addr += 1; if (__get_user(inst2, addr)) { - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return -EFAULT; } fullinst = ((unsigned)inst.full << 16) | inst2; @@ -829,17 +829,17 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, sigill_dsp: pr_debug("%s: DSP branch but not DSP ASE - sending SIGILL.\n", current->comm); - force_sig(SIGILL, current); + force_sig(SIGILL); return -EFAULT; sigill_r2r6: pr_debug("%s: R2 branch but r2-to-r6 emulator is not present - sending SIGILL.\n", current->comm); - force_sig(SIGILL, current); + force_sig(SIGILL); return -EFAULT; sigill_r6: pr_debug("%s: R6 branch but no MIPSr6 ISA support - sending SIGILL.\n", current->comm); - force_sig(SIGILL, current); + force_sig(SIGILL); return -EFAULT; } EXPORT_SYMBOL_GPL(__compute_return_epc_for_insn); @@ -859,7 +859,7 @@ int __compute_return_epc(struct pt_regs *regs) */ addr = (unsigned int __user *) epc; if (__get_user(insn.word, addr)) { - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return -EFAULT; } @@ -867,7 +867,7 @@ int __compute_return_epc(struct pt_regs *regs) unaligned: printk("%s: unaligned epc - sending SIGBUS.\n", current->comm); - force_sig(SIGBUS, current); + force_sig(SIGBUS); return -EFAULT; } diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c index 54cd675c5d1d..62af3ed65794 100644 --- a/arch/mips/kernel/kprobes.c +++ b/arch/mips/kernel/kprobes.c @@ -232,7 +232,7 @@ static int evaluate_branch_instruction(struct kprobe *p, struct pt_regs *regs, unaligned: pr_notice("%s: unaligned epc - sending SIGBUS.\n", current->comm); - force_sig(SIGBUS, current); + force_sig(SIGBUS); return -EFAULT; } diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index d75337974ee9..f6efabcb4e92 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -641,7 +641,7 @@ asmlinkage void sys_sigreturn(void) if (sig < 0) goto badframe; else if (sig) - force_sig(sig, current); + force_sig(sig); /* * Don't let your children do this ... @@ -654,7 +654,7 @@ asmlinkage void sys_sigreturn(void) /* Unreached */ badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } #endif /* CONFIG_TRAD_SIGNALS */ @@ -678,7 +678,7 @@ asmlinkage void sys_rt_sigreturn(void) if (sig < 0) goto badframe; else if (sig) - force_sig(sig, current); + force_sig(sig); if (restore_altstack(&frame->rs_uc.uc_stack)) goto badframe; @@ -694,7 +694,7 @@ asmlinkage void sys_rt_sigreturn(void) /* Unreached */ badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } #ifdef CONFIG_TRAD_SIGNALS diff --git a/arch/mips/kernel/signal_n32.c b/arch/mips/kernel/signal_n32.c index c498b027823e..a7601e862261 100644 --- a/arch/mips/kernel/signal_n32.c +++ b/arch/mips/kernel/signal_n32.c @@ -84,7 +84,7 @@ asmlinkage void sysn32_rt_sigreturn(void) if (sig < 0) goto badframe; else if (sig) - force_sig(sig, current); + force_sig(sig); if (compat_restore_altstack(&frame->rs_uc.uc_stack)) goto badframe; @@ -100,7 +100,7 @@ asmlinkage void sysn32_rt_sigreturn(void) /* Unreached */ badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } static int setup_rt_frame_n32(void *sig_return, struct ksignal *ksig, diff --git a/arch/mips/kernel/signal_o32.c b/arch/mips/kernel/signal_o32.c index df259618e834..299a7a28ca33 100644 --- a/arch/mips/kernel/signal_o32.c +++ b/arch/mips/kernel/signal_o32.c @@ -171,7 +171,7 @@ asmlinkage void sys32_rt_sigreturn(void) if (sig < 0) goto badframe; else if (sig) - force_sig(sig, current); + force_sig(sig); if (compat_restore_altstack(&frame->rs_uc.uc_stack)) goto badframe; @@ -187,7 +187,7 @@ asmlinkage void sys32_rt_sigreturn(void) /* Unreached */ badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } static int setup_rt_frame_32(void *sig_return, struct ksignal *ksig, @@ -273,7 +273,7 @@ asmlinkage void sys32_sigreturn(void) if (sig < 0) goto badframe; else if (sig) - force_sig(sig, current); + force_sig(sig); /* * Don't let your children do this ... @@ -286,5 +286,5 @@ asmlinkage void sys32_sigreturn(void) /* Unreached */ badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index c52766a5b85f..a6031b045b95 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -482,7 +482,7 @@ asmlinkage void do_be(struct pt_regs *regs) goto out; die_if_kernel("Oops", regs); - force_sig(SIGBUS, current); + force_sig(SIGBUS); out: exception_exit(prev_state); @@ -765,7 +765,7 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31) return 1; default: - force_sig(sig, current); + force_sig(sig); return 1; } } @@ -947,7 +947,7 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code, break; case BRK_BUG: die_if_kernel("Kernel bug detected", regs); - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); break; case BRK_MEMU: /* @@ -962,7 +962,7 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code, return; die_if_kernel("Math emu break/trap", regs); - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); break; default: scnprintf(b, sizeof(b), "%s instruction in kernel code", str); @@ -970,7 +970,7 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code, if (si_code) { force_sig_fault(SIGTRAP, si_code, NULL, current); } else { - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); } } } @@ -1063,7 +1063,7 @@ out: return; out_sigsegv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); goto out; } @@ -1105,7 +1105,7 @@ out: return; out_sigsegv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); goto out; } @@ -1191,7 +1191,7 @@ no_r2_instr: if (unlikely(status > 0)) { regs->cp0_epc = old_epc; /* Undo skip-over. */ regs->regs[31] = old31; - force_sig(status, current); + force_sig(status); } out: @@ -1220,7 +1220,7 @@ static int default_cu2_call(struct notifier_block *nfb, unsigned long action, die_if_kernel("COP2: Unhandled kernel unaligned access or invalid " "instruction", regs); - force_sig(SIGILL, current); + force_sig(SIGILL); return NOTIFY_OK; } @@ -1383,7 +1383,7 @@ asmlinkage void do_cpu(struct pt_regs *regs) if (unlikely(status > 0)) { regs->cp0_epc = old_epc; /* Undo skip-over. */ regs->regs[31] = old31; - force_sig(status, current); + force_sig(status); } break; @@ -1403,7 +1403,7 @@ asmlinkage void do_cpu(struct pt_regs *regs) * emulator too. */ if (raw_cpu_has_fpu || !cpu_has_mips_4_5_64_r2_r6) { - force_sig(SIGILL, current); + force_sig(SIGILL); break; } /* Fall through. */ @@ -1437,7 +1437,7 @@ asmlinkage void do_cpu(struct pt_regs *regs) #else /* CONFIG_MIPS_FP_SUPPORT */ case 1: case 3: - force_sig(SIGILL, current); + force_sig(SIGILL); break; #endif /* CONFIG_MIPS_FP_SUPPORT */ @@ -1464,7 +1464,7 @@ asmlinkage void do_msa_fpe(struct pt_regs *regs, unsigned int msacsr) local_irq_enable(); die_if_kernel("do_msa_fpe invoked from kernel context!", regs); - force_sig(SIGFPE, current); + force_sig(SIGFPE); out: exception_exit(prev_state); } @@ -1477,7 +1477,7 @@ asmlinkage void do_msa(struct pt_regs *regs) prev_state = exception_enter(); if (!cpu_has_msa || test_thread_flag(TIF_32BIT_FPREGS)) { - force_sig(SIGILL, current); + force_sig(SIGILL); goto out; } @@ -1485,7 +1485,7 @@ asmlinkage void do_msa(struct pt_regs *regs) err = enable_restore_fp_context(1); if (err) - force_sig(SIGILL, current); + force_sig(SIGILL); out: exception_exit(prev_state); } @@ -1495,7 +1495,7 @@ asmlinkage void do_mdmx(struct pt_regs *regs) enum ctx_state prev_state; prev_state = exception_enter(); - force_sig(SIGILL, current); + force_sig(SIGILL); exception_exit(prev_state); } @@ -1592,7 +1592,7 @@ asmlinkage void do_mt(struct pt_regs *regs) } die_if_kernel("MIPS MT Thread exception in kernel", regs); - force_sig(SIGILL, current); + force_sig(SIGILL); } @@ -1601,7 +1601,7 @@ asmlinkage void do_dsp(struct pt_regs *regs) if (cpu_has_dsp) panic("Unexpected DSP exception"); - force_sig(SIGILL, current); + force_sig(SIGILL); } asmlinkage void do_reserved(struct pt_regs *regs) diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index 76e33f940971..92bd2b0f0548 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -1365,20 +1365,20 @@ fault: return; die_if_kernel("Unhandled kernel unaligned access", regs); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; sigbus: die_if_kernel("Unhandled kernel unaligned access", regs); - force_sig(SIGBUS, current); + force_sig(SIGBUS); return; sigill: die_if_kernel ("Unhandled kernel unaligned access or invalid instruction", regs); - force_sig(SIGILL, current); + force_sig(SIGILL); } /* Recode table from 16-bit register notation to 32-bit GPR. */ @@ -1991,20 +1991,20 @@ fault: return; die_if_kernel("Unhandled kernel unaligned access", regs); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; sigbus: die_if_kernel("Unhandled kernel unaligned access", regs); - force_sig(SIGBUS, current); + force_sig(SIGBUS); return; sigill: die_if_kernel ("Unhandled kernel unaligned access or invalid instruction", regs); - force_sig(SIGILL, current); + force_sig(SIGILL); } static void emulate_load_store_MIPS16e(struct pt_regs *regs, void __user * addr) @@ -2271,20 +2271,20 @@ fault: return; die_if_kernel("Unhandled kernel unaligned access", regs); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; sigbus: die_if_kernel("Unhandled kernel unaligned access", regs); - force_sig(SIGBUS, current); + force_sig(SIGBUS); return; sigill: die_if_kernel ("Unhandled kernel unaligned access or invalid instruction", regs); - force_sig(SIGILL, current); + force_sig(SIGILL); } asmlinkage void do_ade(struct pt_regs *regs) @@ -2364,7 +2364,7 @@ asmlinkage void do_ade(struct pt_regs *regs) sigbus: die_if_kernel("Kernel unaligned instruction access", regs); - force_sig(SIGBUS, current); + force_sig(SIGBUS); /* * XXX On return from the signal handler we should advance the epc diff --git a/arch/mips/sgi-ip22/ip22-berr.c b/arch/mips/sgi-ip22/ip22-berr.c index 34bb9801d5ff..dc0110a607a5 100644 --- a/arch/mips/sgi-ip22/ip22-berr.c +++ b/arch/mips/sgi-ip22/ip22-berr.c @@ -98,7 +98,7 @@ void ip22_be_interrupt(int irq) field, regs->cp0_epc, field, regs->regs[31]); /* Assume it would be too dangerous to continue ... */ die_if_kernel("Oops", regs); - force_sig(SIGBUS, current); + force_sig(SIGBUS); } static int ip22_be_handler(struct pt_regs *regs, int is_fixup) diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c index 082541d33161..c0cf7baee36d 100644 --- a/arch/mips/sgi-ip22/ip28-berr.c +++ b/arch/mips/sgi-ip22/ip28-berr.c @@ -462,7 +462,7 @@ void ip22_be_interrupt(int irq) if (ip28_be_interrupt(regs) != MIPS_BE_DISCARD) { /* Assume it would be too dangerous to continue ... */ die_if_kernel("Oops", regs); - force_sig(SIGBUS, current); + force_sig(SIGBUS); } else if (debug_be_interrupt) show_regs(regs); } diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c index 83efe03d5c60..73ad29b180fb 100644 --- a/arch/mips/sgi-ip27/ip27-berr.c +++ b/arch/mips/sgi-ip27/ip27-berr.c @@ -74,7 +74,7 @@ int ip27_be_handler(struct pt_regs *regs, int is_fixup) show_regs(regs); dump_tlb_all(); while(1); - force_sig(SIGBUS, current); + force_sig(SIGBUS); } void __init ip27_be_init(void) diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c index c1f12a9cf305..c860f95ab7ed 100644 --- a/arch/mips/sgi-ip32/ip32-berr.c +++ b/arch/mips/sgi-ip32/ip32-berr.c @@ -29,7 +29,7 @@ static int ip32_be_handler(struct pt_regs *regs, int is_fixup) show_regs(regs); dump_tlb_all(); while(1); - force_sig(SIGBUS, current); + force_sig(SIGBUS); } void __init ip32_be_init(void) diff --git a/arch/nds32/kernel/signal.c b/arch/nds32/kernel/signal.c index 5f7660aa2d68..fe61513982b4 100644 --- a/arch/nds32/kernel/signal.c +++ b/arch/nds32/kernel/signal.c @@ -163,7 +163,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) return regs->uregs[0]; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index 5aa7c17da27a..8d84b8b30eb6 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -288,7 +288,7 @@ void unhandled_interruption(struct pt_regs *regs) show_regs(regs); if (!user_mode(regs)) do_exit(SIGKILL); - force_sig(SIGKILL, current); + force_sig(SIGKILL); } void unhandled_exceptions(unsigned long entry, unsigned long addr, @@ -299,7 +299,7 @@ void unhandled_exceptions(unsigned long entry, unsigned long addr, show_regs(regs); if (!user_mode(regs)) do_exit(SIGKILL); - force_sig(SIGKILL, current); + force_sig(SIGKILL); } extern int do_page_fault(unsigned long entry, unsigned long addr, @@ -326,7 +326,7 @@ void do_revinsn(struct pt_regs *regs) show_regs(regs); if (!user_mode(regs)) do_exit(SIGILL); - force_sig(SIGILL, current); + force_sig(SIGILL); } #ifdef CONFIG_ALIGNMENT_TRAP diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c index 9bf38531b189..a42dd09c6578 100644 --- a/arch/nios2/kernel/signal.c +++ b/arch/nios2/kernel/signal.c @@ -120,7 +120,7 @@ asmlinkage int do_rt_sigreturn(struct switch_stack *sw) return rval; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c index 5ac9d3b1d615..0337d1e1d2d5 100644 --- a/arch/openrisc/kernel/signal.c +++ b/arch/openrisc/kernel/signal.c @@ -99,7 +99,7 @@ asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs) return regs->gpr[11]; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 6ed7293ef007..0fad2e46ff43 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -376,7 +376,7 @@ static inline void simulate_lwa(struct pt_regs *regs, unsigned long address, if (get_user(value, lwa_addr)) { if (user_mode(regs)) { - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; } @@ -423,7 +423,7 @@ static inline void simulate_swa(struct pt_regs *regs, unsigned long address, if (put_user(regs->gpr[rb], vaddr)) { if (user_mode(regs)) { - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; } diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 848c1934680b..02895a8f2c55 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -164,7 +164,7 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall) give_sigsegv: DBG(1,"sys_rt_sigreturn: Sending SIGSEGV\n"); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index ede4f04281ae..fd48cdc0a4ff 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1249,7 +1249,7 @@ SYSCALL_DEFINE0(rt_sigreturn) current->comm, current->pid, rt_sf, regs->nip, regs->link); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } @@ -1338,7 +1338,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, current->comm, current->pid, ctx, regs->nip, regs->link); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); goto out; } @@ -1516,6 +1516,6 @@ badframe: current->comm, current->pid, addr, regs->nip, regs->link); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 06c299ef6132..ea08d848f558 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -812,7 +812,7 @@ badframe: current->comm, current->pid, "rt_sigreturn", (long)uc, regs->nip, regs->link); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 07f82d7395ff..3f2380f40f99 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -443,7 +443,7 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) else if (unlikely((status & SPU_STATUS_STOPPED_BY_STOP) && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff)) { - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); ret = -ERESTARTSYS; } diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 804d6ee4f3c5..50c0e64372b0 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -139,7 +139,7 @@ badframe: task->comm, task_pid_nr(task), __func__, frame, (void *)regs->sepc, (void *)regs->sp); } - force_sig(SIGSEGV, task); + force_sig(SIGSEGV); return 0; } diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 6f2a193ccccc..38d4bdbc34b9 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -194,7 +194,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn) load_sigregs(); return regs->gprs[2]; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } @@ -217,7 +217,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn) load_sigregs(); return regs->gprs[2]; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 22f08245aa5d..e6fca5498e1f 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -232,7 +232,7 @@ SYSCALL_DEFINE0(sigreturn) load_sigregs(); return regs->gprs[2]; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } @@ -256,7 +256,7 @@ SYSCALL_DEFINE0(rt_sigreturn) load_sigregs(); return regs->gprs[2]; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c index 74b48db86dd7..0bcff11a4843 100644 --- a/arch/sh/kernel/cpu/sh2a/fpu.c +++ b/arch/sh/kernel/cpu/sh2a/fpu.c @@ -568,5 +568,5 @@ BUILD_TRAP_HANDLER(fpu_error) return; } - force_sig(SIGFPE, tsk); + force_sig(SIGFPE); } diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c index 1ff56e5ba990..03ffd8cdf542 100644 --- a/arch/sh/kernel/cpu/sh4/fpu.c +++ b/arch/sh/kernel/cpu/sh4/fpu.c @@ -421,5 +421,5 @@ BUILD_TRAP_HANDLER(fpu_error) } } - force_sig(SIGFPE, tsk); + force_sig(SIGFPE); } diff --git a/arch/sh/kernel/cpu/sh5/fpu.c b/arch/sh/kernel/cpu/sh5/fpu.c index 9218d9ed787e..3966b5ee8e93 100644 --- a/arch/sh/kernel/cpu/sh5/fpu.c +++ b/arch/sh/kernel/cpu/sh5/fpu.c @@ -100,9 +100,7 @@ void restore_fpu(struct task_struct *tsk) asmlinkage void do_fpu_error(unsigned long ex, struct pt_regs *regs) { - struct task_struct *tsk = current; - regs->pc += 4; - force_sig(SIGFPE, tsk); + force_sig(SIGFPE); } diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c index 3390349ff976..11085e48eaa6 100644 --- a/arch/sh/kernel/ptrace_64.c +++ b/arch/sh/kernel/ptrace_64.c @@ -550,7 +550,7 @@ asmlinkage void do_single_step(unsigned long long vec, struct pt_regs *regs) continually stepping. */ local_irq_enable(); regs->sr &= ~SR_SSTEP; - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); } /* Called with interrupts disabled */ @@ -561,7 +561,7 @@ BUILD_TRAP_HANDLER(breakpoint) /* We need to forward step the PC, to counteract the backstep done in signal.c. */ local_irq_enable(); - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); regs->pc += 4; } diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index 2a2121ba8ebe..24473fa6c3b6 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -176,7 +176,7 @@ asmlinkage int sys_sigreturn(void) return r0; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } @@ -207,7 +207,7 @@ asmlinkage int sys_rt_sigreturn(void) return r0; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c index f1f1598879c2..b9aaa9266b34 100644 --- a/arch/sh/kernel/signal_64.c +++ b/arch/sh/kernel/signal_64.c @@ -277,7 +277,7 @@ asmlinkage int sys_sigreturn(unsigned long r2, unsigned long r3, return (int) ret; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } @@ -311,7 +311,7 @@ asmlinkage int sys_rt_sigreturn(unsigned long r2, unsigned long r3, return (int) ret; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index 8b49cced663d..63cf17bc760d 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -141,7 +141,7 @@ BUILD_TRAP_HANDLER(debug) SIGTRAP) == NOTIFY_STOP) return; - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); } /* @@ -167,7 +167,7 @@ BUILD_TRAP_HANDLER(bug) } #endif - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); } BUILD_TRAP_HANDLER(nmi) diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index f2a18b5fafd8..bd5568c8e7f0 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -611,7 +611,6 @@ asmlinkage void do_reserved_inst(void) { struct pt_regs *regs = current_pt_regs(); unsigned long error_code; - struct task_struct *tsk = current; #ifdef CONFIG_SH_FPU_EMU unsigned short inst = 0; @@ -633,7 +632,7 @@ asmlinkage void do_reserved_inst(void) /* Enable DSP mode, and restart instruction. */ regs->sr |= SR_DSP; /* Save DSP mode */ - tsk->thread.dsp_status.status |= SR_DSP; + current->thread.dsp_status.status |= SR_DSP; return; } #endif @@ -641,7 +640,7 @@ asmlinkage void do_reserved_inst(void) error_code = lookup_exception_vector(); local_irq_enable(); - force_sig(SIGILL, tsk); + force_sig(SIGILL); die_if_no_fixup("reserved instruction", regs, error_code); } @@ -697,7 +696,6 @@ asmlinkage void do_illegal_slot_inst(void) { struct pt_regs *regs = current_pt_regs(); unsigned long inst; - struct task_struct *tsk = current; if (kprobe_handle_illslot(regs->pc) == 0) return; @@ -716,7 +714,7 @@ asmlinkage void do_illegal_slot_inst(void) inst = lookup_exception_vector(); local_irq_enable(); - force_sig(SIGILL, tsk); + force_sig(SIGILL); die_if_no_fixup("illegal slot instruction", regs, inst); } diff --git a/arch/sh/kernel/traps_64.c b/arch/sh/kernel/traps_64.c index 8ce90a7da67d..37046f3a26d3 100644 --- a/arch/sh/kernel/traps_64.c +++ b/arch/sh/kernel/traps_64.c @@ -599,7 +599,7 @@ static void do_unhandled_exception(int signr, char *str, unsigned long error, struct pt_regs *regs) { if (user_mode(regs)) - force_sig(signr, current); + force_sig(signr); die_if_no_fixup(str, regs, error); } diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 59eaf6227af1..c4bccd97f3cf 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -570,7 +570,7 @@ void fault_in_user_windows(struct pt_regs *regs) barf: set_thread_wsaved(window + 1); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } asmlinkage long sparc_do_fork(unsigned long clone_flags, diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index fb431d47a532..a237810aa9f4 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -170,7 +170,7 @@ void do_sigreturn32(struct pt_regs *regs) return; segv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) @@ -256,7 +256,7 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) set_current_blocked(&set); return; segv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize) diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index 83953780ca01..42c3de313fd6 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -137,7 +137,7 @@ asmlinkage void do_sigreturn(struct pt_regs *regs) return; segv_and_exit: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } asmlinkage void do_rt_sigreturn(struct pt_regs *regs) @@ -196,7 +196,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs) set_current_blocked(&set); return; segv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize) diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index 9d50190cf312..69ae814b7e90 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -134,7 +134,7 @@ out: exception_exit(prev_state); return; do_sigsegv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); goto out; } @@ -228,7 +228,7 @@ out: exception_exit(prev_state); return; do_sigsegv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); goto out; } @@ -320,7 +320,7 @@ void do_rt_sigreturn(struct pt_regs *regs) set_current_blocked(&set); return; segv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize) diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 0cd02a64a451..12bfc7e215ca 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2181,7 +2181,7 @@ bool sun4v_nonresum_error_user_handled(struct pt_regs *regs, addr += PAGE_SIZE; } } - force_sig(SIGKILL, current); + force_sig(SIGKILL); return true; } diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index b0440b0edd97..2731faf415ba 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -425,7 +425,7 @@ do_sigbus: static void check_stack_aligned(unsigned long sp) { if (sp & 0x7UL) - force_sig(SIGILL, current); + force_sig(SIGILL); } void window_overflow_fault(void) diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index a43d42bf0a86..783b9247161f 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -32,7 +32,7 @@ void flush_thread(void) if (ret) { printk(KERN_ERR "flush_thread - clearing address space failed, " "err = %d\n", ret); - force_sig(SIGKILL, current); + force_sig(SIGKILL); } get_safe_registers(current_pt_regs()->regs.gp, current_pt_regs()->regs.fp); diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 8347161c2ae0..45f739bf302f 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -329,7 +329,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, "process: %d\n", task_tgid_vnr(current)); /* We are under mmap_sem, release it such that current can terminate */ up_write(¤t->mm->mmap_sem); - force_sig(SIGKILL, current); + force_sig(SIGKILL); do_signal(¤t->thread.regs); } } @@ -487,7 +487,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) kill: printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address); - force_sig(SIGKILL, current); + force_sig(SIGKILL); } pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address) diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 646059402ab3..1c943c66063f 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -309,7 +309,7 @@ void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs) } else { printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d) with errno %d\n", sig, code, err); - force_sig(sig, current); + force_sig(sig); } } diff --git a/arch/unicore32/kernel/signal.c b/arch/unicore32/kernel/signal.c index 75f27dc68bd0..070fa58d23a9 100644 --- a/arch/unicore32/kernel/signal.c +++ b/arch/unicore32/kernel/signal.c @@ -129,7 +129,7 @@ asmlinkage int __sys_rt_sigreturn(struct pt_regs *regs) return regs->UCreg_00; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index d9d81ad7a400..7ea87f4ad0b7 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -268,7 +268,7 @@ do_ret: return true; sigsegv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return true; } diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 5112a50e6486..e11ac124dd37 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1329,7 +1329,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) local_irq_enable(); if (kill_it || do_memory_failure(&m)) - force_sig(SIGBUS, current); + force_sig(SIGBUS); local_irq_disable(); ist_end_non_atomic(); } else { diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 364813cea647..7cf508f78c8c 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -857,7 +857,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) pr_cont("\n"); } - force_sig(SIGSEGV, me); + force_sig(SIGSEGV); } #ifdef CONFIG_X86_X32_ABI diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 8b6d03e55d2f..e54f0cad4b2e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -254,7 +254,7 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, show_signal(tsk, signr, "trap ", str, regs, error_code); if (!sicode) - force_sig(signr, tsk); + force_sig(signr); else force_sig_fault(signr, sicode, addr, tsk); } @@ -566,7 +566,7 @@ do_general_protection(struct pt_regs *regs, long error_code) show_signal(tsk, SIGSEGV, "", desc, regs, error_code); - force_sig(SIGSEGV, tsk); + force_sig(SIGSEGV); } NOKPROBE_SYMBOL(do_general_protection); diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index ccf03416e434..18239d5a8b53 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -1087,7 +1087,7 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs pr_err("return address clobbered: pid=%d, %%sp=%#lx, %%ip=%#lx\n", current->pid, regs->sp, regs->ip); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } return -1; diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 6a38717d179c..a76c12b38e92 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -583,7 +583,7 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) return 1; /* we let this handle by the calling routine */ current->thread.trap_nr = trapno; current->thread.error_code = error_code; - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); return 0; } diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 0d1c47cbbdd6..895fb7a9294d 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -912,7 +912,7 @@ void mpx_notify_unmap(struct mm_struct *mm, unsigned long start, ret = mpx_unmap_tables(mm, start, end); if (ret) - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } /* MPX cannot handle addresses above 47 bits yet. */ diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 8b4a71efe7ee..7c11c9e5d7ea 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -471,7 +471,7 @@ long sys_sigreturn(void) return PT_REGS_SYSCALL_RET(¤t->thread.regs); segfault: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } @@ -577,6 +577,6 @@ long sys_rt_sigreturn(void) return PT_REGS_SYSCALL_RET(¤t->thread.regs); segfault: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index dc22a238ed9c..fbedf2aba09d 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -270,7 +270,7 @@ asmlinkage long xtensa_rt_sigreturn(long a0, long a1, long a2, long a3, return ret; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 454d53096bc9..6f26b254091b 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -184,7 +184,7 @@ void do_unhandled(struct pt_regs *regs, unsigned long exccause) "\tEXCCAUSE is %ld\n", current->comm, task_pid_nr(current), regs->pc, exccause); - force_sig(SIGILL, current); + force_sig(SIGILL); } /* @@ -306,7 +306,7 @@ do_illegal_instruction(struct pt_regs *regs) pr_info_ratelimited("Illegal Instruction in '%s' (pid = %d, pc = %#010lx)\n", current->comm, task_pid_nr(current), regs->pc); - force_sig(SIGILL, current); + force_sig(SIGILL); } @@ -354,7 +354,7 @@ do_debug(struct pt_regs *regs) /* If in user mode, send SIGTRAP signal to current process */ - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); } diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c index 7eebbdfbcacd..86556adb1482 100644 --- a/drivers/misc/lkdtm/bugs.c +++ b/drivers/misc/lkdtm/bugs.c @@ -225,7 +225,7 @@ void lkdtm_CORRUPT_USER_DS(void) set_fs(KERNEL_DS); /* Make sure we do not keep running with a KERNEL_DS! */ - force_sig(SIGKILL, current); + force_sig(SIGKILL); } /* Test that VMAP_STACK is actually allocating with a leading guard page */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 8af3101da782..e9df3f0cce48 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -335,7 +335,7 @@ extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); extern __must_check bool do_notify_parent(struct task_struct *, int); extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); -extern void force_sig(int, struct task_struct *); +extern void force_sig(int); extern int send_sig(int, struct task_struct *, int); extern int zap_other_threads(struct task_struct *p); extern struct sigqueue *sigqueue_alloc(void); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e2870fe1be5b..fd6e0f5ebfdf 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -266,7 +266,7 @@ static inline void addr_limit_user_check(void) if (CHECK_DATA_CORRUPTION(!segment_eq(get_fs(), USER_DS), "Invalid address limit on user-mode return")) - force_sig(SIGKILL, current); + force_sig(SIGKILL); #ifdef TIF_FSCHECK clear_thread_flag(TIF_FSCHECK); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 78f61bfc6b79..359122185cfb 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -2112,7 +2112,7 @@ static void handle_trampoline(struct pt_regs *regs) sigill: uprobe_warn(current, "handle uretprobe, sending SIGILL."); - force_sig(SIGILL, current); + force_sig(SIGILL); } @@ -2228,7 +2228,7 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs) if (unlikely(err)) { uprobe_warn(current, "execute the probed insn, sending SIGILL."); - force_sig(SIGILL, current); + force_sig(SIGILL); } } diff --git a/kernel/rseq.c b/kernel/rseq.c index e1aa3ebee291..27c48eb7de40 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -296,7 +296,7 @@ void rseq_syscall(struct pt_regs *regs) return; if (!access_ok(t->rseq, sizeof(*t->rseq)) || rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs)) - force_sig(SIGSEGV, t); + force_sig(SIGSEGV); } #endif diff --git a/kernel/signal.c b/kernel/signal.c index f7669d240ce4..20878c4c28c2 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1595,9 +1595,9 @@ send_sig(int sig, struct task_struct *p, int priv) } EXPORT_SYMBOL(send_sig); -void force_sig(int sig, struct task_struct *p) +void force_sig(int sig) { - force_sig_info(sig, SEND_SIG_PRIV, p); + force_sig_info(sig, SEND_SIG_PRIV, current); } EXPORT_SYMBOL(force_sig); @@ -1617,7 +1617,7 @@ void force_sigsegv(int sig) p->sighand->action[sig - 1].sa.sa_handler = SIG_DFL; spin_unlock_irqrestore(&p->sighand->siglock, flags); } - force_sig(SIGSEGV, p); + force_sig(SIGSEGV); } int force_sig_fault(int sig, int code, void __user *addr diff --git a/security/safesetid/lsm.c b/security/safesetid/lsm.c index cecd38e2ac80..06d4259f9ab1 100644 --- a/security/safesetid/lsm.c +++ b/security/safesetid/lsm.c @@ -111,7 +111,7 @@ static int check_uid_transition(kuid_t parent, kuid_t child) * that could arise from a missing whitelist entry preventing a * privileged process from dropping to a lesser-privileged one. */ - force_sig(SIGKILL, current); + force_sig(SIGKILL); return -EACCES; } @@ -203,7 +203,7 @@ static int safesetid_task_fix_setuid(struct cred *new, break; default: pr_warn("Unknown setid state %d\n", flags); - force_sig(SIGKILL, current); + force_sig(SIGKILL); return -EINVAL; } return 0; -- cgit v1.2.3 From f8eac9011b6be56acfb5d1d0dfd5ee30082a12ee Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 5 Feb 2019 18:14:19 -0600 Subject: signal: Remove task parameter from force_sig_mceerr All of the callers pass current into force_sig_mceer so remove the task parameter to make this obvious. This also makes it clear that force_sig_mceerr passes current into force_sig_info. Signed-off-by: "Eric W. Biederman" --- arch/arm64/kernel/traps.c | 2 +- arch/parisc/mm/fault.c | 2 +- arch/powerpc/mm/fault.c | 3 +-- arch/x86/mm/fault.c | 2 +- include/linux/sched/signal.h | 2 +- kernel/signal.c | 4 ++-- mm/memory-failure.c | 2 +- 7 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 64abe8450780..c76a64c1bcb3 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -266,7 +266,7 @@ void arm64_force_sig_mceerr(int code, void __user *addr, short lsb, const char *str) { arm64_show_signal(SIGBUS, str); - force_sig_mceerr(code, addr, lsb, current); + force_sig_mceerr(code, addr, lsb); } void arm64_force_sig_ptrace_errno_trap(int errno, void __user *addr, diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index c8e8b7c05558..56ceacb3401d 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -403,7 +403,7 @@ bad_area: lsb = PAGE_SHIFT; force_sig_mceerr(BUS_MCEERR_AR, (void __user *) address, - lsb, current); + lsb); return; } #endif diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index b5d3578d9f65..6ed6c341c670 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -182,8 +182,7 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address, if (fault & VM_FAULT_HWPOISON) lsb = PAGE_SHIFT; - force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, - current); + force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb); return 0; } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 46df4c6aae46..c431326ee3fa 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1040,7 +1040,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); if (fault & VM_FAULT_HWPOISON) lsb = PAGE_SHIFT; - force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, tsk); + force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb); return; } #endif diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index e9df3f0cce48..4178bb1f7709 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -316,7 +316,7 @@ int send_sig_fault(int sig, int code, void __user *addr ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) , struct task_struct *t); -int force_sig_mceerr(int code, void __user *, short, struct task_struct *); +int force_sig_mceerr(int code, void __user *, short); int send_sig_mceerr(int code, void __user *, short, struct task_struct *); int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper); diff --git a/kernel/signal.c b/kernel/signal.c index 20878c4c28c2..398489facf9f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1666,7 +1666,7 @@ int send_sig_fault(int sig, int code, void __user *addr return send_sig_info(info.si_signo, &info, t); } -int force_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct *t) +int force_sig_mceerr(int code, void __user *addr, short lsb) { struct kernel_siginfo info; @@ -1677,7 +1677,7 @@ int force_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct info.si_code = code; info.si_addr = addr; info.si_addr_lsb = lsb; - return force_sig_info(info.si_signo, &info, t); + return force_sig_info(info.si_signo, &info, current); } int send_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct *t) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index fc8b51744579..bc749265a8f3 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -221,7 +221,7 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags) if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) { ret = force_sig_mceerr(BUS_MCEERR_AR, (void __user *)tk->addr, - addr_lsb, current); + addr_lsb); } else { /* * Don't use force here, it's convenient if the signal -- cgit v1.2.3 From 7423e01741dd6a5f1255f589145313f0fb1c8cbe Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sat, 25 May 2019 13:41:22 +0800 Subject: iommu: Add API to request DMA domain for device Normally during iommu probing a device, a default doamin will be allocated and attached to the device. The domain type of the default domain is statically defined, which results in a situation where the allocated default domain isn't suitable for the device due to some limitations. We already have API iommu_request_dm_for_dev() to replace a DMA domain with an identity one. This adds iommu_request_dma_domain_for_dev() to request a dma domain if an allocated identity domain isn't suitable for the device in question. Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 36 +++++++++++++++++++++++++----------- include/linux/iommu.h | 6 ++++++ 2 files changed, 31 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 67ee6623f9b2..2fca04c3dbaf 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1907,10 +1907,10 @@ struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, return region; } -/* Request that a device is direct mapped by the IOMMU */ -int iommu_request_dm_for_dev(struct device *dev) +static int +request_default_domain_for_dev(struct device *dev, unsigned long type) { - struct iommu_domain *dm_domain; + struct iommu_domain *domain; struct iommu_group *group; int ret; @@ -1923,8 +1923,7 @@ int iommu_request_dm_for_dev(struct device *dev) /* Check if the default domain is already direct mapped */ ret = 0; - if (group->default_domain && - group->default_domain->type == IOMMU_DOMAIN_IDENTITY) + if (group->default_domain && group->default_domain->type == type) goto out; /* Don't change mappings of existing devices */ @@ -1934,23 +1933,26 @@ int iommu_request_dm_for_dev(struct device *dev) /* Allocate a direct mapped domain */ ret = -ENOMEM; - dm_domain = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_IDENTITY); - if (!dm_domain) + domain = __iommu_domain_alloc(dev->bus, type); + if (!domain) goto out; /* Attach the device to the domain */ - ret = __iommu_attach_group(dm_domain, group); + ret = __iommu_attach_group(domain, group); if (ret) { - iommu_domain_free(dm_domain); + iommu_domain_free(domain); goto out; } + iommu_group_create_direct_mappings(group, dev); + /* Make the direct mapped domain the default for this group */ if (group->default_domain) iommu_domain_free(group->default_domain); - group->default_domain = dm_domain; + group->default_domain = domain; - dev_info(dev, "Using iommu direct mapping\n"); + dev_info(dev, "Using iommu %s mapping\n", + type == IOMMU_DOMAIN_DMA ? "dma" : "direct"); ret = 0; out: @@ -1960,6 +1962,18 @@ out: return ret; } +/* Request that a device is direct mapped by the IOMMU */ +int iommu_request_dm_for_dev(struct device *dev) +{ + return request_default_domain_for_dev(dev, IOMMU_DOMAIN_IDENTITY); +} + +/* Request that a device can't be direct mapped by the IOMMU */ +int iommu_request_dma_domain_for_dev(struct device *dev) +{ + return request_default_domain_for_dev(dev, IOMMU_DOMAIN_DMA); +} + const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) { const struct iommu_ops *ops = NULL; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a815cf6f6f47..91af22a344e2 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -362,6 +362,7 @@ extern void iommu_set_fault_handler(struct iommu_domain *domain, extern void iommu_get_resv_regions(struct device *dev, struct list_head *list); extern void iommu_put_resv_regions(struct device *dev, struct list_head *list); extern int iommu_request_dm_for_dev(struct device *dev); +extern int iommu_request_dma_domain_for_dev(struct device *dev); extern struct iommu_resv_region * iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, enum iommu_resv_type type); @@ -626,6 +627,11 @@ static inline int iommu_request_dm_for_dev(struct device *dev) return -ENODEV; } +static inline int iommu_request_dma_domain_for_dev(struct device *dev) +{ + return -ENODEV; +} + static inline int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) { -- cgit v1.2.3 From 185da893fab1caa458c47f032a6f53717dbae2eb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 May 2019 09:29:26 +0200 Subject: iommu/dma: Cleanup dma-iommu.h No need for a __KERNEL__ guard outside uapi and add a missing comment describing the #else cpp statement. Last but not least include instead of the asm version, which is frowned upon. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/dma-iommu.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 476e0c54de2d..dfb83f9c24dc 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -16,9 +16,8 @@ #ifndef __DMA_IOMMU_H #define __DMA_IOMMU_H -#ifdef __KERNEL__ +#include #include -#include #ifdef CONFIG_IOMMU_DMA #include @@ -86,7 +85,7 @@ void iommu_dma_compose_msi_msg(struct msi_desc *desc, void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); -#else +#else /* CONFIG_IOMMU_DMA */ struct iommu_domain; struct msi_desc; @@ -128,5 +127,4 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he } #endif /* CONFIG_IOMMU_DMA */ -#endif /* __KERNEL__ */ #endif /* __DMA_IOMMU_H */ -- cgit v1.2.3 From af751d4308a7c80434b5f40fd44288d33dc1962f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 May 2019 09:29:27 +0200 Subject: iommu/dma: Remove the flush_page callback We now have a arch_dma_prep_coherent architecture hook that is used for the generic DMA remap allocator, and we should use the same interface for the dma-iommu code. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Acked-by: Catalin Marinas Signed-off-by: Joerg Roedel --- arch/arm64/mm/dma-mapping.c | 8 +------- drivers/iommu/dma-iommu.c | 8 +++----- include/linux/dma-iommu.h | 3 +-- 3 files changed, 5 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 674860e3e478..10a8852c8b6a 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -104,12 +104,6 @@ arch_initcall(arm64_dma_init); #include #include -/* Thankfully, all cache ops are by VA so we can ignore phys here */ -static void flush_page(struct device *dev, const void *virt, phys_addr_t phys) -{ - __dma_flush_area(virt, PAGE_SIZE); -} - static void *__iommu_alloc_attrs(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, unsigned long attrs) @@ -186,7 +180,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, struct page **pages; pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot, - handle, flush_page); + handle); if (!pages) return NULL; diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 129c4badf9ae..aac12433ffef 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -560,8 +561,6 @@ void iommu_dma_free(struct device *dev, struct page **pages, size_t size, * @attrs: DMA attributes for this allocation * @prot: IOMMU mapping flags * @handle: Out argument for allocated DMA handle - * @flush_page: Arch callback which must ensure PAGE_SIZE bytes from the - * given VA/PA are visible to the given non-coherent device. * * If @size is less than PAGE_SIZE, then a full CPU page will be allocated, * but an IOMMU which supports smaller pages might not map the whole thing. @@ -570,8 +569,7 @@ void iommu_dma_free(struct device *dev, struct page **pages, size_t size, * or NULL on failure. */ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, - unsigned long attrs, int prot, dma_addr_t *handle, - void (*flush_page)(struct device *, const void *, phys_addr_t)) + unsigned long attrs, int prot, dma_addr_t *handle) { struct iommu_domain *domain = iommu_get_dma_domain(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; @@ -615,7 +613,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, */ sg_miter_start(&miter, sgt.sgl, sgt.orig_nents, SG_MITER_FROM_SG); while (sg_miter_next(&miter)) - flush_page(dev, miter.addr, page_to_phys(miter.page)); + arch_dma_prep_coherent(miter.page, PAGE_SIZE); sg_miter_stop(&miter); } diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index dfb83f9c24dc..e1ef265b578b 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -44,8 +44,7 @@ int dma_info_to_prot(enum dma_data_direction dir, bool coherent, * the arch code to take care of attributes and cache maintenance */ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, - unsigned long attrs, int prot, dma_addr_t *handle, - void (*flush_page)(struct device *, const void *, phys_addr_t)); + unsigned long attrs, int prot, dma_addr_t *handle); void iommu_dma_free(struct device *dev, struct page **pages, size_t size, dma_addr_t *handle); -- cgit v1.2.3 From 06d60728ff5c01795ac0bad66a5c42e3e78dcb6b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 May 2019 09:29:29 +0200 Subject: iommu/dma: move the arm64 wrappers to common code There is nothing really arm64 specific in the iommu_dma_ops implementation, so move it to dma-iommu.c and keep a lot of symbols self-contained. Note the implementation does depend on the DMA_DIRECT_REMAP infrastructure for now, so we'll have to make the DMA_IOMMU support depend on it, but this will be relaxed soon. Signed-off-by: Christoph Hellwig Acked-by: Robin Murphy Signed-off-by: Joerg Roedel --- arch/arm64/mm/dma-mapping.c | 394 +------------------------------------------ drivers/iommu/Kconfig | 1 + drivers/iommu/dma-iommu.c | 398 ++++++++++++++++++++++++++++++++++++++++---- include/linux/dma-iommu.h | 42 +---- 4 files changed, 378 insertions(+), 457 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 10a8852c8b6a..d1661f78eb4d 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -58,37 +59,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size) __dma_flush_area(page_address(page), size); } -#ifdef CONFIG_IOMMU_DMA -static int __swiotlb_get_sgtable_page(struct sg_table *sgt, - struct page *page, size_t size) -{ - int ret = sg_alloc_table(sgt, 1, GFP_KERNEL); - - if (!ret) - sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); - - return ret; -} - -static int __swiotlb_mmap_pfn(struct vm_area_struct *vma, - unsigned long pfn, size_t size) -{ - int ret = -ENXIO; - unsigned long nr_vma_pages = vma_pages(vma); - unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long off = vma->vm_pgoff; - - if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) { - ret = remap_pfn_range(vma, vma->vm_start, - pfn + off, - vma->vm_end - vma->vm_start, - vma->vm_page_prot); - } - - return ret; -} -#endif /* CONFIG_IOMMU_DMA */ - static int __init arm64_dma_init(void) { WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(), @@ -100,374 +70,18 @@ static int __init arm64_dma_init(void) arch_initcall(arm64_dma_init); #ifdef CONFIG_IOMMU_DMA -#include -#include -#include - -static void *__iommu_alloc_attrs(struct device *dev, size_t size, - dma_addr_t *handle, gfp_t gfp, - unsigned long attrs) -{ - bool coherent = dev_is_dma_coherent(dev); - int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs); - size_t iosize = size; - void *addr; - - if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n")) - return NULL; - - size = PAGE_ALIGN(size); - - /* - * Some drivers rely on this, and we probably don't want the - * possibility of stale kernel data being read by devices anyway. - */ - gfp |= __GFP_ZERO; - - if (!gfpflags_allow_blocking(gfp)) { - struct page *page; - /* - * In atomic context we can't remap anything, so we'll only - * get the virtually contiguous buffer we need by way of a - * physically contiguous allocation. - */ - if (coherent) { - page = alloc_pages(gfp, get_order(size)); - addr = page ? page_address(page) : NULL; - } else { - addr = dma_alloc_from_pool(size, &page, gfp); - } - if (!addr) - return NULL; - - *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot); - if (*handle == DMA_MAPPING_ERROR) { - if (coherent) - __free_pages(page, get_order(size)); - else - dma_free_from_pool(addr, size); - addr = NULL; - } - } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { - pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs); - struct page *page; - - page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, - get_order(size), gfp & __GFP_NOWARN); - if (!page) - return NULL; - - *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot); - if (*handle == DMA_MAPPING_ERROR) { - dma_release_from_contiguous(dev, page, - size >> PAGE_SHIFT); - return NULL; - } - addr = dma_common_contiguous_remap(page, size, VM_USERMAP, - prot, - __builtin_return_address(0)); - if (addr) { - if (!coherent) - __dma_flush_area(page_to_virt(page), iosize); - memset(addr, 0, size); - } else { - iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs); - dma_release_from_contiguous(dev, page, - size >> PAGE_SHIFT); - } - } else { - pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs); - struct page **pages; - - pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot, - handle); - if (!pages) - return NULL; - - addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, - __builtin_return_address(0)); - if (!addr) - iommu_dma_free(dev, pages, iosize, handle); - } - return addr; -} - -static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t handle, unsigned long attrs) -{ - size_t iosize = size; - - size = PAGE_ALIGN(size); - /* - * @cpu_addr will be one of 4 things depending on how it was allocated: - * - A remapped array of pages for contiguous allocations. - * - A remapped array of pages from iommu_dma_alloc(), for all - * non-atomic allocations. - * - A non-cacheable alias from the atomic pool, for atomic - * allocations by non-coherent devices. - * - A normal lowmem address, for atomic allocations by - * coherent devices. - * Hence how dodgy the below logic looks... - */ - if (dma_in_atomic_pool(cpu_addr, size)) { - iommu_dma_unmap_page(dev, handle, iosize, 0, 0); - dma_free_from_pool(cpu_addr, size); - } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { - struct page *page = vmalloc_to_page(cpu_addr); - - iommu_dma_unmap_page(dev, handle, iosize, 0, attrs); - dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); - dma_common_free_remap(cpu_addr, size, VM_USERMAP); - } else if (is_vmalloc_addr(cpu_addr)){ - struct vm_struct *area = find_vm_area(cpu_addr); - - if (WARN_ON(!area || !area->pages)) - return; - iommu_dma_free(dev, area->pages, iosize, &handle); - dma_common_free_remap(cpu_addr, size, VM_USERMAP); - } else { - iommu_dma_unmap_page(dev, handle, iosize, 0, 0); - __free_pages(virt_to_page(cpu_addr), get_order(size)); - } -} - -static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ - struct vm_struct *area; - int ret; - - vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs); - - if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) - return ret; - - if (!is_vmalloc_addr(cpu_addr)) { - unsigned long pfn = page_to_pfn(virt_to_page(cpu_addr)); - return __swiotlb_mmap_pfn(vma, pfn, size); - } - - if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { - /* - * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped, - * hence in the vmalloc space. - */ - unsigned long pfn = vmalloc_to_pfn(cpu_addr); - return __swiotlb_mmap_pfn(vma, pfn, size); - } - - area = find_vm_area(cpu_addr); - if (WARN_ON(!area || !area->pages)) - return -ENXIO; - - return iommu_dma_mmap(area->pages, size, vma); -} - -static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t dma_addr, - size_t size, unsigned long attrs) -{ - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - struct vm_struct *area = find_vm_area(cpu_addr); - - if (!is_vmalloc_addr(cpu_addr)) { - struct page *page = virt_to_page(cpu_addr); - return __swiotlb_get_sgtable_page(sgt, page, size); - } - - if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { - /* - * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped, - * hence in the vmalloc space. - */ - struct page *page = vmalloc_to_page(cpu_addr); - return __swiotlb_get_sgtable_page(sgt, page, size); - } - - if (WARN_ON(!area || !area->pages)) - return -ENXIO; - - return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size, - GFP_KERNEL); -} - -static void __iommu_sync_single_for_cpu(struct device *dev, - dma_addr_t dev_addr, size_t size, - enum dma_data_direction dir) -{ - phys_addr_t phys; - - if (dev_is_dma_coherent(dev)) - return; - - phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr); - arch_sync_dma_for_cpu(dev, phys, size, dir); -} - -static void __iommu_sync_single_for_device(struct device *dev, - dma_addr_t dev_addr, size_t size, - enum dma_data_direction dir) -{ - phys_addr_t phys; - - if (dev_is_dma_coherent(dev)) - return; - - phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr); - arch_sync_dma_for_device(dev, phys, size, dir); -} - -static dma_addr_t __iommu_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - bool coherent = dev_is_dma_coherent(dev); - int prot = dma_info_to_prot(dir, coherent, attrs); - dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot); - - if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - dev_addr != DMA_MAPPING_ERROR) - __dma_map_area(page_address(page) + offset, size, dir); - - return dev_addr; -} - -static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) - __iommu_sync_single_for_cpu(dev, dev_addr, size, dir); - - iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs); -} - -static void __iommu_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - if (dev_is_dma_coherent(dev)) - return; - - for_each_sg(sgl, sg, nelems, i) - arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); -} - -static void __iommu_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - if (dev_is_dma_coherent(dev)) - return; - - for_each_sg(sgl, sg, nelems, i) - arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); -} - -static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl, - int nelems, enum dma_data_direction dir, - unsigned long attrs) -{ - bool coherent = dev_is_dma_coherent(dev); - - if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) - __iommu_sync_sg_for_device(dev, sgl, nelems, dir); - - return iommu_dma_map_sg(dev, sgl, nelems, - dma_info_to_prot(dir, coherent, attrs)); -} - -static void __iommu_unmap_sg_attrs(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir, - unsigned long attrs) -{ - if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) - __iommu_sync_sg_for_cpu(dev, sgl, nelems, dir); - - iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs); -} - -static const struct dma_map_ops iommu_dma_ops = { - .alloc = __iommu_alloc_attrs, - .free = __iommu_free_attrs, - .mmap = __iommu_mmap_attrs, - .get_sgtable = __iommu_get_sgtable, - .map_page = __iommu_map_page, - .unmap_page = __iommu_unmap_page, - .map_sg = __iommu_map_sg_attrs, - .unmap_sg = __iommu_unmap_sg_attrs, - .sync_single_for_cpu = __iommu_sync_single_for_cpu, - .sync_single_for_device = __iommu_sync_single_for_device, - .sync_sg_for_cpu = __iommu_sync_sg_for_cpu, - .sync_sg_for_device = __iommu_sync_sg_for_device, - .map_resource = iommu_dma_map_resource, - .unmap_resource = iommu_dma_unmap_resource, -}; - -static int __init __iommu_dma_init(void) -{ - return iommu_dma_init(); -} -arch_initcall(__iommu_dma_init); - -static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *ops) -{ - struct iommu_domain *domain; - - if (!ops) - return; - - /* - * The IOMMU core code allocates the default DMA domain, which the - * underlying IOMMU driver needs to support via the dma-iommu layer. - */ - domain = iommu_get_domain_for_dev(dev); - - if (!domain) - goto out_err; - - if (domain->type == IOMMU_DOMAIN_DMA) { - if (iommu_dma_init_domain(domain, dma_base, size, dev)) - goto out_err; - - dev->dma_ops = &iommu_dma_ops; - } - - return; - -out_err: - pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", - dev_name(dev)); -} - void arch_teardown_dma_ops(struct device *dev) { dev->dma_ops = NULL; } - -#else - -static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu) -{ } - -#endif /* CONFIG_IOMMU_DMA */ +#endif void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { dev->dma_coherent = coherent; - __iommu_setup_dma_ops(dev, dma_base, size, iommu); + if (iommu) + iommu_setup_dma_ops(dev, dma_base, size); #ifdef CONFIG_XEN if (xen_initial_domain()) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 83664db5221d..d6d063160dd6 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -97,6 +97,7 @@ config IOMMU_DMA select IOMMU_IOVA select IRQ_MSI_IOMMU select NEED_SG_DMA_LENGTH + depends on DMA_DIRECT_REMAP config FSL_PAMU bool "Freescale IOMMU support" diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 9b7f120d7381..e34ba23353cb 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -79,11 +80,6 @@ static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type) return cookie; } -int iommu_dma_init(void) -{ - return iova_cache_get(); -} - /** * iommu_get_dma_cookie - Acquire DMA-API resources for a domain * @domain: IOMMU domain to prepare for DMA-API usage @@ -314,7 +310,7 @@ static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad) * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but * any change which could make prior IOVAs invalid will fail. */ -int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, +static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size, struct device *dev) { struct iommu_dma_cookie *cookie = domain->iova_cookie; @@ -365,7 +361,6 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, return iova_reserve_iommu_regions(dev, domain); } -EXPORT_SYMBOL(iommu_dma_init_domain); /** * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API @@ -376,7 +371,7 @@ EXPORT_SYMBOL(iommu_dma_init_domain); * * Return: corresponding IOMMU API page protection flags */ -int dma_info_to_prot(enum dma_data_direction dir, bool coherent, +static int dma_info_to_prot(enum dma_data_direction dir, bool coherent, unsigned long attrs) { int prot = coherent ? IOMMU_CACHE : 0; @@ -535,17 +530,17 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev, } /** - * iommu_dma_free - Free a buffer allocated by iommu_dma_alloc() + * iommu_dma_free - Free a buffer allocated by __iommu_dma_alloc() * @dev: Device which owns this buffer - * @pages: Array of buffer pages as returned by iommu_dma_alloc() + * @pages: Array of buffer pages as returned by __iommu_dma_alloc() * @size: Size of buffer in bytes * @handle: DMA address of buffer * * Frees both the pages associated with the buffer, and the array * describing them */ -void iommu_dma_free(struct device *dev, struct page **pages, size_t size, - dma_addr_t *handle) +static void __iommu_dma_free(struct device *dev, struct page **pages, + size_t size, dma_addr_t *handle) { __iommu_dma_unmap(iommu_get_dma_domain(dev), *handle, size); __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT); @@ -553,7 +548,7 @@ void iommu_dma_free(struct device *dev, struct page **pages, size_t size, } /** - * iommu_dma_alloc - Allocate and map a buffer contiguous in IOVA space + * __iommu_dma_alloc - Allocate and map a buffer contiguous in IOVA space * @dev: Device to allocate memory for. Must be a real device * attached to an iommu_dma_domain * @size: Size of buffer in bytes @@ -568,8 +563,8 @@ void iommu_dma_free(struct device *dev, struct page **pages, size_t size, * Return: Array of struct page pointers describing the buffer, * or NULL on failure. */ -struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, - unsigned long attrs, int prot, dma_addr_t *handle) +static struct page **__iommu_dma_alloc(struct device *dev, size_t size, + gfp_t gfp, unsigned long attrs, int prot, dma_addr_t *handle) { struct iommu_domain *domain = iommu_get_dma_domain(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; @@ -631,20 +626,72 @@ out_free_pages: } /** - * iommu_dma_mmap - Map a buffer into provided user VMA - * @pages: Array representing buffer from iommu_dma_alloc() + * __iommu_dma_mmap - Map a buffer into provided user VMA + * @pages: Array representing buffer from __iommu_dma_alloc() * @size: Size of buffer in bytes * @vma: VMA describing requested userspace mapping * * Maps the pages of the buffer in @pages into @vma. The caller is responsible * for verifying the correct size and protection of @vma beforehand. */ - -int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma) +static int __iommu_dma_mmap(struct page **pages, size_t size, + struct vm_area_struct *vma) { return vm_map_pages(vma, pages, PAGE_ALIGN(size) >> PAGE_SHIFT); } +static void iommu_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) +{ + phys_addr_t phys; + + if (dev_is_dma_coherent(dev)) + return; + + phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); + arch_sync_dma_for_cpu(dev, phys, size, dir); +} + +static void iommu_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) +{ + phys_addr_t phys; + + if (dev_is_dma_coherent(dev)) + return; + + phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); + arch_sync_dma_for_device(dev, phys, size, dir); +} + +static void iommu_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + if (dev_is_dma_coherent(dev)) + return; + + for_each_sg(sgl, sg, nelems, i) + arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); +} + +static void iommu_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + if (dev_is_dma_coherent(dev)) + return; + + for_each_sg(sgl, sg, nelems, i) + arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); +} + static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, size_t size, int prot, struct iommu_domain *domain) { @@ -668,19 +715,44 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, return iova + iova_off; } -dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, +static dma_addr_t __iommu_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, int prot) { return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot, iommu_get_dma_domain(dev)); } -void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, - enum dma_data_direction dir, unsigned long attrs) +static void __iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, unsigned long attrs) { __iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size); } +static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + phys_addr_t phys = page_to_phys(page) + offset; + bool coherent = dev_is_dma_coherent(dev); + dma_addr_t dma_handle; + + dma_handle =__iommu_dma_map(dev, phys, size, + dma_info_to_prot(dir, coherent, attrs), + iommu_get_dma_domain(dev)); + if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + dma_handle != DMA_MAPPING_ERROR) + arch_sync_dma_for_device(dev, phys, size, dir); + return dma_handle; +} + +static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + iommu_dma_sync_single_for_cpu(dev, dma_handle, size, dir); + __iommu_dma_unmap(iommu_get_dma_domain(dev), dma_handle, size); +} + /* * Prepare a successfully-mapped scatterlist to give back to the caller. * @@ -763,18 +835,22 @@ static void __invalidate_sg(struct scatterlist *sg, int nents) * impedance-matching, to be able to hand off a suitably-aligned list, * but still preserve the original offsets and sizes for the caller. */ -int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, int prot) +static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs) { struct iommu_domain *domain = iommu_get_dma_domain(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; struct scatterlist *s, *prev = NULL; + int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs); dma_addr_t iova; size_t iova_len = 0; unsigned long mask = dma_get_seg_boundary(dev); int i; + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + iommu_dma_sync_sg_for_device(dev, sg, nents, dir); + /* * Work out how much IOVA space we need, and align the segments to * IOVA granules for the IOMMU driver to handle. With some clever @@ -834,12 +910,16 @@ out_restore_sg: return 0; } -void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, unsigned long attrs) +static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs) { dma_addr_t start, end; struct scatterlist *tmp; int i; + + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) + iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir); + /* * The scatterlist segments are mapped into a single * contiguous IOVA allocation, so this is incredibly easy. @@ -854,7 +934,7 @@ void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, __iommu_dma_unmap(iommu_get_dma_domain(dev), start, end - start); } -dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, +static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, size_t size, enum dma_data_direction dir, unsigned long attrs) { return __iommu_dma_map(dev, phys, size, @@ -862,12 +942,268 @@ dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, iommu_get_dma_domain(dev)); } -void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, +static void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, unsigned long attrs) { __iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size); } +static void *iommu_dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, unsigned long attrs) +{ + bool coherent = dev_is_dma_coherent(dev); + int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs); + size_t iosize = size; + void *addr; + + size = PAGE_ALIGN(size); + gfp |= __GFP_ZERO; + + if (!gfpflags_allow_blocking(gfp)) { + struct page *page; + /* + * In atomic context we can't remap anything, so we'll only + * get the virtually contiguous buffer we need by way of a + * physically contiguous allocation. + */ + if (coherent) { + page = alloc_pages(gfp, get_order(size)); + addr = page ? page_address(page) : NULL; + } else { + addr = dma_alloc_from_pool(size, &page, gfp); + } + if (!addr) + return NULL; + + *handle = __iommu_dma_map_page(dev, page, 0, iosize, ioprot); + if (*handle == DMA_MAPPING_ERROR) { + if (coherent) + __free_pages(page, get_order(size)); + else + dma_free_from_pool(addr, size); + addr = NULL; + } + } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { + pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs); + struct page *page; + + page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, + get_order(size), gfp & __GFP_NOWARN); + if (!page) + return NULL; + + *handle = __iommu_dma_map_page(dev, page, 0, iosize, ioprot); + if (*handle == DMA_MAPPING_ERROR) { + dma_release_from_contiguous(dev, page, + size >> PAGE_SHIFT); + return NULL; + } + addr = dma_common_contiguous_remap(page, size, VM_USERMAP, + prot, + __builtin_return_address(0)); + if (addr) { + if (!coherent) + arch_dma_prep_coherent(page, iosize); + memset(addr, 0, size); + } else { + __iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs); + dma_release_from_contiguous(dev, page, + size >> PAGE_SHIFT); + } + } else { + pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs); + struct page **pages; + + pages = __iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot, + handle); + if (!pages) + return NULL; + + addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, + __builtin_return_address(0)); + if (!addr) + __iommu_dma_free(dev, pages, iosize, handle); + } + return addr; +} + +static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, unsigned long attrs) +{ + size_t iosize = size; + + size = PAGE_ALIGN(size); + /* + * @cpu_addr will be one of 4 things depending on how it was allocated: + * - A remapped array of pages for contiguous allocations. + * - A remapped array of pages from __iommu_dma_alloc(), for all + * non-atomic allocations. + * - A non-cacheable alias from the atomic pool, for atomic + * allocations by non-coherent devices. + * - A normal lowmem address, for atomic allocations by + * coherent devices. + * Hence how dodgy the below logic looks... + */ + if (dma_in_atomic_pool(cpu_addr, size)) { + __iommu_dma_unmap_page(dev, handle, iosize, 0, 0); + dma_free_from_pool(cpu_addr, size); + } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { + struct page *page = vmalloc_to_page(cpu_addr); + + __iommu_dma_unmap_page(dev, handle, iosize, 0, attrs); + dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); + dma_common_free_remap(cpu_addr, size, VM_USERMAP); + } else if (is_vmalloc_addr(cpu_addr)){ + struct vm_struct *area = find_vm_area(cpu_addr); + + if (WARN_ON(!area || !area->pages)) + return; + __iommu_dma_free(dev, area->pages, iosize, &handle); + dma_common_free_remap(cpu_addr, size, VM_USERMAP); + } else { + __iommu_dma_unmap_page(dev, handle, iosize, 0, 0); + __free_pages(virt_to_page(cpu_addr), get_order(size)); + } +} + +static int __iommu_dma_mmap_pfn(struct vm_area_struct *vma, + unsigned long pfn, size_t size) +{ + int ret = -ENXIO; + unsigned long nr_vma_pages = vma_pages(vma); + unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long off = vma->vm_pgoff; + + if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) { + ret = remap_pfn_range(vma, vma->vm_start, + pfn + off, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); + } + + return ret; +} + +static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long off = vma->vm_pgoff; + struct vm_struct *area; + int ret; + + vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs); + + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; + + if (off >= nr_pages || vma_pages(vma) > nr_pages - off) + return -ENXIO; + + if (!is_vmalloc_addr(cpu_addr)) { + unsigned long pfn = page_to_pfn(virt_to_page(cpu_addr)); + return __iommu_dma_mmap_pfn(vma, pfn, size); + } + + if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { + /* + * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped, + * hence in the vmalloc space. + */ + unsigned long pfn = vmalloc_to_pfn(cpu_addr); + return __iommu_dma_mmap_pfn(vma, pfn, size); + } + + area = find_vm_area(cpu_addr); + if (WARN_ON(!area || !area->pages)) + return -ENXIO; + + return __iommu_dma_mmap(area->pages, size, vma); +} + +static int __iommu_dma_get_sgtable_page(struct sg_table *sgt, struct page *page, + size_t size) +{ + int ret = sg_alloc_table(sgt, 1, GFP_KERNEL); + + if (!ret) + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); + return ret; +} + +static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct vm_struct *area = find_vm_area(cpu_addr); + + if (!is_vmalloc_addr(cpu_addr)) { + struct page *page = virt_to_page(cpu_addr); + return __iommu_dma_get_sgtable_page(sgt, page, size); + } + + if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { + /* + * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped, + * hence in the vmalloc space. + */ + struct page *page = vmalloc_to_page(cpu_addr); + return __iommu_dma_get_sgtable_page(sgt, page, size); + } + + if (WARN_ON(!area || !area->pages)) + return -ENXIO; + + return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size, + GFP_KERNEL); +} + +static const struct dma_map_ops iommu_dma_ops = { + .alloc = iommu_dma_alloc, + .free = iommu_dma_free, + .mmap = iommu_dma_mmap, + .get_sgtable = iommu_dma_get_sgtable, + .map_page = iommu_dma_map_page, + .unmap_page = iommu_dma_unmap_page, + .map_sg = iommu_dma_map_sg, + .unmap_sg = iommu_dma_unmap_sg, + .sync_single_for_cpu = iommu_dma_sync_single_for_cpu, + .sync_single_for_device = iommu_dma_sync_single_for_device, + .sync_sg_for_cpu = iommu_dma_sync_sg_for_cpu, + .sync_sg_for_device = iommu_dma_sync_sg_for_device, + .map_resource = iommu_dma_map_resource, + .unmap_resource = iommu_dma_unmap_resource, +}; + +/* + * The IOMMU core code allocates the default DMA domain, which the underlying + * IOMMU driver needs to support via the dma-iommu layer. + */ +void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + + if (!domain) + goto out_err; + + /* + * The IOMMU core code allocates the default DMA domain, which the + * underlying IOMMU driver needs to support via the dma-iommu layer. + */ + if (domain->type == IOMMU_DOMAIN_DMA) { + if (iommu_dma_init_domain(domain, dma_base, size, dev)) + goto out_err; + dev->dma_ops = &iommu_dma_ops; + } + + return; +out_err: + pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", + dev_name(dev)); +} + static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, phys_addr_t msi_addr, struct iommu_domain *domain) { @@ -948,3 +1284,9 @@ void iommu_dma_compose_msi_msg(struct msi_desc *desc, msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1; msg->address_lo += lower_32_bits(msi_page->iova); } + +static int iommu_dma_init(void) +{ + return iova_cache_get(); +} +arch_initcall(iommu_dma_init); diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index e1ef265b578b..b3cc3fb84079 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -24,49 +24,13 @@ #include #include -int iommu_dma_init(void); - /* Domain management interface for IOMMU drivers */ int iommu_get_dma_cookie(struct iommu_domain *domain); int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); void iommu_put_dma_cookie(struct iommu_domain *domain); /* Setup call for arch DMA mapping code */ -int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, - u64 size, struct device *dev); - -/* General helpers for DMA-API <-> IOMMU-API interaction */ -int dma_info_to_prot(enum dma_data_direction dir, bool coherent, - unsigned long attrs); - -/* - * These implement the bulk of the relevant DMA mapping callbacks, but require - * the arch code to take care of attributes and cache maintenance - */ -struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, - unsigned long attrs, int prot, dma_addr_t *handle); -void iommu_dma_free(struct device *dev, struct page **pages, size_t size, - dma_addr_t *handle); - -int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma); - -dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, int prot); -int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, int prot); - -/* - * Arch code with no special attribute handling may use these - * directly as DMA mapping callbacks for simplicity - */ -void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, - enum dma_data_direction dir, unsigned long attrs); -void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, unsigned long attrs); -dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, - size_t size, enum dma_data_direction dir, unsigned long attrs); -void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, unsigned long attrs); +void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size); /* The DMA API isn't _quite_ the whole story, though... */ /* @@ -91,9 +55,9 @@ struct msi_desc; struct msi_msg; struct device; -static inline int iommu_dma_init(void) +static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, + u64 size) { - return 0; } static inline int iommu_get_dma_cookie(struct iommu_domain *domain) -- cgit v1.2.3 From a9f4d93dbeb6f5ccb50c6362ba944afe34cb8f12 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 May 2019 09:29:46 +0200 Subject: iommu/dma: Switch copyright boilerplace to SPDX Signed-off-by: Christoph Hellwig Acked-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 13 +------------ include/linux/dma-iommu.h | 13 +------------ 2 files changed, 2 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 0aff220c4aed..0cd49c2d3770 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * A fairly generic DMA-API to IOMMU-API glue layer. * @@ -5,18 +6,6 @@ * * based in part on arch/arm/mm/dma-mapping.c: * Copyright (C) 2000-2004 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . */ #include diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index b3cc3fb84079..05556f4d9cce 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -1,17 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2014-2015 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . */ #ifndef __DMA_IOMMU_H #define __DMA_IOMMU_H -- cgit v1.2.3 From 88c50322449a4dea8687ed155968d19cfc763393 Mon Sep 17 00:00:00 2001 From: Przemyslaw Gaj Date: Tue, 16 Apr 2019 09:36:14 +0100 Subject: i3c: Drop support for I2C 10 bit addresing This patch drops support for I2C devices with 10 bit addressing. When I2C device with 10 bit address is defined in DT, I3C master registration fails. Address space for I2C devices has been reduced and ->i2c_funcs() hook has been removed. Because this patch series dropped support for 10 bit I2C devices, support is also dropped in Cadence I3C master driver and Synopsys DesignWare I3C master driver. Signed-off-by: Przemyslaw Gaj Signed-off-by: Boris Brezillon --- drivers/i3c/master.c | 21 ++++++++++++++------- drivers/i3c/master/dw-i3c-master.c | 6 ------ drivers/i3c/master/i3c-master-cdns.c | 10 +--------- include/linux/i3c/master.h | 5 +---- 4 files changed, 16 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index 5f4bd52121fe..b9d2b88928e1 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -1963,6 +1963,16 @@ of_i3c_master_add_i2c_boardinfo(struct i3c_master_controller *master, if (ret) return ret; + /* + * The I3C Specification does not clearly say I2C devices with 10-bit + * address are supported. These devices can't be passed properly through + * DEFSLVS command. + */ + if (boardinfo->base.flags & I2C_CLIENT_TEN) { + dev_err(&master->dev, "I2C device with 10 bit address not supported."); + return -ENOTSUPP; + } + /* LVR is encoded in reg[2]. */ boardinfo->lvr = reg[2]; @@ -2111,16 +2121,14 @@ static int i3c_master_i2c_adapter_xfer(struct i2c_adapter *adap, return ret ? ret : nxfers; } -static u32 i3c_master_i2c_functionalities(struct i2c_adapter *adap) +static u32 i3c_master_i2c_funcs(struct i2c_adapter *adapter) { - struct i3c_master_controller *master = i2c_adapter_to_i3c_master(adap); - - return master->ops->i2c_funcs(master); + return I2C_FUNC_SMBUS_EMUL | I2C_FUNC_I2C; } static const struct i2c_algorithm i3c_master_i2c_algo = { .master_xfer = i3c_master_i2c_adapter_xfer, - .functionality = i3c_master_i2c_functionalities, + .functionality = i3c_master_i2c_funcs, }; static int i3c_master_i2c_adapter_init(struct i3c_master_controller *master) @@ -2379,8 +2387,7 @@ EXPORT_SYMBOL_GPL(i3c_generic_ibi_recycle_slot); static int i3c_master_check_ops(const struct i3c_master_controller_ops *ops) { if (!ops || !ops->bus_init || !ops->priv_xfers || - !ops->send_ccc_cmd || !ops->do_daa || !ops->i2c_xfers || - !ops->i2c_funcs) + !ops->send_ccc_cmd || !ops->do_daa || !ops->i2c_xfers) return -EINVAL; if (ops->request_ibi && diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index 1d83c97431c7..d6e517e69f84 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -1060,11 +1060,6 @@ static void dw_i3c_master_detach_i2c_dev(struct i2c_dev_desc *dev) kfree(data); } -static u32 dw_i3c_master_i2c_funcs(struct i3c_master_controller *m) -{ - return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; -} - static irqreturn_t dw_i3c_master_irq_handler(int irq, void *dev_id) { struct dw_i3c_master *master = dev_id; @@ -1099,7 +1094,6 @@ static const struct i3c_master_controller_ops dw_mipi_i3c_ops = { .attach_i2c_dev = dw_i3c_master_attach_i2c_dev, .detach_i2c_dev = dw_i3c_master_detach_i2c_dev, .i2c_xfers = dw_i3c_master_i2c_xfers, - .i2c_funcs = dw_i3c_master_i2c_funcs, }; static int dw_i3c_probe(struct platform_device *pdev) diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c index 8889a4fdb454..237f24adddc6 100644 --- a/drivers/i3c/master/i3c-master-cdns.c +++ b/drivers/i3c/master/i3c-master-cdns.c @@ -864,11 +864,6 @@ static int cdns_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, return ret; } -static u32 cdns_i3c_master_i2c_funcs(struct i3c_master_controller *m) -{ - return I2C_FUNC_SMBUS_EMUL | I2C_FUNC_I2C | I2C_FUNC_10BIT_ADDR; -} - struct cdns_i3c_i2c_dev_data { u16 id; s16 ibi; @@ -1010,9 +1005,7 @@ static int cdns_i3c_master_attach_i2c_dev(struct i2c_dev_desc *dev) master->free_rr_slots &= ~BIT(slot); i2c_dev_set_master_data(dev, data); - writel(prepare_rr0_dev_address(dev->boardinfo->base.addr) | - (dev->boardinfo->base.flags & I2C_CLIENT_TEN ? - DEV_ID_RR0_LVR_EXT_ADDR : 0), + writel(prepare_rr0_dev_address(dev->boardinfo->base.addr), master->regs + DEV_ID_RR0(data->id)); writel(dev->boardinfo->lvr, master->regs + DEV_ID_RR2(data->id)); writel(readl(master->regs + DEVS_CTRL) | @@ -1518,7 +1511,6 @@ static const struct i3c_master_controller_ops cdns_i3c_master_ops = { .send_ccc_cmd = cdns_i3c_master_send_ccc_cmd, .priv_xfers = cdns_i3c_master_priv_xfers, .i2c_xfers = cdns_i3c_master_i2c_xfers, - .i2c_funcs = cdns_i3c_master_i2c_funcs, .enable_ibi = cdns_i3c_master_enable_ibi, .disable_ibi = cdns_i3c_master_disable_ibi, .request_ibi = cdns_i3c_master_request_ibi, diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index f13fd8b1dd79..eca8337bdaa5 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -48,7 +48,7 @@ struct i3c_i2c_dev_desc { #define I3C_LVR_I2C_INDEX(x) ((x) << 5) #define I3C_LVR_I2C_FM_MODE BIT(4) -#define I2C_MAX_ADDR GENMASK(9, 0) +#define I2C_MAX_ADDR GENMASK(6, 0) /** * struct i2c_dev_boardinfo - I2C device board information @@ -390,8 +390,6 @@ struct i3c_bus { * and i2c_put_dma_safe_msg_buf() helpers provided by the I2C * framework. * This method is mandatory. - * @i2c_funcs: expose the supported I2C functionalities. - * This method is mandatory. * @request_ibi: attach an IBI handler to an I3C device. This implies defining * an IBI handler and the constraints of the IBI (maximum payload * length and number of pre-allocated slots). @@ -437,7 +435,6 @@ struct i3c_master_controller_ops { void (*detach_i2c_dev)(struct i2c_dev_desc *dev); int (*i2c_xfers)(struct i2c_dev_desc *dev, const struct i2c_msg *xfers, int nxfers); - u32 (*i2c_funcs)(struct i3c_master_controller *master); int (*request_ibi)(struct i3c_dev_desc *dev, const struct i3c_ibi_setup *req); void (*free_ibi)(struct i3c_dev_desc *dev); -- cgit v1.2.3 From 4ec066c7b1476e0ca66a7acdb575627a5d1a1ee6 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sat, 25 May 2019 13:41:33 +0800 Subject: iommu/vt-d: Cleanup get_valid_domain_for_dev() Previously, get_valid_domain_for_dev() is used to retrieve the DMA domain which has been attached to the device or allocate one if no domain has been attached yet. As we have delegated the DMA domain management to upper layer, this function is used purely to allocate a private DMA domain if the default domain doesn't work for ths device. Cleanup the code for readability. Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 18 ++++++++---------- include/linux/intel-iommu.h | 1 - 2 files changed, 8 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index c8b73802f0e0..ebc06ee79dce 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2609,7 +2609,6 @@ static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw) } out: - return domain; } @@ -3558,16 +3557,17 @@ static unsigned long intel_alloc_iova(struct device *dev, return iova_pfn; } -struct dmar_domain *get_valid_domain_for_dev(struct device *dev) +static struct dmar_domain *get_private_domain_for_dev(struct device *dev) { struct dmar_domain *domain, *tmp; struct dmar_rmrr_unit *rmrr; struct device *i_dev; int i, ret; + /* Device shouldn't be attached by any domains. */ domain = find_domain(dev); if (domain) - goto out; + return NULL; domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) @@ -3597,11 +3597,9 @@ struct dmar_domain *get_valid_domain_for_dev(struct device *dev) } out: - if (!domain) dev_err(dev, "Allocating domain failed\n"); - return domain; } @@ -3638,7 +3636,7 @@ static bool iommu_need_mapping(struct device *dev) dmar_domain = to_dmar_domain(domain); dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; } - get_valid_domain_for_dev(dev); + get_private_domain_for_dev(dev); } dev_info(dev, "32bit DMA uses non-identity mapping\n"); @@ -3660,7 +3658,7 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, BUG_ON(dir == DMA_NONE); - domain = get_valid_domain_for_dev(dev); + domain = find_domain(dev); if (!domain) return DMA_MAPPING_ERROR; @@ -3875,7 +3873,7 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele if (!iommu_need_mapping(dev)) return dma_direct_map_sg(dev, sglist, nelems, dir, attrs); - domain = get_valid_domain_for_dev(dev); + domain = find_domain(dev); if (!domain) return 0; @@ -5547,7 +5545,7 @@ static int intel_iommu_add_device(struct device *dev) ret = iommu_request_dma_domain_for_dev(dev); if (ret) { dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; - if (!get_valid_domain_for_dev(dev)) { + if (!get_private_domain_for_dev(dev)) { dev_warn(dev, "Failed to get a private domain.\n"); return -ENOMEM; @@ -5640,7 +5638,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) u64 ctx_lo; int ret; - domain = get_valid_domain_for_dev(dev); + domain = find_domain(dev); if (!domain) return -EINVAL; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 4140726867a9..5b961c8ca64c 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -660,7 +660,6 @@ extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern int dmar_ir_support(void); -struct dmar_domain *get_valid_domain_for_dev(struct device *dev); void *alloc_pgtable_page(int node); void free_pgtable_page(void *vaddr); struct intel_iommu *domain_get_iommu(struct dmar_domain *domain); -- cgit v1.2.3 From 153969fd952d81ab8f57574f9be1a90b0a0fa791 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 21 May 2019 03:38:25 +0200 Subject: ARM: versatile: Drop CLCD platform data The Versatile family no longer makes any use of the CLCD platform data, we have moved over all users to the DRM driver that has built-in handling of the displays. Delete the old auxdata. Signed-off-by: Linus Walleij --- arch/arm/mach-versatile/versatile_dt.c | 157 --------------------- include/linux/platform_data/video-clcd-versatile.h | 28 ---- 2 files changed, 185 deletions(-) delete mode 100644 include/linux/platform_data/video-clcd-versatile.h (limited to 'include/linux') diff --git a/arch/arm/mach-versatile/versatile_dt.c b/arch/arm/mach-versatile/versatile_dt.c index 028463af726d..b5ff1ea5a944 100644 --- a/arch/arm/mach-versatile/versatile_dt.c +++ b/arch/arm/mach-versatile/versatile_dt.c @@ -29,8 +29,6 @@ #include #include #include -#include -#include #include #include #include @@ -47,14 +45,12 @@ */ #define VERSATILE_SYS_PCICTL_OFFSET 0x44 #define VERSATILE_SYS_MCI_OFFSET 0x48 -#define VERSATILE_SYS_CLCD_OFFSET 0x50 /* * VERSATILE peripheral addresses */ #define VERSATILE_MMCI0_BASE 0x10005000 /* MMC interface */ #define VERSATILE_MMCI1_BASE 0x1000B000 /* MMC Interface */ -#define VERSATILE_CLCD_BASE 0x10120000 /* CLCD */ #define VERSATILE_SCTL_BASE 0x101E0000 /* System controller */ #define VERSATILE_IB2_BASE 0x24000000 /* IB2 module */ #define VERSATILE_IB2_CTL_BASE (VERSATILE_IB2_BASE + 0x03000000) @@ -96,158 +92,6 @@ static struct mmci_platform_data mmc1_plat_data = { .status = mmc_status, }; -/* - * CLCD support. - */ -#define SYS_CLCD_MODE_MASK (3 << 0) -#define SYS_CLCD_MODE_888 (0 << 0) -#define SYS_CLCD_MODE_5551 (1 << 0) -#define SYS_CLCD_MODE_565_RLSB (2 << 0) -#define SYS_CLCD_MODE_565_BLSB (3 << 0) -#define SYS_CLCD_NLCDIOON (1 << 2) -#define SYS_CLCD_VDDPOSSWITCH (1 << 3) -#define SYS_CLCD_PWR3V5SWITCH (1 << 4) -#define SYS_CLCD_ID_MASK (0x1f << 8) -#define SYS_CLCD_ID_SANYO_3_8 (0x00 << 8) -#define SYS_CLCD_ID_UNKNOWN_8_4 (0x01 << 8) -#define SYS_CLCD_ID_EPSON_2_2 (0x02 << 8) -#define SYS_CLCD_ID_SANYO_2_5 (0x07 << 8) -#define SYS_CLCD_ID_VGA (0x1f << 8) - -static bool is_sanyo_2_5_lcd; - -/* - * Disable all display connectors on the interface module. - */ -static void versatile_clcd_disable(struct clcd_fb *fb) -{ - void __iomem *sys_clcd = versatile_sys_base + VERSATILE_SYS_CLCD_OFFSET; - u32 val; - - val = readl(sys_clcd); - val &= ~SYS_CLCD_NLCDIOON | SYS_CLCD_PWR3V5SWITCH; - writel(val, sys_clcd); - - /* - * If the LCD is Sanyo 2x5 in on the IB2 board, turn the back-light off - */ - if (of_machine_is_compatible("arm,versatile-ab") && is_sanyo_2_5_lcd) { - unsigned long ctrl; - - ctrl = readl(versatile_ib2_ctrl); - ctrl &= ~0x01; - writel(ctrl, versatile_ib2_ctrl); - } -} - -/* - * Enable the relevant connector on the interface module. - */ -static void versatile_clcd_enable(struct clcd_fb *fb) -{ - struct fb_var_screeninfo *var = &fb->fb.var; - void __iomem *sys_clcd = versatile_sys_base + VERSATILE_SYS_CLCD_OFFSET; - u32 val; - - val = readl(sys_clcd); - val &= ~SYS_CLCD_MODE_MASK; - - switch (var->green.length) { - case 5: - val |= SYS_CLCD_MODE_5551; - break; - case 6: - if (var->red.offset == 0) - val |= SYS_CLCD_MODE_565_RLSB; - else - val |= SYS_CLCD_MODE_565_BLSB; - break; - case 8: - val |= SYS_CLCD_MODE_888; - break; - } - - /* - * Set the MUX - */ - writel(val, sys_clcd); - - /* - * And now enable the PSUs - */ - val |= SYS_CLCD_NLCDIOON | SYS_CLCD_PWR3V5SWITCH; - writel(val, sys_clcd); - - /* - * If the LCD is Sanyo 2x5 in on the IB2 board, turn the back-light on - */ - if (of_machine_is_compatible("arm,versatile-ab") && is_sanyo_2_5_lcd) { - unsigned long ctrl; - - ctrl = readl(versatile_ib2_ctrl); - ctrl |= 0x01; - writel(ctrl, versatile_ib2_ctrl); - } -} - -/* - * Detect which LCD panel is connected, and return the appropriate - * clcd_panel structure. Note: we do not have any information on - * the required timings for the 8.4in panel, so we presently assume - * VGA timings. - */ -static int versatile_clcd_setup(struct clcd_fb *fb) -{ - void __iomem *sys_clcd = versatile_sys_base + VERSATILE_SYS_CLCD_OFFSET; - const char *panel_name; - u32 val; - - is_sanyo_2_5_lcd = false; - - val = readl(sys_clcd) & SYS_CLCD_ID_MASK; - if (val == SYS_CLCD_ID_SANYO_3_8) - panel_name = "Sanyo TM38QV67A02A"; - else if (val == SYS_CLCD_ID_SANYO_2_5) { - panel_name = "Sanyo QVGA Portrait"; - is_sanyo_2_5_lcd = true; - } else if (val == SYS_CLCD_ID_EPSON_2_2) - panel_name = "Epson L2F50113T00"; - else if (val == SYS_CLCD_ID_VGA) - panel_name = "VGA"; - else { - printk(KERN_ERR "CLCD: unknown LCD panel ID 0x%08x, using VGA\n", - val); - panel_name = "VGA"; - } - - fb->panel = versatile_clcd_get_panel(panel_name); - if (!fb->panel) - return -EINVAL; - - return versatile_clcd_setup_dma(fb, SZ_1M); -} - -static void versatile_clcd_decode(struct clcd_fb *fb, struct clcd_regs *regs) -{ - clcdfb_decode(fb, regs); - - /* Always clear BGR for RGB565: we do the routing externally */ - if (fb->fb.var.green.length == 6) - regs->cntl &= ~CNTL_BGR; -} - -static struct clcd_board clcd_plat_data = { - .name = "Versatile", - .caps = CLCD_CAP_5551 | CLCD_CAP_565 | CLCD_CAP_888, - .check = clcdfb_check, - .decode = versatile_clcd_decode, - .disable = versatile_clcd_disable, - .enable = versatile_clcd_enable, - .setup = versatile_clcd_setup, - .mmap = versatile_clcd_mmap_dma, - .remove = versatile_clcd_remove_dma, -}; - /* * Lookup table for attaching a specific name and platform_data pointer to * devices as they get created by of_platform_populate(). Ideally this table @@ -257,7 +101,6 @@ static struct clcd_board clcd_plat_data = { struct of_dev_auxdata versatile_auxdata_lookup[] __initdata = { OF_DEV_AUXDATA("arm,primecell", VERSATILE_MMCI0_BASE, "fpga:05", &mmc0_plat_data), OF_DEV_AUXDATA("arm,primecell", VERSATILE_MMCI1_BASE, "fpga:0b", &mmc1_plat_data), - OF_DEV_AUXDATA("arm,primecell", VERSATILE_CLCD_BASE, "dev:20", &clcd_plat_data), {} }; diff --git a/include/linux/platform_data/video-clcd-versatile.h b/include/linux/platform_data/video-clcd-versatile.h deleted file mode 100644 index 305ebaec3afd..000000000000 --- a/include/linux/platform_data/video-clcd-versatile.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef PLAT_CLCD_H -#define PLAT_CLCD_H - -#ifdef CONFIG_PLAT_VERSATILE_CLCD -struct clcd_panel *versatile_clcd_get_panel(const char *); -int versatile_clcd_setup_dma(struct clcd_fb *, unsigned long); -int versatile_clcd_mmap_dma(struct clcd_fb *, struct vm_area_struct *); -void versatile_clcd_remove_dma(struct clcd_fb *); -#else -static inline struct clcd_panel *versatile_clcd_get_panel(const char *s) -{ - return NULL; -} -static inline int versatile_clcd_setup_dma(struct clcd_fb *fb, unsigned long framesize) -{ - return -ENODEV; -} -static inline int versatile_clcd_mmap_dma(struct clcd_fb *fb, struct vm_area_struct *vm) -{ - return -ENODEV; -} -static inline void versatile_clcd_remove_dma(struct clcd_fb *fb) -{ -} -#endif - -#endif -- cgit v1.2.3 From 2b2f7def058a5386838ef4dba70a860285f79e66 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 27 May 2019 04:51:53 -0700 Subject: bus: ti-sysc: Add support for missing clockdomain handling We need to let ti-sysc driver manage clockdomain autoidle for the duration of of reset, enable and idle. And we need to do it before we enable the clock and after we disable it. Currently we are still relying on platform callbacks indirectly managing clockdomain autoidle. But I noticed that for device tree only probed drivers it now happens only after we enabling the clocks and before we disable the clocks, while it should be the other way around. So far I have not noticed any issues with this though. Let's add new ti_sysc_clkdm_deny_idle() and ti_sysc_clkdm_allow_idle() functions for ti-sysc driver to use to manage clockdomains directly via platform data callbacks. Note that we can implement the clockdomain functions in pdata-quirks.c as for probing devices without "ti,hwmods" custom property we don't need to use the other platform data callbacks. Let's do this in one patch as there's is still an unlikely chance we may need to apply this as a fix for v5.2 for dropping legacy platform data for some devices. We also do have the option of adding back the platform data if needed in case of trouble. Tested-by: Keerthy Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod.c | 39 ++--------- arch/arm/mach-omap2/pdata-quirks.c | 60 ++++++++++++++++ drivers/bus/ti-sysc.c | 127 +++++++++++++++++++++++++++------- include/linux/platform_data/ti-sysc.h | 8 +++ 4 files changed, 174 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 405ac24def05..932ba221e8e7 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -3445,6 +3445,7 @@ static int omap_hwmod_check_module(struct device *dev, * @dev: struct device * @oh: module * @sysc_fields: sysc register bits + * @clockdomain: clockdomain * @rev_offs: revision register offset * @sysc_offs: sysconfig register offset * @syss_offs: sysstatus register offset @@ -3456,6 +3457,7 @@ static int omap_hwmod_check_module(struct device *dev, static int omap_hwmod_allocate_module(struct device *dev, struct omap_hwmod *oh, const struct ti_sysc_module_data *data, struct sysc_regbits *sysc_fields, + struct clockdomain *clkdm, s32 rev_offs, s32 sysc_offs, s32 syss_offs, u32 sysc_flags, u32 idlemodes) @@ -3463,8 +3465,6 @@ static int omap_hwmod_allocate_module(struct device *dev, struct omap_hwmod *oh, struct omap_hwmod_class_sysconfig *sysc; struct omap_hwmod_class *class = NULL; struct omap_hwmod_ocp_if *oi = NULL; - struct clockdomain *clkdm = NULL; - struct clk *clk = NULL; void __iomem *regs = NULL; unsigned long flags; @@ -3511,36 +3511,6 @@ static int omap_hwmod_allocate_module(struct device *dev, struct omap_hwmod *oh, oi->user = OCP_USER_MPU | OCP_USER_SDMA; } - if (!oh->_clk) { - struct clk_hw_omap *hwclk; - - clk = of_clk_get_by_name(dev->of_node, "fck"); - if (!IS_ERR(clk)) - clk_prepare(clk); - else - clk = NULL; - - /* - * Populate clockdomain based on dts clock. It is needed for - * clkdm_deny_idle() and clkdm_allow_idle() until we have have - * interconnect driver and reset driver capable of blocking - * clockdomain idle during reset, enable and idle. - */ - if (clk) { - hwclk = to_clk_hw_omap(__clk_get_hw(clk)); - if (hwclk && hwclk->clkdm_name) - clkdm = clkdm_lookup(hwclk->clkdm_name); - } - - /* - * Note that we assume interconnect driver manages the clocks - * and do not need to populate oh->_clk for dynamically - * allocated modules. - */ - clk_unprepare(clk); - clk_put(clk); - } - spin_lock_irqsave(&oh->_lock, flags); if (regs) oh->_mpu_rt_va = regs; @@ -3626,7 +3596,7 @@ int omap_hwmod_init_module(struct device *dev, u32 sysc_flags, idlemodes; int error; - if (!dev || !data) + if (!dev || !data || !data->name || !cookie) return -EINVAL; oh = _lookup(data->name); @@ -3697,7 +3667,8 @@ int omap_hwmod_init_module(struct device *dev, return error; return omap_hwmod_allocate_module(dev, oh, data, sysc_fields, - rev_offs, sysc_offs, syss_offs, + cookie->clkdm, rev_offs, + sysc_offs, syss_offs, sysc_flags, idlemodes); } diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index a2ecc5e69abb..b09cc4e8d240 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -29,6 +29,7 @@ #include #include +#include "clockdomain.h" #include "common.h" #include "common-board-devices.h" #include "control.h" @@ -463,6 +464,62 @@ static void __init dra7x_evm_mmc_quirk(void) } #endif +static struct clockdomain *ti_sysc_find_one_clockdomain(struct clk *clk) +{ + struct clockdomain *clkdm = NULL; + struct clk_hw_omap *hwclk; + + hwclk = to_clk_hw_omap(__clk_get_hw(clk)); + if (hwclk && hwclk->clkdm_name) + clkdm = clkdm_lookup(hwclk->clkdm_name); + + return clkdm; +} + +/** + * ti_sysc_clkdm_init - find clockdomain based on clock + * @fck: device functional clock + * @ick: device interface clock + * @dev: struct device + * + * Populate clockdomain based on clock. It is needed for + * clkdm_deny_idle() and clkdm_allow_idle() for blocking clockdomain + * clockdomain idle during reset, enable and idle. + * + * Note that we assume interconnect driver manages the clocks + * and do not need to populate oh->_clk for dynamically + * allocated modules. + */ +static int ti_sysc_clkdm_init(struct device *dev, + struct clk *fck, struct clk *ick, + struct ti_sysc_cookie *cookie) +{ + if (fck) + cookie->clkdm = ti_sysc_find_one_clockdomain(fck); + if (cookie->clkdm) + return 0; + if (ick) + cookie->clkdm = ti_sysc_find_one_clockdomain(ick); + if (cookie->clkdm) + return 0; + + return -ENODEV; +} + +static void ti_sysc_clkdm_deny_idle(struct device *dev, + const struct ti_sysc_cookie *cookie) +{ + if (cookie->clkdm) + clkdm_deny_idle(cookie->clkdm); +} + +static void ti_sysc_clkdm_allow_idle(struct device *dev, + const struct ti_sysc_cookie *cookie) +{ + if (cookie->clkdm) + clkdm_allow_idle(cookie->clkdm); +} + static int ti_sysc_enable_module(struct device *dev, const struct ti_sysc_cookie *cookie) { @@ -494,6 +551,9 @@ static struct of_dev_auxdata omap_auxdata_lookup[]; static struct ti_sysc_platform_data ti_sysc_pdata = { .auxdata = omap_auxdata_lookup, + .init_clockdomain = ti_sysc_clkdm_init, + .clkdm_deny_idle = ti_sysc_clkdm_deny_idle, + .clkdm_allow_idle = ti_sysc_clkdm_allow_idle, .init_module = omap_hwmod_init_module, .enable_module = ti_sysc_enable_module, .idle_module = ti_sysc_idle_module, diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index b72741668c92..e86f7850206a 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -422,6 +422,30 @@ static void sysc_disable_opt_clocks(struct sysc *ddata) } } +static void sysc_clkdm_deny_idle(struct sysc *ddata) +{ + struct ti_sysc_platform_data *pdata; + + if (ddata->legacy_mode) + return; + + pdata = dev_get_platdata(ddata->dev); + if (pdata && pdata->clkdm_deny_idle) + pdata->clkdm_deny_idle(ddata->dev, &ddata->cookie); +} + +static void sysc_clkdm_allow_idle(struct sysc *ddata) +{ + struct ti_sysc_platform_data *pdata; + + if (ddata->legacy_mode) + return; + + pdata = dev_get_platdata(ddata->dev); + if (pdata && pdata->clkdm_allow_idle) + pdata->clkdm_allow_idle(ddata->dev, &ddata->cookie); +} + /** * sysc_init_resets - init rstctrl reset line if configured * @ddata: device driver data @@ -795,6 +819,7 @@ static void sysc_show_registers(struct sysc *ddata) #define SYSC_IDLE_MASK (SYSC_NR_IDLEMODES - 1) +/* Caller needs to manage sysc_clkdm_deny_idle() and sysc_clkdm_allow_idle() */ static int sysc_enable_module(struct device *dev) { struct sysc *ddata; @@ -805,11 +830,6 @@ static int sysc_enable_module(struct device *dev) if (ddata->offsets[SYSC_SYSCONFIG] == -ENODEV) return 0; - /* - * TODO: Need to prevent clockdomain autoidle? - * See clkdm_deny_idle() in arch/mach-omap2/omap_hwmod.c - */ - regbits = ddata->cap->regbits; reg = sysc_read(ddata, ddata->offsets[SYSC_SYSCONFIG]); @@ -861,6 +881,7 @@ static int sysc_best_idle_mode(u32 idlemodes, u32 *best_mode) return 0; } +/* Caller needs to manage sysc_clkdm_deny_idle() and sysc_clkdm_allow_idle() */ static int sysc_disable_module(struct device *dev) { struct sysc *ddata; @@ -872,11 +893,6 @@ static int sysc_disable_module(struct device *dev) if (ddata->offsets[SYSC_SYSCONFIG] == -ENODEV) return 0; - /* - * TODO: Need to prevent clockdomain autoidle? - * See clkdm_deny_idle() in arch/mach-omap2/omap_hwmod.c - */ - regbits = ddata->cap->regbits; reg = sysc_read(ddata, ddata->offsets[SYSC_SYSCONFIG]); @@ -966,14 +982,16 @@ static int __maybe_unused sysc_runtime_suspend(struct device *dev) if (!ddata->enabled) return 0; + sysc_clkdm_deny_idle(ddata); + if (ddata->legacy_mode) { error = sysc_runtime_suspend_legacy(dev, ddata); if (error) - return error; + goto err_allow_idle; } else { error = sysc_disable_module(dev); if (error) - return error; + goto err_allow_idle; } sysc_disable_main_clocks(ddata); @@ -983,6 +1001,9 @@ static int __maybe_unused sysc_runtime_suspend(struct device *dev) ddata->enabled = false; +err_allow_idle: + sysc_clkdm_allow_idle(ddata); + return error; } @@ -996,10 +1017,12 @@ static int __maybe_unused sysc_runtime_resume(struct device *dev) if (ddata->enabled) return 0; + sysc_clkdm_deny_idle(ddata); + if (sysc_opt_clks_needed(ddata)) { error = sysc_enable_opt_clocks(ddata); if (error) - return error; + goto err_allow_idle; } error = sysc_enable_main_clocks(ddata); @@ -1018,6 +1041,8 @@ static int __maybe_unused sysc_runtime_resume(struct device *dev) ddata->enabled = true; + sysc_clkdm_allow_idle(ddata); + return 0; err_main_clocks: @@ -1025,6 +1050,8 @@ err_main_clocks: err_opt_clocks: if (sysc_opt_clks_needed(ddata)) sysc_disable_opt_clocks(ddata); +err_allow_idle: + sysc_clkdm_allow_idle(ddata); return error; } @@ -1245,6 +1272,33 @@ static void sysc_init_revision_quirks(struct sysc *ddata) } } +static int sysc_clockdomain_init(struct sysc *ddata) +{ + struct ti_sysc_platform_data *pdata = dev_get_platdata(ddata->dev); + struct clk *fck = NULL, *ick = NULL; + int error; + + if (!pdata || !pdata->init_clockdomain) + return 0; + + switch (ddata->nr_clocks) { + case 2: + ick = ddata->clocks[SYSC_ICK]; + /* fallthrough */ + case 1: + fck = ddata->clocks[SYSC_FCK]; + break; + case 0: + return 0; + } + + error = pdata->init_clockdomain(ddata->dev, fck, ick, &ddata->cookie); + if (!error || error == -ENODEV) + return 0; + + return error; +} + /* * Note that pdata->init_module() typically does a reset first. After * pdata->init_module() is done, PM runtime can be used for the interconnect @@ -1255,7 +1309,7 @@ static int sysc_legacy_init(struct sysc *ddata) struct ti_sysc_platform_data *pdata = dev_get_platdata(ddata->dev); int error; - if (!ddata->legacy_mode || !pdata || !pdata->init_module) + if (!pdata || !pdata->init_module) return 0; error = pdata->init_module(ddata->dev, ddata->mdata, &ddata->cookie); @@ -1347,7 +1401,13 @@ static int sysc_init_module(struct sysc *ddata) (SYSC_QUIRK_NO_IDLE | SYSC_QUIRK_NO_IDLE_ON_INIT)) manage_clocks = false; + error = sysc_clockdomain_init(ddata); + if (error) + return error; + if (manage_clocks) { + sysc_clkdm_deny_idle(ddata); + error = sysc_enable_opt_clocks(ddata); if (error) return error; @@ -1360,20 +1420,33 @@ static int sysc_init_module(struct sysc *ddata) ddata->revision = sysc_read_revision(ddata); sysc_init_revision_quirks(ddata); - error = sysc_legacy_init(ddata); - if (error) - goto err_main_clocks; + if (ddata->legacy_mode) { + error = sysc_legacy_init(ddata); + if (error) + goto err_main_clocks; + } + + if (!ddata->legacy_mode && manage_clocks) { + error = sysc_enable_module(ddata->dev); + if (error) + goto err_main_clocks; + } error = sysc_reset(ddata); if (error) dev_err(ddata->dev, "Reset failed with %d\n", error); + if (!ddata->legacy_mode && manage_clocks) + sysc_disable_module(ddata->dev); + err_main_clocks: if (manage_clocks) sysc_disable_main_clocks(ddata); err_opt_clocks: - if (manage_clocks) + if (manage_clocks) { sysc_disable_opt_clocks(ddata); + sysc_clkdm_allow_idle(ddata); + } return error; } @@ -2012,20 +2085,22 @@ static int sysc_init_pdata(struct sysc *ddata) struct ti_sysc_platform_data *pdata = dev_get_platdata(ddata->dev); struct ti_sysc_module_data *mdata; - if (!pdata || !ddata->legacy_mode) + if (!pdata) return 0; mdata = devm_kzalloc(ddata->dev, sizeof(*mdata), GFP_KERNEL); if (!mdata) return -ENOMEM; - mdata->name = ddata->legacy_mode; - mdata->module_pa = ddata->module_pa; - mdata->module_size = ddata->module_size; - mdata->offsets = ddata->offsets; - mdata->nr_offsets = SYSC_MAX_REGS; - mdata->cap = ddata->cap; - mdata->cfg = &ddata->cfg; + if (ddata->legacy_mode) { + mdata->name = ddata->legacy_mode; + mdata->module_pa = ddata->module_pa; + mdata->module_size = ddata->module_size; + mdata->offsets = ddata->offsets; + mdata->nr_offsets = SYSC_MAX_REGS; + mdata->cap = ddata->cap; + mdata->cfg = &ddata->cfg; + } ddata->mdata = mdata; diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index 9256c0305968..6626fd31e309 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -19,6 +19,7 @@ enum ti_sysc_module_type { struct ti_sysc_cookie { void *data; + void *clkdm; }; /** @@ -125,9 +126,16 @@ struct ti_sysc_module_data { }; struct device; +struct clk; struct ti_sysc_platform_data { struct of_dev_auxdata *auxdata; + int (*init_clockdomain)(struct device *dev, struct clk *fck, + struct clk *ick, struct ti_sysc_cookie *cookie); + void (*clkdm_deny_idle)(struct device *dev, + const struct ti_sysc_cookie *cookie); + void (*clkdm_allow_idle)(struct device *dev, + const struct ti_sysc_cookie *cookie); int (*init_module)(struct device *dev, const struct ti_sysc_module_data *data, struct ti_sysc_cookie *cookie); -- cgit v1.2.3 From e0db94fe87dacd72be0699adcc29e321db7f1689 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 27 May 2019 04:51:53 -0700 Subject: bus: ti-sysc: Make OCP reset work for sysstatus and sysconfig reset bits We've had minimal OCP softreset support in ti-sysc interconnect target module driver only used for MCAN driver so far. But it turns out that MCAN has the sysstatus register resetdone bit inverted compared to most other modules. Let's make OCP softreset work for other typical cases with reset status in sysstatus or sysconfig register so we can use the new functions for sysc_enable_module() and sysc_disable_module() without "ti,hwmods" property in the following patches. Tested-by: Keerthy Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 72 ++++++++++++++++++++++++++--------- include/linux/platform_data/ti-sysc.h | 1 + 2 files changed, 55 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index f00997eea207..f4a048430cd1 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -153,6 +153,26 @@ static u32 sysc_read_revision(struct sysc *ddata) return sysc_read(ddata, offset); } +static u32 sysc_read_sysconfig(struct sysc *ddata) +{ + int offset = ddata->offsets[SYSC_SYSCONFIG]; + + if (offset < 0) + return 0; + + return sysc_read(ddata, offset); +} + +static u32 sysc_read_sysstatus(struct sysc *ddata) +{ + int offset = ddata->offsets[SYSC_SYSSTATUS]; + + if (offset < 0) + return 0; + + return sysc_read(ddata, offset); +} + static int sysc_add_named_clock_from_child(struct sysc *ddata, const char *name, const char *optfck_name) @@ -1369,34 +1389,49 @@ static int sysc_rstctrl_reset_deassert(struct sysc *ddata, bool reset) return reset_control_deassert(ddata->rsts); } +/* + * Note that the caller must ensure the interconnect target module is enabled + * before calling reset. Otherwise reset will not complete. + */ static int sysc_reset(struct sysc *ddata) { - int offset = ddata->offsets[SYSC_SYSCONFIG]; - int val; + int sysc_offset, syss_offset, sysc_val, rstval, quirks, error = 0; + u32 sysc_mask, syss_done; + + sysc_offset = ddata->offsets[SYSC_SYSCONFIG]; + syss_offset = ddata->offsets[SYSC_SYSSTATUS]; + quirks = ddata->cfg.quirks; - if (ddata->legacy_mode || offset < 0 || + if (ddata->legacy_mode || sysc_offset < 0 || + ddata->cap->regbits->srst_shift < 0 || ddata->cfg.quirks & SYSC_QUIRK_NO_RESET_ON_INIT) return 0; - /* - * Currently only support reset status in sysstatus. - * Warn and return error in all other cases - */ - if (!ddata->cfg.syss_mask) { - dev_err(ddata->dev, "No ti,syss-mask. Reset failed\n"); - return -EINVAL; - } + sysc_mask = BIT(ddata->cap->regbits->srst_shift); - val = sysc_read(ddata, offset); - val |= (0x1 << ddata->cap->regbits->srst_shift); - sysc_write(ddata, offset, val); + if (ddata->cfg.quirks & SYSS_QUIRK_RESETDONE_INVERTED) + syss_done = 0; + else + syss_done = ddata->cfg.syss_mask; + + sysc_val = sysc_read_sysconfig(ddata); + sysc_val |= sysc_mask; + sysc_write(ddata, sysc_offset, sysc_val); /* Poll on reset status */ - offset = ddata->offsets[SYSC_SYSSTATUS]; + if (syss_offset >= 0) { + error = readx_poll_timeout(sysc_read_sysstatus, ddata, rstval, + (rstval & ddata->cfg.syss_mask) == + syss_done, + 100, MAX_MODULE_SOFTRESET_WAIT); + + } else if (ddata->cfg.quirks & SYSC_QUIRK_RESET_STATUS) { + error = readx_poll_timeout(sysc_read_sysconfig, ddata, rstval, + !(rstval & sysc_mask), + 100, MAX_MODULE_SOFTRESET_WAIT); + } - return readl_poll_timeout(ddata->module_va + offset, val, - (val & ddata->cfg.syss_mask) == 0x0, - 100, MAX_MODULE_SOFTRESET_WAIT); + return error; } /* @@ -2099,6 +2134,7 @@ static const struct sysc_capabilities sysc_dra7_mcan = { .type = TI_SYSC_DRA7_MCAN, .sysc_mask = SYSC_DRA7_MCAN_ENAWAKEUP | SYSC_OMAP4_SOFTRESET, .regbits = &sysc_regbits_dra7_mcan, + .mod_quirks = SYSS_QUIRK_RESETDONE_INVERTED, }; static int sysc_init_pdata(struct sysc *ddata) diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index 6626fd31e309..8822e99ff813 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -47,6 +47,7 @@ struct sysc_regbits { s8 emufree_shift; }; +#define SYSS_QUIRK_RESETDONE_INVERTED BIT(14) #define SYSC_QUIRK_SWSUP_MSTANDBY BIT(13) #define SYSC_QUIRK_SWSUP_SIDLE_ACT BIT(12) #define SYSC_QUIRK_SWSUP_SIDLE BIT(11) -- cgit v1.2.3 From f34e1176df34b87e88eb65cd730255c913f92f8c Mon Sep 17 00:00:00 2001 From: Weitao Hou Date: Mon, 20 May 2019 13:09:48 +0800 Subject: iommu/vt-d: Fix typo in SVM code comment Fix 'acccess' to 'access'. Signed-off-by: Weitao Hou Signed-off-by: Joerg Roedel --- include/linux/intel-svm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index e3f76315ca4d..8dfead70699c 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -57,7 +57,7 @@ struct svm_dev_ops { /** * intel_svm_bind_mm() - Bind the current process to a PASID - * @dev: Device to be granted acccess + * @dev: Device to be granted access * @pasid: Address for allocated PASID * @flags: Flags. Later for requesting supervisor mode, etc. * @ops: Callbacks to device driver -- cgit v1.2.3 From 621dc2fdcea1496ddd472c297d42e8d6642b05bc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 28 May 2019 15:36:44 +0200 Subject: acpi/irq: Implement helper to create hierachical domains ACPI permits arbitrary producer->consumer interrupt links to be described in AML, which means a topology such as the following is perfectly legal: Device (EXIU) { Name (_HID, "SCX0008") Name (_UID, Zero) Name (_CRS, ResourceTemplate () { ... }) } Device (GPIO) { Name (_HID, "SCX0007") Name (_UID, Zero) Name (_CRS, ResourceTemplate () { Memory32Fixed (ReadWrite, SYNQUACER_GPIO_BASE, SYNQUACER_GPIO_SIZE) Interrupt (ResourceConsumer, Edge, ActiveHigh, ExclusiveAndWake, 0, "\\_SB.EXIU") { 7, } }) ... } The EXIU in this example is the external interrupt unit as can be found on Socionext SynQuacer based platforms, which converts a block of 32 SPIs from arbitrary polarity/trigger into level-high, with a separate set of config/mask/unmask/clear controls. The existing DT based driver in drivers/irqchip/irq-sni-exiu.c models this as a hierarchical domain stacked on top of the GIC's irqdomain. Since the GIC is modeled as a DT node as well, obtaining a reference to this irqdomain is easily done by going through the parent link. On ACPI systems, however, the GIC is not modeled as an object in the namespace, and so device objects cannot refer to it directly. So in order to obtain the irqdomain reference when driving the EXIU in ACPI mode, we need a helper that implicitly grabs the default domain as the parent of the hierarchy for interrupts allocated out of the global GSI pool. Reviewed-by: Mika Westerberg Reviewed-by: Lorenzo Pieralisi Signed-off-by: Ard Biesheuvel Signed-off-by: Marc Zyngier --- drivers/acpi/irq.c | 26 ++++++++++++++++++++++++++ include/linux/acpi.h | 7 +++++++ 2 files changed, 33 insertions(+) (limited to 'include/linux') diff --git a/drivers/acpi/irq.c b/drivers/acpi/irq.c index c3b2222e2129..ce6b25a3b7a7 100644 --- a/drivers/acpi/irq.c +++ b/drivers/acpi/irq.c @@ -295,3 +295,29 @@ void __init acpi_set_irq_model(enum acpi_irq_model_id model, acpi_irq_model = model; acpi_gsi_domain_id = fwnode; } + +/** + * acpi_irq_create_hierarchy - Create a hierarchical IRQ domain with the default + * GSI domain as its parent. + * @flags: Irq domain flags associated with the domain + * @size: Size of the domain. + * @fwnode: Optional fwnode of the interrupt controller + * @ops: Pointer to the interrupt domain callbacks + * @host_data: Controller private data pointer + */ +struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags, + unsigned int size, + struct fwnode_handle *fwnode, + const struct irq_domain_ops *ops, + void *host_data) +{ + struct irq_domain *d = irq_find_matching_fwnode(acpi_gsi_domain_id, + DOMAIN_BUS_ANY); + + if (!d) + return NULL; + + return irq_domain_create_hierarchy(d, flags, size, fwnode, ops, + host_data); +} +EXPORT_SYMBOL_GPL(acpi_irq_create_hierarchy); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 98440df7fe42..70de4bc30cea 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -23,6 +23,7 @@ #include #include /* for struct resource */ +#include #include #include #include @@ -327,6 +328,12 @@ int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi); void acpi_set_irq_model(enum acpi_irq_model_id model, struct fwnode_handle *fwnode); +struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags, + unsigned int size, + struct fwnode_handle *fwnode, + const struct irq_domain_ops *ops, + void *host_data); + #ifdef CONFIG_X86_IO_APIC extern int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity); #else -- cgit v1.2.3 From 12edff045bc6dd3ab1565cc02fa4841803c2a633 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 9 Apr 2019 07:48:18 -0700 Subject: rcu: Make kfree_rcu() ignore NULL pointers This commit makes the kfree_rcu() macro's semantics be consistent with the likes of kfree() by adding a check for NULL pointers, so that kfree_rcu(NULL, ...) is a no-op. Reported-by: Andriy Shevchenko Reported-by: Christoph Hellwig Signed-off-by: Paul E. McKenney Reviewed-by: Andriy Shevchenko --- include/linux/rcupdate.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 922bb6848813..915460ec0872 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -805,7 +805,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) /** * kfree_rcu() - kfree an object after a grace period. * @ptr: pointer to kfree - * @rcu_head: the name of the struct rcu_head within the type of @ptr. + * @rhf: the name of the struct rcu_head within the type of @ptr. * * Many rcu callbacks functions just call kfree() on the base structure. * These functions are trivial, but their size adds up, and furthermore @@ -828,9 +828,13 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * The BUILD_BUG_ON check must not involve any function calls, hence the * checks are done in macros here. */ -#define kfree_rcu(ptr, rcu_head) \ - __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) - +#define kfree_rcu(ptr, rhf) \ +do { \ + typeof (ptr) ___p = (ptr); \ + \ + if (___p) \ + __kfree_rcu(&((___p)->rhf), offsetof(typeof(*(ptr)), rhf)); \ +} while (0) /* * Place this after a lock-acquisition primitive to guarantee that -- cgit v1.2.3 From fe15b50cdeeebd9248bf27e3c31278668f08bc04 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 5 Apr 2019 16:15:00 -0700 Subject: srcu: Allocate per-CPU data for DEFINE_SRCU() in modules Adding DEFINE_SRCU() or DEFINE_STATIC_SRCU() to a loadable module requires that the size of the reserved region be increased, which is not something we want to be doing all that often. One approach would be to require that loadable modules define an srcu_struct and invoke init_srcu_struct() from their module_init function and cleanup_srcu_struct() from their module_exit function. However, this is more than a bit user unfriendly. This commit therefore creates an ___srcu_struct_ptrs linker section, and pointers to srcu_struct structures created by DEFINE_SRCU() and DEFINE_STATIC_SRCU() within a module are placed into that module's ___srcu_struct_ptrs section. The required init_srcu_struct() and cleanup_srcu_struct() functions are then automatically invoked as needed when that module is loaded and unloaded, thus allowing modules to continue to use DEFINE_SRCU() and DEFINE_STATIC_SRCU() while avoiding the need to increase the size of the reserved region. Many of the algorithms and some of the code was cheerfully cherry-picked from other code making use of linker sections, perhaps most notably from tracepoints. All bugs are nevertheless the sole property of the author. Suggested-by: Mathieu Desnoyers [ paulmck: Use __section() and use "default" in srcu_module_notify()'s "switch" statement as suggested by Joel Fernandes. ] Signed-off-by: Paul E. McKenney Tested-by: Joel Fernandes (Google) --- include/asm-generic/vmlinux.lds.h | 4 +++ include/linux/module.h | 5 +++ include/linux/srcutree.h | 14 +++++++-- kernel/module.c | 5 +++ kernel/rcu/srcutree.c | 65 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 90 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 088987e9a3ea..ba1ad39468fc 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -337,6 +337,10 @@ KEEP(*(__tracepoints_ptrs)) /* Tracepoints: pointer array */ \ __stop___tracepoints_ptrs = .; \ *(__tracepoints_strings)/* Tracepoints: strings */ \ + . = ALIGN(8); \ + __start___srcu_struct = .; \ + *(___srcu_struct_ptrs) \ + __end___srcu_struct = .; \ } \ \ .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \ diff --git a/include/linux/module.h b/include/linux/module.h index 188998d3dca9..1455812dd325 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -450,6 +451,10 @@ struct module { unsigned int num_tracepoints; tracepoint_ptr_t *tracepoints_ptrs; #endif +#ifdef CONFIG_TREE_SRCU + unsigned int num_srcu_structs; + struct srcu_struct **srcu_struct_ptrs; +#endif #ifdef CONFIG_BPF_EVENTS unsigned int num_bpf_raw_events; struct bpf_raw_event_map *bpf_raw_events; diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 7f7c8c050f63..8af1824c46a8 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -120,9 +120,17 @@ struct srcu_struct { * * See include/linux/percpu-defs.h for the rules on per-CPU variables. */ -#define __DEFINE_SRCU(name, is_static) \ - static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);\ - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_data) +#ifdef MODULE +# define __DEFINE_SRCU(name, is_static) \ + is_static struct srcu_struct name; \ + struct srcu_struct *__srcu_struct_##name \ + __section("___srcu_struct_ptrs") = &name +#else +# define __DEFINE_SRCU(name, is_static) \ + static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data); \ + is_static struct srcu_struct name = \ + __SRCU_STRUCT_INIT(name, name##_srcu_data) +#endif #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) diff --git a/kernel/module.c b/kernel/module.c index 6e6712b3aaf5..c79a53b629b6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3095,6 +3095,11 @@ static int find_module_sections(struct module *mod, struct load_info *info) sizeof(*mod->tracepoints_ptrs), &mod->num_tracepoints); #endif +#ifdef CONFIG_TREE_SRCU + mod->srcu_struct_ptrs = section_objs(info, "___srcu_struct_ptrs", + sizeof(*mod->srcu_struct_ptrs), + &mod->num_srcu_structs); +#endif #ifdef CONFIG_BPF_EVENTS mod->bpf_raw_events = section_objs(info, "__bpf_raw_tp_map", sizeof(*mod->bpf_raw_events), diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 9b761e546de8..2ded2614a2f4 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -1310,3 +1310,68 @@ void __init srcu_init(void) queue_work(rcu_gp_wq, &ssp->work.work); } } + +#ifdef CONFIG_MODULES + +/* Initialize any global-scope srcu_struct structures used by this module. */ +static int srcu_module_coming(struct module *mod) +{ + int i; + struct srcu_struct **sspp = mod->srcu_struct_ptrs; + int ret; + + for (i = 0; i < mod->num_srcu_structs; i++) { + ret = init_srcu_struct(*(sspp++)); + if (WARN_ON_ONCE(ret)) + return ret; + } + return 0; +} + +/* Clean up any global-scope srcu_struct structures used by this module. */ +static void srcu_module_going(struct module *mod) +{ + int i; + struct srcu_struct **sspp = mod->srcu_struct_ptrs; + + for (i = 0; i < mod->num_srcu_structs; i++) + cleanup_srcu_struct(*(sspp++)); +} + +/* Handle one module, either coming or going. */ +static int srcu_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct module *mod = data; + int ret = 0; + + switch (val) { + case MODULE_STATE_COMING: + ret = srcu_module_coming(mod); + break; + case MODULE_STATE_GOING: + srcu_module_going(mod); + break; + default: + break; + } + return ret; +} + +static struct notifier_block srcu_module_nb = { + .notifier_call = srcu_module_notify, + .priority = 0, +}; + +static __init int init_srcu_module_notifier(void) +{ + int ret; + + ret = register_module_notifier(&srcu_module_nb); + if (ret) + pr_warn("Failed to register srcu module notifier\n"); + return ret; +} +late_initcall(init_srcu_module_notifier); + +#endif /* #ifdef CONFIG_MODULES */ -- cgit v1.2.3 From 056b89e7e699742cc060ce722d3f26effe51b4aa Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Thu, 11 Apr 2019 16:24:21 -0400 Subject: module: Make srcu_struct ptr array as read-only Since commit title ("srcu: Allocate per-CPU data for DEFINE_SRCU() in modules"), modules that call DEFINE_{STATIC,}SRCU will have a new array of srcu_struct pointers, which is used by srcu code to initialize and clean up these structures and save valuable per-cpu reserved space. There is no reason for this array of pointers to be writable, and can cause security or other hidden bugs. Mark these are read-only after the module init has completed. Tested with the following diff to ensure array not writable: (diff is a bit reduced to avoid patch command getting confused) a/kernel/module.c b/kernel/module.c -3506,6 +3506,14 static noinline int do_init_module [snip] rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms); #endif module_enable_ro(mod, true); + + if (mod->srcu_struct_ptrs) { + // Check if srcu_struct_ptrs access is possible + char x = *(char *)mod->srcu_struct_ptrs; + *(char *)mod->srcu_struct_ptrs = 0; + *(char *)mod->srcu_struct_ptrs = x; + } + mod_tree_remove_init(mod); disable_ro_nx(&mod->init_layout); module_arch_freeing_init(mod); Cc: Rasmus Villemoes Cc: paulmck@linux.vnet.ibm.com Cc: rostedt@goodmis.org Cc: mathieu.desnoyers@efficios.com Cc: rcu@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: kernel-team@android.com Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 8af1824c46a8..9cfcc8a756ae 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -123,7 +123,7 @@ struct srcu_struct { #ifdef MODULE # define __DEFINE_SRCU(name, is_static) \ is_static struct srcu_struct name; \ - struct srcu_struct *__srcu_struct_##name \ + struct srcu_struct * const __srcu_struct_##name \ __section("___srcu_struct_ptrs") = &name #else # define __DEFINE_SRCU(name, is_static) \ -- cgit v1.2.3 From 95bf33b55ff4465399bad843f1d8d618c8baf1f3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 23 Apr 2019 14:07:24 +0200 Subject: rcu/sync: Kill rcu_sync_type/gp_type Now that the RCU flavors have been consolidated, rcu_sync_type makes no sense because none of internal update functions aside from .held() depend on gp_type. This commit therefore removes this field and consolidates the relevant code. Signed-off-by: Oleg Nesterov [ paulmck: Added RCU and RCU-bh checks to rcu_sync_is_idle(). ] [ paulmck: And applied subsequent feedback from Oleg Nesterov. ] Signed-off-by: Paul E. McKenney --- include/linux/percpu-rwsem.h | 2 +- include/linux/rcu_sync.h | 36 ++++++++-------------------- kernel/locking/percpu-rwsem.c | 2 +- kernel/rcu/sync.c | 55 ++++--------------------------------------- 4 files changed, 17 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 03cb4b6f842e..6887636ea169 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -20,7 +20,7 @@ struct percpu_rw_semaphore { #define DEFINE_STATIC_PERCPU_RWSEM(name) \ static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \ static struct percpu_rw_semaphore name = { \ - .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \ + .rss = __RCU_SYNC_INITIALIZER(name.rss), \ .read_count = &__percpu_rwsem_rc_##name, \ .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \ .writer = __RCUWAIT_INITIALIZER(name.writer), \ diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index 6fc53a1345b3..87971e85519c 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -13,8 +13,6 @@ #include #include -enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC }; - /* Structure to mediate between updaters and fastpath-using readers. */ struct rcu_sync { int gp_state; @@ -23,52 +21,38 @@ struct rcu_sync { int cb_state; struct rcu_head cb_head; - - enum rcu_sync_type gp_type; }; -extern void rcu_sync_lockdep_assert(struct rcu_sync *); - /** * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? * @rsp: Pointer to rcu_sync structure to use for synchronization * - * Returns true if readers are permitted to use their fastpaths. - * Must be invoked within an RCU read-side critical section whose - * flavor matches that of the rcu_sync struture. + * Returns true if readers are permitted to use their fastpaths. Must be + * invoked within some flavor of RCU read-side critical section. */ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) { -#ifdef CONFIG_PROVE_RCU - rcu_sync_lockdep_assert(rsp); -#endif + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && + !rcu_read_lock_bh_held() && + !rcu_read_lock_sched_held(), + "suspicious rcu_sync_is_idle() usage"); return !rsp->gp_state; /* GP_IDLE */ } -extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); +extern void rcu_sync_init(struct rcu_sync *); extern void rcu_sync_enter_start(struct rcu_sync *); extern void rcu_sync_enter(struct rcu_sync *); extern void rcu_sync_exit(struct rcu_sync *); extern void rcu_sync_dtor(struct rcu_sync *); -#define __RCU_SYNC_INITIALIZER(name, type) { \ +#define __RCU_SYNC_INITIALIZER(name) { \ .gp_state = 0, \ .gp_count = 0, \ .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ .cb_state = 0, \ - .gp_type = type, \ } -#define __DEFINE_RCU_SYNC(name, type) \ - struct rcu_sync_struct name = __RCU_SYNC_INITIALIZER(name, type) - -#define DEFINE_RCU_SYNC(name) \ - __DEFINE_RCU_SYNC(name, RCU_SYNC) - -#define DEFINE_RCU_SCHED_SYNC(name) \ - __DEFINE_RCU_SYNC(name, RCU_SCHED_SYNC) - -#define DEFINE_RCU_BH_SYNC(name) \ - __DEFINE_RCU_SYNC(name, RCU_BH_SYNC) +#define DEFINE_RCU_SYNC(name) \ + struct rcu_sync name = __RCU_SYNC_INITIALIZER(name) #endif /* _LINUX_RCU_SYNC_H_ */ diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index f17dad99eec8..48cab93a47fd 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -17,7 +17,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, return -ENOMEM; /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ - rcu_sync_init(&sem->rss, RCU_SCHED_SYNC); + rcu_sync_init(&sem->rss); __init_rwsem(&sem->rw_sem, name, rwsem_key); rcuwait_init(&sem->writer); sem->readers_block = 0; diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index a8304d90573f..ee427e138dad 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -10,65 +10,20 @@ #include #include -#ifdef CONFIG_PROVE_RCU -#define __INIT_HELD(func) .held = func, -#else -#define __INIT_HELD(func) -#endif - -static const struct { - void (*sync)(void); - void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); - void (*wait)(void); -#ifdef CONFIG_PROVE_RCU - int (*held)(void); -#endif -} gp_ops[] = { - [RCU_SYNC] = { - .sync = synchronize_rcu, - .call = call_rcu, - .wait = rcu_barrier, - __INIT_HELD(rcu_read_lock_held) - }, - [RCU_SCHED_SYNC] = { - .sync = synchronize_rcu, - .call = call_rcu, - .wait = rcu_barrier, - __INIT_HELD(rcu_read_lock_sched_held) - }, - [RCU_BH_SYNC] = { - .sync = synchronize_rcu, - .call = call_rcu, - .wait = rcu_barrier, - __INIT_HELD(rcu_read_lock_bh_held) - }, -}; - enum { GP_IDLE = 0, GP_PENDING, GP_PASSED }; enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY }; #define rss_lock gp_wait.lock -#ifdef CONFIG_PROVE_RCU -void rcu_sync_lockdep_assert(struct rcu_sync *rsp) -{ - RCU_LOCKDEP_WARN(!gp_ops[rsp->gp_type].held(), - "suspicious rcu_sync_is_idle() usage"); -} - -EXPORT_SYMBOL_GPL(rcu_sync_lockdep_assert); -#endif - /** * rcu_sync_init() - Initialize an rcu_sync structure * @rsp: Pointer to rcu_sync structure to be initialized * @type: Flavor of RCU with which to synchronize rcu_sync structure */ -void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type) +void rcu_sync_init(struct rcu_sync *rsp) { memset(rsp, 0, sizeof(*rsp)); init_waitqueue_head(&rsp->gp_wait); - rsp->gp_type = type; } /** @@ -114,7 +69,7 @@ void rcu_sync_enter(struct rcu_sync *rsp) WARN_ON_ONCE(need_wait && need_sync); if (need_sync) { - gp_ops[rsp->gp_type].sync(); + synchronize_rcu(); rsp->gp_state = GP_PASSED; wake_up_all(&rsp->gp_wait); } else if (need_wait) { @@ -167,7 +122,7 @@ static void rcu_sync_func(struct rcu_head *rhp) * to catch a later GP. */ rsp->cb_state = CB_PENDING; - gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func); + call_rcu(&rsp->cb_head, rcu_sync_func); } else { /* * We're at least a GP after rcu_sync_exit(); eveybody will now @@ -195,7 +150,7 @@ void rcu_sync_exit(struct rcu_sync *rsp) if (!--rsp->gp_count) { if (rsp->cb_state == CB_IDLE) { rsp->cb_state = CB_PENDING; - gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func); + call_rcu(&rsp->cb_head, rcu_sync_func); } else if (rsp->cb_state == CB_PENDING) { rsp->cb_state = CB_REPLAY; } @@ -220,7 +175,7 @@ void rcu_sync_dtor(struct rcu_sync *rsp) spin_unlock_irq(&rsp->rss_lock); if (cb_state != CB_IDLE) { - gp_ops[rsp->gp_type].wait(); + rcu_barrier(); WARN_ON_ONCE(rsp->cb_state != CB_IDLE); } } -- cgit v1.2.3 From 3f2947b78151ec938dc06aea4ba0e11e56becdff Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 23 Apr 2019 18:32:41 +0200 Subject: locking/percpu-rwsem: Add DEFINE_PERCPU_RWSEM(), use it to initialize cgroup_threadgroup_rwsem Turn DEFINE_STATIC_PERCPU_RWSEM() into __DEFINE_PERCPU_RWSEM() with the additional "is_static" argument to introduce DEFINE_PERCPU_RWSEM(). Change cgroup.c to use DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem). Signed-off-by: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/percpu-rwsem.h | 8 ++++++-- kernel/cgroup/cgroup.c | 3 +-- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 6887636ea169..2809b44cbbee 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -17,14 +17,18 @@ struct percpu_rw_semaphore { int readers_block; }; -#define DEFINE_STATIC_PERCPU_RWSEM(name) \ +#define __DEFINE_PERCPU_RWSEM(name, is_static) \ static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \ -static struct percpu_rw_semaphore name = { \ +is_static struct percpu_rw_semaphore name = { \ .rss = __RCU_SYNC_INITIALIZER(name.rss), \ .read_count = &__percpu_rwsem_rc_##name, \ .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \ .writer = __RCUWAIT_INITIALIZER(name.writer), \ } +#define DEFINE_PERCPU_RWSEM(name) \ + __DEFINE_PERCPU_RWSEM(name, /* not static */) +#define DEFINE_STATIC_PERCPU_RWSEM(name) \ + __DEFINE_PERCPU_RWSEM(name, static) extern int __percpu_down_read(struct percpu_rw_semaphore *, int); extern void __percpu_up_read(struct percpu_rw_semaphore *); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 217cec4e22c6..b112e93388dc 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -101,7 +101,7 @@ static DEFINE_SPINLOCK(cgroup_idr_lock); */ static DEFINE_SPINLOCK(cgroup_file_kn_lock); -struct percpu_rw_semaphore cgroup_threadgroup_rwsem; +DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem); #define cgroup_assert_mutex_or_rcu_locked() \ RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ @@ -5616,7 +5616,6 @@ int __init cgroup_init(void) int ssid; BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16); - BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files)); -- cgit v1.2.3 From 89da3b94bb97417ca2c5b0ce3a28643819030247 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 25 Apr 2019 18:50:55 +0200 Subject: rcu/sync: Simplify the state machine With this patch rcu_sync has a single state variable and the transition rules become really simple: GP_IDLE - owned by the first rcu_sync_enter() which moves it to GP_ENTER - owned by rcu-callback which moves it to GP_PASSED - owned by the last rcu_sync_exit() which moves it to GP_EXIT - and this is the only "nontrivial" state. rcu-callback moves it back to GP_IDLE unless another enter() comes before a GP pass. If rcu-callback is invoked before the next rcu_sync_exit() it must see gp_count incremented by that enter() and set GP_PASSED. Otherwise, if the next rcu_sync_exit() wins the race, it will move it to GP_REPLAY - owned by rcu-callback which moves it to GP_EXIT Signed-off-by: Oleg Nesterov [ paulmck: While here, apply READ_ONCE() and WRITE_ONCE() to ->gp_state. ] [ paulmck: Tweaks to make htmldocs happy. (Reported by kbuild test robot.) ] Signed-off-by: Paul E. McKenney --- include/linux/rcu_sync.h | 4 +- kernel/rcu/sync.c | 165 +++++++++++++++++++++++++++-------------------- 2 files changed, 96 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index 87971e85519c..9b83865d24f9 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -19,7 +19,6 @@ struct rcu_sync { int gp_count; wait_queue_head_t gp_wait; - int cb_state; struct rcu_head cb_head; }; @@ -36,7 +35,7 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) !rcu_read_lock_bh_held() && !rcu_read_lock_sched_held(), "suspicious rcu_sync_is_idle() usage"); - return !rsp->gp_state; /* GP_IDLE */ + return !READ_ONCE(rsp->gp_state); /* GP_IDLE */ } extern void rcu_sync_init(struct rcu_sync *); @@ -49,7 +48,6 @@ extern void rcu_sync_dtor(struct rcu_sync *); .gp_state = 0, \ .gp_count = 0, \ .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ - .cb_state = 0, \ } #define DEFINE_RCU_SYNC(name) \ diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index ee427e138dad..d4558ab7a07d 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -10,15 +10,13 @@ #include #include -enum { GP_IDLE = 0, GP_PENDING, GP_PASSED }; -enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY }; +enum { GP_IDLE = 0, GP_ENTER, GP_PASSED, GP_EXIT, GP_REPLAY }; #define rss_lock gp_wait.lock /** * rcu_sync_init() - Initialize an rcu_sync structure * @rsp: Pointer to rcu_sync structure to be initialized - * @type: Flavor of RCU with which to synchronize rcu_sync structure */ void rcu_sync_init(struct rcu_sync *rsp) { @@ -41,56 +39,26 @@ void rcu_sync_enter_start(struct rcu_sync *rsp) rsp->gp_state = GP_PASSED; } -/** - * rcu_sync_enter() - Force readers onto slowpath - * @rsp: Pointer to rcu_sync structure to use for synchronization - * - * This function is used by updaters who need readers to make use of - * a slowpath during the update. After this function returns, all - * subsequent calls to rcu_sync_is_idle() will return false, which - * tells readers to stay off their fastpaths. A later call to - * rcu_sync_exit() re-enables reader slowpaths. - * - * When called in isolation, rcu_sync_enter() must wait for a grace - * period, however, closely spaced calls to rcu_sync_enter() can - * optimize away the grace-period wait via a state machine implemented - * by rcu_sync_enter(), rcu_sync_exit(), and rcu_sync_func(). - */ -void rcu_sync_enter(struct rcu_sync *rsp) -{ - bool need_wait, need_sync; - spin_lock_irq(&rsp->rss_lock); - need_wait = rsp->gp_count++; - need_sync = rsp->gp_state == GP_IDLE; - if (need_sync) - rsp->gp_state = GP_PENDING; - spin_unlock_irq(&rsp->rss_lock); +static void rcu_sync_func(struct rcu_head *rhp); - WARN_ON_ONCE(need_wait && need_sync); - if (need_sync) { - synchronize_rcu(); - rsp->gp_state = GP_PASSED; - wake_up_all(&rsp->gp_wait); - } else if (need_wait) { - wait_event(rsp->gp_wait, rsp->gp_state == GP_PASSED); - } else { - /* - * Possible when there's a pending CB from a rcu_sync_exit(). - * Nobody has yet been allowed the 'fast' path and thus we can - * avoid doing any sync(). The callback will get 'dropped'. - */ - WARN_ON_ONCE(rsp->gp_state != GP_PASSED); - } +static void rcu_sync_call(struct rcu_sync *rsp) +{ + call_rcu(&rsp->cb_head, rcu_sync_func); } /** * rcu_sync_func() - Callback function managing reader access to fastpath * @rhp: Pointer to rcu_head in rcu_sync structure to use for synchronization * - * This function is passed to one of the call_rcu() functions by + * This function is passed to call_rcu() function by rcu_sync_enter() and * rcu_sync_exit(), so that it is invoked after a grace period following the - * that invocation of rcu_sync_exit(). It takes action based on events that + * that invocation of enter/exit. + * + * If it is called by rcu_sync_enter() it signals that all the readers were + * switched onto slow path. + * + * If it is called by rcu_sync_exit() it takes action based on events that * have taken place in the meantime, so that closely spaced rcu_sync_enter() * and rcu_sync_exit() pairs need not wait for a grace period. * @@ -107,35 +75,88 @@ static void rcu_sync_func(struct rcu_head *rhp) struct rcu_sync *rsp = container_of(rhp, struct rcu_sync, cb_head); unsigned long flags; - WARN_ON_ONCE(rsp->gp_state != GP_PASSED); - WARN_ON_ONCE(rsp->cb_state == CB_IDLE); + WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_IDLE); + WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_PASSED); spin_lock_irqsave(&rsp->rss_lock, flags); if (rsp->gp_count) { /* - * A new rcu_sync_begin() has happened; drop the callback. + * We're at least a GP after the GP_IDLE->GP_ENTER transition. */ - rsp->cb_state = CB_IDLE; - } else if (rsp->cb_state == CB_REPLAY) { + WRITE_ONCE(rsp->gp_state, GP_PASSED); + wake_up_locked(&rsp->gp_wait); + } else if (rsp->gp_state == GP_REPLAY) { /* - * A new rcu_sync_exit() has happened; requeue the callback - * to catch a later GP. + * A new rcu_sync_exit() has happened; requeue the callback to + * catch a later GP. */ - rsp->cb_state = CB_PENDING; - call_rcu(&rsp->cb_head, rcu_sync_func); + WRITE_ONCE(rsp->gp_state, GP_EXIT); + rcu_sync_call(rsp); } else { /* - * We're at least a GP after rcu_sync_exit(); eveybody will now - * have observed the write side critical section. Let 'em rip!. + * We're at least a GP after the last rcu_sync_exit(); eveybody + * will now have observed the write side critical section. + * Let 'em rip!. */ - rsp->cb_state = CB_IDLE; - rsp->gp_state = GP_IDLE; + WRITE_ONCE(rsp->gp_state, GP_IDLE); } spin_unlock_irqrestore(&rsp->rss_lock, flags); } /** - * rcu_sync_exit() - Allow readers back onto fast patch after grace period + * rcu_sync_enter() - Force readers onto slowpath + * @rsp: Pointer to rcu_sync structure to use for synchronization + * + * This function is used by updaters who need readers to make use of + * a slowpath during the update. After this function returns, all + * subsequent calls to rcu_sync_is_idle() will return false, which + * tells readers to stay off their fastpaths. A later call to + * rcu_sync_exit() re-enables reader slowpaths. + * + * When called in isolation, rcu_sync_enter() must wait for a grace + * period, however, closely spaced calls to rcu_sync_enter() can + * optimize away the grace-period wait via a state machine implemented + * by rcu_sync_enter(), rcu_sync_exit(), and rcu_sync_func(). + */ +void rcu_sync_enter(struct rcu_sync *rsp) +{ + int gp_state; + + spin_lock_irq(&rsp->rss_lock); + gp_state = rsp->gp_state; + if (gp_state == GP_IDLE) { + WRITE_ONCE(rsp->gp_state, GP_ENTER); + WARN_ON_ONCE(rsp->gp_count); + /* + * Note that we could simply do rcu_sync_call(rsp) here and + * avoid the "if (gp_state == GP_IDLE)" block below. + * + * However, synchronize_rcu() can be faster if rcu_expedited + * or rcu_blocking_is_gp() is true. + * + * Another reason is that we can't wait for rcu callback if + * we are called at early boot time but this shouldn't happen. + */ + } + rsp->gp_count++; + spin_unlock_irq(&rsp->rss_lock); + + if (gp_state == GP_IDLE) { + /* + * See the comment above, this simply does the "synchronous" + * call_rcu(rcu_sync_func) which does GP_ENTER -> GP_PASSED. + */ + synchronize_rcu(); + rcu_sync_func(&rsp->cb_head); + /* Not really needed, wait_event() would see GP_PASSED. */ + return; + } + + wait_event(rsp->gp_wait, READ_ONCE(rsp->gp_state) >= GP_PASSED); +} + +/** + * rcu_sync_exit() - Allow readers back onto fast path after grace period * @rsp: Pointer to rcu_sync structure to use for synchronization * * This function is used by updaters who have completed, and can therefore @@ -146,13 +167,16 @@ static void rcu_sync_func(struct rcu_head *rhp) */ void rcu_sync_exit(struct rcu_sync *rsp) { + WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_IDLE); + WARN_ON_ONCE(READ_ONCE(rsp->gp_count) == 0); + spin_lock_irq(&rsp->rss_lock); if (!--rsp->gp_count) { - if (rsp->cb_state == CB_IDLE) { - rsp->cb_state = CB_PENDING; - call_rcu(&rsp->cb_head, rcu_sync_func); - } else if (rsp->cb_state == CB_PENDING) { - rsp->cb_state = CB_REPLAY; + if (rsp->gp_state == GP_PASSED) { + WRITE_ONCE(rsp->gp_state, GP_EXIT); + rcu_sync_call(rsp); + } else if (rsp->gp_state == GP_EXIT) { + WRITE_ONCE(rsp->gp_state, GP_REPLAY); } } spin_unlock_irq(&rsp->rss_lock); @@ -164,18 +188,19 @@ void rcu_sync_exit(struct rcu_sync *rsp) */ void rcu_sync_dtor(struct rcu_sync *rsp) { - int cb_state; + int gp_state; - WARN_ON_ONCE(rsp->gp_count); + WARN_ON_ONCE(READ_ONCE(rsp->gp_count)); + WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_PASSED); spin_lock_irq(&rsp->rss_lock); - if (rsp->cb_state == CB_REPLAY) - rsp->cb_state = CB_PENDING; - cb_state = rsp->cb_state; + if (rsp->gp_state == GP_REPLAY) + WRITE_ONCE(rsp->gp_state, GP_EXIT); + gp_state = rsp->gp_state; spin_unlock_irq(&rsp->rss_lock); - if (cb_state != CB_IDLE) { + if (gp_state != GP_IDLE) { rcu_barrier(); - WARN_ON_ONCE(rsp->cb_state != CB_IDLE); + WARN_ON_ONCE(rsp->gp_state != GP_IDLE); } } -- cgit v1.2.3 From ff3bf92d90d396e51eb78c5ecde11a994ab7a179 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 9 Apr 2019 14:44:49 -0700 Subject: torture: Allow inter-stutter interval to be specified Currently, the inter-stutter interval is the same as the stutter duration, that is, whatever number of jiffies is passed into torture_stutter_init(). This has worked well for quite some time, but the addition of forward-progress testing to rcutorture can delay processes for several seconds, which can triple the time that they are stuttered. This commit therefore adds a second argument to torture_stutter_init() that specifies the inter-stutter interval. While locktorture preserves the current behavior, rcutorture uses the RCU CPU stall warning interval to provide a wider inter-stutter interval. Signed-off-by: Paul E. McKenney --- include/linux/torture.h | 2 +- kernel/locking/locktorture.c | 2 +- kernel/rcu/rcutorture.c | 5 ++++- kernel/torture.c | 6 ++++-- 4 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 23d80db426d7..a620118385bb 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -66,7 +66,7 @@ int torture_shutdown_init(int ssecs, void (*cleanup)(void)); /* Task stuttering, which forces load/no-load transitions. */ bool stutter_wait(const char *title); -int torture_stutter_init(int s); +int torture_stutter_init(int s, int sgap); /* Initialization and cleanup. */ bool torture_init_begin(char *ttype, int v); diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 80a463d31a8d..c513031cd7e3 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -975,7 +975,7 @@ static int __init lock_torture_init(void) goto unwind; } if (stutter > 0) { - firsterr = torture_stutter_init(stutter); + firsterr = torture_stutter_init(stutter, stutter); if (firsterr) goto unwind; } diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 954ac2b98619..a16d6abe1715 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2373,7 +2373,10 @@ rcu_torture_init(void) if (stutter < 0) stutter = 0; if (stutter) { - firsterr = torture_stutter_init(stutter * HZ); + int t; + + t = cur_ops->stall_dur ? cur_ops->stall_dur() : stutter * HZ; + firsterr = torture_stutter_init(stutter * HZ, t); if (firsterr) goto unwind; } diff --git a/kernel/torture.c b/kernel/torture.c index de0e0ecf88e1..a8d9bdfba7c3 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -570,6 +570,7 @@ static void torture_shutdown_cleanup(void) static struct task_struct *stutter_task; static int stutter_pause_test; static int stutter; +static int stutter_gap; /* * Block until the stutter interval ends. This must be called periodically @@ -621,7 +622,7 @@ static int torture_stutter(void *arg) } WRITE_ONCE(stutter_pause_test, 0); if (!torture_must_stop()) - schedule_timeout_interruptible(stutter); + schedule_timeout_interruptible(stutter_gap); torture_shutdown_absorb("torture_stutter"); } while (!torture_must_stop()); torture_kthread_stopping("torture_stutter"); @@ -631,9 +632,10 @@ static int torture_stutter(void *arg) /* * Initialize and kick off the torture_stutter kthread. */ -int torture_stutter_init(const int s) +int torture_stutter_init(const int s, const int sgap) { stutter = s; + stutter_gap = sgap; return torture_create_kthread(torture_stutter, NULL, stutter_task); } EXPORT_SYMBOL_GPL(torture_stutter_init); -- cgit v1.2.3 From 0b3b094ac9a7bb1fcf5d694f3ec981e6864a63d3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 15 May 2019 16:28:34 +0200 Subject: fanotify: Disallow permission events for proc filesystem Proc filesystem has special locking rules for various files. Thus fanotify which opens files on event delivery can easily deadlock against another process that waits for fanotify permission event to be handled. Since permission events on /proc have doubtful value anyway, just disallow them. Link: https://lore.kernel.org/linux-fsdevel/20190320131642.GE9485@quack2.suse.cz/ Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify_user.c | 22 ++++++++++++++++++++++ fs/proc/root.c | 2 +- include/linux/fs.h | 1 + 3 files changed, 24 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index a90bb19dcfa2..91006f47e420 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -920,6 +920,22 @@ static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid) return 0; } +static int fanotify_events_supported(struct path *path, __u64 mask) +{ + /* + * Some filesystems such as 'proc' acquire unusual locks when opening + * files. For them fanotify permission events have high chances of + * deadlocking the system - open done when reporting fanotify event + * blocks on this "unusual" lock while another process holding the lock + * waits for fanotify permission event to be answered. Just disallow + * permission events for such filesystems. + */ + if (mask & FANOTIFY_PERM_EVENTS && + path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM) + return -EINVAL; + return 0; +} + static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, int dfd, const char __user *pathname) { @@ -1018,6 +1034,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (ret) goto fput_and_out; + if (flags & FAN_MARK_ADD) { + ret = fanotify_events_supported(&path, mask); + if (ret) + goto path_put_and_out; + } + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { ret = fanotify_test_fid(&path, &__fsid); if (ret) diff --git a/fs/proc/root.c b/fs/proc/root.c index 8b145e7b9661..522199e9525e 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -211,7 +211,7 @@ static struct file_system_type proc_fs_type = { .init_fs_context = proc_init_fs_context, .parameters = &proc_fs_parameters, .kill_sb = proc_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .fs_flags = FS_USERNS_MOUNT | FS_DISALLOW_NOTIFY_PERM, }; void __init proc_root_init(void) diff --git a/include/linux/fs.h b/include/linux/fs.h index f7fdfe93e25d..c7136c98b5ba 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2184,6 +2184,7 @@ struct file_system_type { #define FS_BINARY_MOUNTDATA 2 #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ +#define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_description *parameters; -- cgit v1.2.3 From 4bfc0bb2c60e2f4cc8eb60f03cf8dfa72336272a Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sat, 25 May 2019 09:37:39 -0700 Subject: bpf: decouple the lifetime of cgroup_bpf from cgroup itself Currently the lifetime of bpf programs attached to a cgroup is bound to the lifetime of the cgroup itself. It means that if a user forgets (or intentionally avoids) to detach a bpf program before removing the cgroup, it will stay attached up to the release of the cgroup. Since the cgroup can stay in the dying state (the state between being rmdir()'ed and being released) for a very long time, it leads to a waste of memory. Also, it blocks a possibility to implement the memcg-based memory accounting for bpf objects, because a circular reference dependency will occur. Charged memory pages are pinning the corresponding memory cgroup, and if the memory cgroup is pinning the attached bpf program, nothing will be ever released. A dying cgroup can not contain any processes, so the only chance for an attached bpf program to be executed is a live socket associated with the cgroup. So in order to release all bpf data early, let's count associated sockets using a new percpu refcounter. On cgroup removal the counter is transitioned to the atomic mode, and as soon as it reaches 0, all bpf programs are detached. Because cgroup_bpf_release() can block, it can't be called from the percpu ref counter callback directly, so instead an asynchronous work is scheduled. The reference counter is not socket specific, and can be used for any other types of programs, which can be executed from a cgroup-bpf hook outside of the process context, had such a need arise in the future. Signed-off-by: Roman Gushchin Cc: jolsa@redhat.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf-cgroup.h | 11 +++++++++-- include/linux/cgroup.h | 18 ++++++++++++++++++ kernel/bpf/cgroup.c | 41 +++++++++++++++++++++++++++++++++++++---- kernel/cgroup/cgroup.c | 11 ++++++++--- 4 files changed, 72 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index cb3c6b3b89c8..9f100fc422c3 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -72,10 +73,16 @@ struct cgroup_bpf { /* temp storage for effective prog array used by prog_attach/detach */ struct bpf_prog_array __rcu *inactive; + + /* reference counter used to detach bpf programs after cgroup removal */ + struct percpu_ref refcnt; + + /* cgroup_bpf is released using a work queue */ + struct work_struct release_work; }; -void cgroup_bpf_put(struct cgroup *cgrp); int cgroup_bpf_inherit(struct cgroup *cgrp); +void cgroup_bpf_offline(struct cgroup *cgrp); int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); @@ -283,8 +290,8 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, struct bpf_prog; struct cgroup_bpf {}; -static inline void cgroup_bpf_put(struct cgroup *cgrp) {} static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } +static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c0077adeea83..49e8facf7c4a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -924,4 +924,22 @@ static inline bool cgroup_task_frozen(struct task_struct *task) #endif /* !CONFIG_CGROUPS */ +#ifdef CONFIG_CGROUP_BPF +static inline void cgroup_bpf_get(struct cgroup *cgrp) +{ + percpu_ref_get(&cgrp->bpf.refcnt); +} + +static inline void cgroup_bpf_put(struct cgroup *cgrp) +{ + percpu_ref_put(&cgrp->bpf.refcnt); +} + +#else /* CONFIG_CGROUP_BPF */ + +static inline void cgroup_bpf_get(struct cgroup *cgrp) {} +static inline void cgroup_bpf_put(struct cgroup *cgrp) {} + +#endif /* CONFIG_CGROUP_BPF */ + #endif /* _LINUX_CGROUP_H */ diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index fcde0f7b2585..d995edbe816d 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -22,12 +22,21 @@ DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key); EXPORT_SYMBOL(cgroup_bpf_enabled_key); +void cgroup_bpf_offline(struct cgroup *cgrp) +{ + cgroup_get(cgrp); + percpu_ref_kill(&cgrp->bpf.refcnt); +} + /** - * cgroup_bpf_put() - put references of all bpf programs - * @cgrp: the cgroup to modify + * cgroup_bpf_release() - put references of all bpf programs and + * release all cgroup bpf data + * @work: work structure embedded into the cgroup to modify */ -void cgroup_bpf_put(struct cgroup *cgrp) +static void cgroup_bpf_release(struct work_struct *work) { + struct cgroup *cgrp = container_of(work, struct cgroup, + bpf.release_work); enum bpf_cgroup_storage_type stype; unsigned int type; @@ -47,6 +56,22 @@ void cgroup_bpf_put(struct cgroup *cgrp) } bpf_prog_array_free(cgrp->bpf.effective[type]); } + + percpu_ref_exit(&cgrp->bpf.refcnt); + cgroup_put(cgrp); +} + +/** + * cgroup_bpf_release_fn() - callback used to schedule releasing + * of bpf cgroup data + * @ref: percpu ref counter structure + */ +static void cgroup_bpf_release_fn(struct percpu_ref *ref) +{ + struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt); + + INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release); + queue_work(system_wq, &cgrp->bpf.release_work); } /* count number of elements in the list. @@ -167,7 +192,12 @@ int cgroup_bpf_inherit(struct cgroup *cgrp) */ #define NR ARRAY_SIZE(cgrp->bpf.effective) struct bpf_prog_array __rcu *arrays[NR] = {}; - int i; + int ret, i; + + ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0, + GFP_KERNEL); + if (ret) + return ret; for (i = 0; i < NR; i++) INIT_LIST_HEAD(&cgrp->bpf.progs[i]); @@ -183,6 +213,9 @@ int cgroup_bpf_inherit(struct cgroup *cgrp) cleanup: for (i = 0; i < NR; i++) bpf_prog_array_free(arrays[i]); + + percpu_ref_exit(&cgrp->bpf.refcnt); + return -ENOMEM; } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 217cec4e22c6..ef9cfbfc82a9 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4955,8 +4955,6 @@ static void css_release_work_fn(struct work_struct *work) if (cgrp->kn) RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv, NULL); - - cgroup_bpf_put(cgrp); } mutex_unlock(&cgroup_mutex); @@ -5482,6 +5480,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) cgroup1_check_for_release(parent); + cgroup_bpf_offline(cgrp); + /* put the base reference */ percpu_ref_kill(&cgrp->self.refcnt); @@ -6221,6 +6221,7 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) * Don't use cgroup_get_live(). */ cgroup_get(sock_cgroup_ptr(skcd)); + cgroup_bpf_get(sock_cgroup_ptr(skcd)); return; } @@ -6232,6 +6233,7 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) cset = task_css_set(current); if (likely(cgroup_tryget(cset->dfl_cgrp))) { skcd->val = (unsigned long)cset->dfl_cgrp; + cgroup_bpf_get(cset->dfl_cgrp); break; } cpu_relax(); @@ -6242,7 +6244,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) void cgroup_sk_free(struct sock_cgroup_data *skcd) { - cgroup_put(sock_cgroup_ptr(skcd)); + struct cgroup *cgrp = sock_cgroup_ptr(skcd); + + cgroup_bpf_put(cgrp); + cgroup_put(cgrp); } #endif /* CONFIG_SOCK_CGROUP_DATA */ -- cgit v1.2.3 From d2d0727b1654e11563f181f4d3d48b9275514480 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 20 May 2019 09:29:39 -0700 Subject: fscrypt: simplify bounce page handling Currently, bounce page handling for writes to encrypted files is unnecessarily complicated. A fscrypt_ctx is allocated along with each bounce page, page_private(bounce_page) points to this fscrypt_ctx, and fscrypt_ctx::w::control_page points to the original pagecache page. However, because writes don't use the fscrypt_ctx for anything else, there's no reason why page_private(bounce_page) can't just point to the original pagecache page directly. Therefore, this patch makes this change. In the process, it also cleans up the API exposed to filesystems that allows testing whether a page is a bounce page, getting the pagecache page from a bounce page, and freeing a bounce page. Reviewed-by: Chandan Rajendra Signed-off-by: Eric Biggers --- fs/crypto/bio.c | 38 +++------------- fs/crypto/crypto.c | 104 +++++++++++++++----------------------------- fs/crypto/fscrypt_private.h | 4 +- fs/ext4/page-io.c | 36 ++++++--------- fs/f2fs/data.c | 12 +++-- include/linux/fscrypt.h | 38 +++++++++++----- 6 files changed, 84 insertions(+), 148 deletions(-) (limited to 'include/linux') diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index b46021ebde85..c857b70b5328 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -70,46 +70,18 @@ void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, struct bio *bio) } EXPORT_SYMBOL(fscrypt_enqueue_decrypt_bio); -void fscrypt_pullback_bio_page(struct page **page, bool restore) -{ - struct fscrypt_ctx *ctx; - struct page *bounce_page; - - /* The bounce data pages are unmapped. */ - if ((*page)->mapping) - return; - - /* The bounce data page is unmapped. */ - bounce_page = *page; - ctx = (struct fscrypt_ctx *)page_private(bounce_page); - - /* restore control page */ - *page = ctx->w.control_page; - - if (restore) - fscrypt_restore_control_page(bounce_page); -} -EXPORT_SYMBOL(fscrypt_pullback_bio_page); - int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len) { - struct fscrypt_ctx *ctx; - struct page *ciphertext_page = NULL; + struct page *ciphertext_page; struct bio *bio; int ret, err = 0; BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE); - ctx = fscrypt_get_ctx(GFP_NOFS); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - ciphertext_page = fscrypt_alloc_bounce_page(ctx, GFP_NOWAIT); - if (IS_ERR(ciphertext_page)) { - err = PTR_ERR(ciphertext_page); - goto errout; - } + ciphertext_page = fscrypt_alloc_bounce_page(GFP_NOWAIT); + if (!ciphertext_page) + return -ENOMEM; while (len--) { err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk, @@ -147,7 +119,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, } err = 0; errout: - fscrypt_release_ctx(ctx); + fscrypt_free_bounce_page(ciphertext_page); return err; } EXPORT_SYMBOL(fscrypt_zeroout_range); diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 335a362ee446..881e2a69f8a6 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -64,18 +64,11 @@ EXPORT_SYMBOL(fscrypt_enqueue_decrypt_work); * * If the encryption context was allocated from the pre-allocated pool, returns * it to that pool. Else, frees it. - * - * If there's a bounce page in the context, this frees that. */ void fscrypt_release_ctx(struct fscrypt_ctx *ctx) { unsigned long flags; - if (ctx->flags & FS_CTX_HAS_BOUNCE_BUFFER_FL && ctx->w.bounce_page) { - mempool_free(ctx->w.bounce_page, fscrypt_bounce_page_pool); - ctx->w.bounce_page = NULL; - } - ctx->w.control_page = NULL; if (ctx->flags & FS_CTX_REQUIRES_FREE_ENCRYPT_FL) { kmem_cache_free(fscrypt_ctx_cachep, ctx); } else { @@ -100,14 +93,8 @@ struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags) unsigned long flags; /* - * We first try getting the ctx from a free list because in - * the common case the ctx will have an allocated and - * initialized crypto tfm, so it's probably a worthwhile - * optimization. For the bounce page, we first try getting it - * from the kernel allocator because that's just about as fast - * as getting it from a list and because a cache of free pages - * should generally be a "last resort" option for a filesystem - * to be able to do its job. + * First try getting a ctx from the free list so that we don't have to + * call into the slab allocator. */ spin_lock_irqsave(&fscrypt_ctx_lock, flags); ctx = list_first_entry_or_null(&fscrypt_free_ctxs, @@ -123,11 +110,31 @@ struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags) } else { ctx->flags &= ~FS_CTX_REQUIRES_FREE_ENCRYPT_FL; } - ctx->flags &= ~FS_CTX_HAS_BOUNCE_BUFFER_FL; return ctx; } EXPORT_SYMBOL(fscrypt_get_ctx); +struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags) +{ + return mempool_alloc(fscrypt_bounce_page_pool, gfp_flags); +} + +/** + * fscrypt_free_bounce_page() - free a ciphertext bounce page + * + * Free a bounce page that was allocated by fscrypt_encrypt_page(), or by + * fscrypt_alloc_bounce_page() directly. + */ +void fscrypt_free_bounce_page(struct page *bounce_page) +{ + if (!bounce_page) + return; + set_page_private(bounce_page, (unsigned long)NULL); + ClearPagePrivate(bounce_page); + mempool_free(bounce_page, fscrypt_bounce_page_pool); +} +EXPORT_SYMBOL(fscrypt_free_bounce_page); + void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, const struct fscrypt_info *ci) { @@ -186,16 +193,6 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, return 0; } -struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx, - gfp_t gfp_flags) -{ - ctx->w.bounce_page = mempool_alloc(fscrypt_bounce_page_pool, gfp_flags); - if (ctx->w.bounce_page == NULL) - return ERR_PTR(-ENOMEM); - ctx->flags |= FS_CTX_HAS_BOUNCE_BUFFER_FL; - return ctx->w.bounce_page; -} - /** * fscypt_encrypt_page() - Encrypts a page * @inode: The inode for which the encryption should take place @@ -210,22 +207,12 @@ struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx, * previously written data. * @gfp_flags: The gfp flag for memory allocation * - * Encrypts @page using the ctx encryption context. Performs encryption - * either in-place or into a newly allocated bounce page. - * Called on the page write path. - * - * Bounce page allocation is the default. - * In this case, the contents of @page are encrypted and stored in an - * allocated bounce page. @page has to be locked and the caller must call - * fscrypt_restore_control_page() on the returned ciphertext page to - * release the bounce buffer and the encryption context. - * - * In-place encryption is used by setting the FS_CFLG_OWN_PAGES flag in - * fscrypt_operations. Here, the input-page is returned with its content - * encrypted. + * Encrypts @page. If the filesystem set FS_CFLG_OWN_PAGES, then the data is + * encrypted in-place and @page is returned. Else, a bounce page is allocated, + * the data is encrypted into the bounce page, and the bounce page is returned. + * The caller is responsible for calling fscrypt_free_bounce_page(). * - * Return: A page with the encrypted content on success. Else, an - * error value or NULL. + * Return: A page containing the encrypted data on success, else an ERR_PTR() */ struct page *fscrypt_encrypt_page(const struct inode *inode, struct page *page, @@ -234,7 +221,6 @@ struct page *fscrypt_encrypt_page(const struct inode *inode, u64 lblk_num, gfp_t gfp_flags) { - struct fscrypt_ctx *ctx; struct page *ciphertext_page = page; int err; @@ -253,30 +239,20 @@ struct page *fscrypt_encrypt_page(const struct inode *inode, BUG_ON(!PageLocked(page)); - ctx = fscrypt_get_ctx(gfp_flags); - if (IS_ERR(ctx)) - return ERR_CAST(ctx); - /* The encryption operation will require a bounce page. */ - ciphertext_page = fscrypt_alloc_bounce_page(ctx, gfp_flags); - if (IS_ERR(ciphertext_page)) - goto errout; + ciphertext_page = fscrypt_alloc_bounce_page(gfp_flags); + if (!ciphertext_page) + return ERR_PTR(-ENOMEM); - ctx->w.control_page = page; err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num, page, ciphertext_page, len, offs, gfp_flags); if (err) { - ciphertext_page = ERR_PTR(err); - goto errout; + fscrypt_free_bounce_page(ciphertext_page); + return ERR_PTR(err); } SetPagePrivate(ciphertext_page); - set_page_private(ciphertext_page, (unsigned long)ctx); - lock_page(ciphertext_page); - return ciphertext_page; - -errout: - fscrypt_release_ctx(ctx); + set_page_private(ciphertext_page, (unsigned long)page); return ciphertext_page; } EXPORT_SYMBOL(fscrypt_encrypt_page); @@ -355,18 +331,6 @@ const struct dentry_operations fscrypt_d_ops = { .d_revalidate = fscrypt_d_revalidate, }; -void fscrypt_restore_control_page(struct page *page) -{ - struct fscrypt_ctx *ctx; - - ctx = (struct fscrypt_ctx *)page_private(page); - set_page_private(page, (unsigned long)NULL); - ClearPagePrivate(page); - unlock_page(page); - fscrypt_release_ctx(ctx); -} -EXPORT_SYMBOL(fscrypt_restore_control_page); - static void fscrypt_destroy(void) { struct fscrypt_ctx *pos, *n; diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 7da276159593..4122ee1a0b7b 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -94,7 +94,6 @@ typedef enum { } fscrypt_direction_t; #define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 -#define FS_CTX_HAS_BOUNCE_BUFFER_FL 0x00000002 static inline bool fscrypt_valid_enc_modes(u32 contents_mode, u32 filenames_mode) @@ -123,8 +122,7 @@ extern int fscrypt_do_page_crypto(const struct inode *inode, struct page *dest_page, unsigned int len, unsigned int offs, gfp_t gfp_flags); -extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx, - gfp_t gfp_flags); +extern struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags); extern const struct dentry_operations fscrypt_d_ops; extern void __printf(3, 4) __cold diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 4690618a92e9..13d5ecc0af03 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -66,9 +66,7 @@ static void ext4_finish_bio(struct bio *bio) bio_for_each_segment_all(bvec, bio, iter_all) { struct page *page = bvec->bv_page; -#ifdef CONFIG_FS_ENCRYPTION - struct page *data_page = NULL; -#endif + struct page *bounce_page = NULL; struct buffer_head *bh, *head; unsigned bio_start = bvec->bv_offset; unsigned bio_end = bio_start + bvec->bv_len; @@ -78,13 +76,10 @@ static void ext4_finish_bio(struct bio *bio) if (!page) continue; -#ifdef CONFIG_FS_ENCRYPTION - if (!page->mapping) { - /* The bounce data pages are unmapped. */ - data_page = page; - fscrypt_pullback_bio_page(&page, false); + if (fscrypt_is_bounce_page(page)) { + bounce_page = page; + page = fscrypt_pagecache_page(bounce_page); } -#endif if (bio->bi_status) { SetPageError(page); @@ -111,10 +106,7 @@ static void ext4_finish_bio(struct bio *bio) bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); local_irq_restore(flags); if (!under_io) { -#ifdef CONFIG_FS_ENCRYPTION - if (data_page) - fscrypt_restore_control_page(data_page); -#endif + fscrypt_free_bounce_page(bounce_page); end_page_writeback(page); } } @@ -415,7 +407,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, struct writeback_control *wbc, bool keep_towrite) { - struct page *data_page = NULL; + struct page *bounce_page = NULL; struct inode *inode = page->mapping->host; unsigned block_start; struct buffer_head *bh, *head; @@ -479,10 +471,10 @@ int ext4_bio_write_page(struct ext4_io_submit *io, gfp_t gfp_flags = GFP_NOFS; retry_encrypt: - data_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0, - page->index, gfp_flags); - if (IS_ERR(data_page)) { - ret = PTR_ERR(data_page); + bounce_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0, + page->index, gfp_flags); + if (IS_ERR(bounce_page)) { + ret = PTR_ERR(bounce_page); if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { if (io->io_bio) { ext4_io_submit(io); @@ -491,7 +483,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, gfp_flags |= __GFP_NOFAIL; goto retry_encrypt; } - data_page = NULL; + bounce_page = NULL; goto out; } } @@ -500,8 +492,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, do { if (!buffer_async_write(bh)) continue; - ret = io_submit_add_bh(io, inode, - data_page ? data_page : page, bh); + ret = io_submit_add_bh(io, inode, bounce_page ?: page, bh); if (ret) { /* * We only get here on ENOMEM. Not much else @@ -517,8 +508,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, /* Error stopped previous loop? Clean up buffers... */ if (ret) { out: - if (data_page) - fscrypt_restore_control_page(data_page); + fscrypt_free_bounce_page(bounce_page); printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret); redirty_page_for_writepage(wbc, page); do { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index eda4181d2092..968ebdbcb583 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -185,7 +185,7 @@ static void f2fs_write_end_io(struct bio *bio) continue; } - fscrypt_pullback_bio_page(&page, true); + fscrypt_finalize_bounce_page(&page); if (unlikely(bio->bi_status)) { mapping_set_error(page->mapping, -EIO); @@ -362,10 +362,9 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, bio_for_each_segment_all(bvec, io->bio, iter_all) { - if (bvec->bv_page->mapping) - target = bvec->bv_page; - else - target = fscrypt_control_page(bvec->bv_page); + target = bvec->bv_page; + if (fscrypt_is_bounce_page(target)) + target = fscrypt_pagecache_page(target); if (inode && inode == target->mapping->host) return true; @@ -1900,8 +1899,7 @@ got_it: err = f2fs_inplace_write_data(fio); if (err) { if (f2fs_encrypted_file(inode)) - fscrypt_pullback_bio_page(&fio->encrypted_page, - true); + fscrypt_finalize_bounce_page(&fio->encrypted_page); if (PageWriteback(page)) end_page_writeback(page); } else { diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index f7680ef1abd2..d016fa384d60 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -112,12 +112,17 @@ extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int, unsigned int, u64); -static inline struct page *fscrypt_control_page(struct page *page) +static inline bool fscrypt_is_bounce_page(struct page *page) { - return ((struct fscrypt_ctx *)page_private(page))->w.control_page; + return page->mapping == NULL; } -extern void fscrypt_restore_control_page(struct page *); +static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) +{ + return (struct page *)page_private(bounce_page); +} + +extern void fscrypt_free_bounce_page(struct page *bounce_page); /* policy.c */ extern int fscrypt_ioctl_set_policy(struct file *, const void __user *); @@ -223,7 +228,6 @@ static inline bool fscrypt_match_name(const struct fscrypt_name *fname, extern void fscrypt_decrypt_bio(struct bio *); extern void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, struct bio *bio); -extern void fscrypt_pullback_bio_page(struct page **, bool); extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, unsigned int); @@ -300,15 +304,19 @@ static inline int fscrypt_decrypt_page(const struct inode *inode, return -EOPNOTSUPP; } -static inline struct page *fscrypt_control_page(struct page *page) +static inline bool fscrypt_is_bounce_page(struct page *page) +{ + return false; +} + +static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) { WARN_ON_ONCE(1); return ERR_PTR(-EINVAL); } -static inline void fscrypt_restore_control_page(struct page *page) +static inline void fscrypt_free_bounce_page(struct page *bounce_page) { - return; } /* policy.c */ @@ -410,11 +418,6 @@ static inline void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, { } -static inline void fscrypt_pullback_bio_page(struct page **page, bool restore) -{ - return; -} - static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len) { @@ -692,4 +695,15 @@ static inline int fscrypt_encrypt_symlink(struct inode *inode, return 0; } +/* If *pagep is a bounce page, free it and set *pagep to the pagecache page */ +static inline void fscrypt_finalize_bounce_page(struct page **pagep) +{ + struct page *page = *pagep; + + if (fscrypt_is_bounce_page(page)) { + *pagep = fscrypt_pagecache_page(page); + fscrypt_free_bounce_page(page); + } +} + #endif /* _LINUX_FSCRYPT_H */ -- cgit v1.2.3 From 2a415a0257314cb2e49fb9ac4c6770837112f261 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 20 May 2019 09:29:40 -0700 Subject: fscrypt: remove the "write" part of struct fscrypt_ctx Now that fscrypt_ctx is not used for writes, remove the 'w' fields. Reviewed-by: Chandan Rajendra Signed-off-by: Eric Biggers --- fs/crypto/bio.c | 11 +++++------ fs/crypto/crypto.c | 14 +++++++------- include/linux/fscrypt.h | 7 ++----- 3 files changed, 14 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index c857b70b5328..c53425348387 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -53,9 +53,8 @@ EXPORT_SYMBOL(fscrypt_decrypt_bio); static void completion_pages(struct work_struct *work) { - struct fscrypt_ctx *ctx = - container_of(work, struct fscrypt_ctx, r.work); - struct bio *bio = ctx->r.bio; + struct fscrypt_ctx *ctx = container_of(work, struct fscrypt_ctx, work); + struct bio *bio = ctx->bio; __fscrypt_decrypt_bio(bio, true); fscrypt_release_ctx(ctx); @@ -64,9 +63,9 @@ static void completion_pages(struct work_struct *work) void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, struct bio *bio) { - INIT_WORK(&ctx->r.work, completion_pages); - ctx->r.bio = bio; - fscrypt_enqueue_decrypt_work(&ctx->r.work); + INIT_WORK(&ctx->work, completion_pages); + ctx->bio = bio; + fscrypt_enqueue_decrypt_work(&ctx->work); } EXPORT_SYMBOL(fscrypt_enqueue_decrypt_bio); diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 881e2a69f8a6..9dd7a643eae0 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -59,11 +59,11 @@ void fscrypt_enqueue_decrypt_work(struct work_struct *work) EXPORT_SYMBOL(fscrypt_enqueue_decrypt_work); /** - * fscrypt_release_ctx() - Releases an encryption context - * @ctx: The encryption context to release. + * fscrypt_release_ctx() - Release a decryption context + * @ctx: The decryption context to release. * - * If the encryption context was allocated from the pre-allocated pool, returns - * it to that pool. Else, frees it. + * If the decryption context was allocated from the pre-allocated pool, return + * it to that pool. Else, free it. */ void fscrypt_release_ctx(struct fscrypt_ctx *ctx) { @@ -80,12 +80,12 @@ void fscrypt_release_ctx(struct fscrypt_ctx *ctx) EXPORT_SYMBOL(fscrypt_release_ctx); /** - * fscrypt_get_ctx() - Gets an encryption context + * fscrypt_get_ctx() - Get a decryption context * @gfp_flags: The gfp flag for memory allocation * - * Allocates and initializes an encryption context. + * Allocate and initialize a decryption context. * - * Return: A new encryption context on success; an ERR_PTR() otherwise. + * Return: A new decryption context on success; an ERR_PTR() otherwise. */ struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags) { diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index d016fa384d60..1c7287f146a9 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -63,16 +63,13 @@ struct fscrypt_operations { unsigned int max_namelen; }; +/* Decryption work */ struct fscrypt_ctx { union { - struct { - struct page *bounce_page; /* Ciphertext page */ - struct page *control_page; /* Original page */ - } w; struct { struct bio *bio; struct work_struct work; - } r; + }; struct list_head free_list; /* Free list */ }; u8 flags; /* Flags */ -- cgit v1.2.3 From 03569f2fb8e734f281379767de674e23c38b0b14 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 20 May 2019 09:29:43 -0700 Subject: fscrypt: introduce fscrypt_encrypt_block_inplace() fscrypt_encrypt_page() behaves very differently depending on whether the filesystem set FS_CFLG_OWN_PAGES in its fscrypt_operations. This makes the function difficult to understand and document. It also makes it so that all callers have to provide inode and lblk_num, when fscrypt could determine these itself for pagecache pages. Therefore, move the FS_CFLG_OWN_PAGES behavior into a new function fscrypt_encrypt_block_inplace(). This is in preparation for allowing encryption on ext4 filesystems with blocksize != PAGE_SIZE. Reviewed-by: Chandan Rajendra Signed-off-by: Eric Biggers --- fs/crypto/crypto.c | 50 ++++++++++++++++++++++++++++++------------------- fs/ubifs/crypto.c | 12 ++++++------ include/linux/fscrypt.h | 13 +++++++++++++ 3 files changed, 50 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 59337287e580..2969a1dff10b 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -200,8 +200,7 @@ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, /** * fscypt_encrypt_page() - Encrypts a page * @inode: The inode for which the encryption should take place - * @page: The page to encrypt. Must be locked for bounce-page - * encryption. + * @page: The page to encrypt. Must be locked. * @len: Length of data to encrypt in @page and encrypted * data in returned page. * @offs: Offset of data within @page and returned @@ -211,10 +210,9 @@ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, * previously written data. * @gfp_flags: The gfp flag for memory allocation * - * Encrypts @page. If the filesystem set FS_CFLG_OWN_PAGES, then the data is - * encrypted in-place and @page is returned. Else, a bounce page is allocated, - * the data is encrypted into the bounce page, and the bounce page is returned. - * The caller is responsible for calling fscrypt_free_bounce_page(). + * Encrypts @page. A bounce page is allocated, the data is encrypted into the + * bounce page, and the bounce page is returned. The caller is responsible for + * calling fscrypt_free_bounce_page(). * * Return: A page containing the encrypted data on success, else an ERR_PTR() */ @@ -225,24 +223,12 @@ struct page *fscrypt_encrypt_page(const struct inode *inode, u64 lblk_num, gfp_t gfp_flags) { - struct page *ciphertext_page = page; + struct page *ciphertext_page; int err; - if (inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES) { - /* with inplace-encryption we just encrypt the page */ - err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num, page, - ciphertext_page, len, offs, - gfp_flags); - if (err) - return ERR_PTR(err); - - return ciphertext_page; - } - if (WARN_ON_ONCE(!PageLocked(page))) return ERR_PTR(-EINVAL); - /* The encryption operation will require a bounce page. */ ciphertext_page = fscrypt_alloc_bounce_page(gfp_flags); if (!ciphertext_page) return ERR_PTR(-ENOMEM); @@ -259,6 +245,32 @@ struct page *fscrypt_encrypt_page(const struct inode *inode, } EXPORT_SYMBOL(fscrypt_encrypt_page); +/** + * fscrypt_encrypt_block_inplace() - Encrypt a filesystem block in-place + * @inode: The inode to which this block belongs + * @page: The page containing the block to encrypt + * @len: Size of block to encrypt. Doesn't need to be a multiple of the + * fs block size, but must be a multiple of FS_CRYPTO_BLOCK_SIZE. + * @offs: Byte offset within @page at which the block to encrypt begins + * @lblk_num: Filesystem logical block number of the block, i.e. the 0-based + * number of the block within the file + * @gfp_flags: Memory allocation flags + * + * Encrypt a possibly-compressed filesystem block that is located in an + * arbitrary page, not necessarily in the original pagecache page. The @inode + * and @lblk_num must be specified, as they can't be determined from @page. + * + * Return: 0 on success; -errno on failure + */ +int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, + unsigned int len, unsigned int offs, + u64 lblk_num, gfp_t gfp_flags) +{ + return fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num, page, page, + len, offs, gfp_flags); +} +EXPORT_SYMBOL(fscrypt_encrypt_block_inplace); + /** * fscrypt_decrypt_page() - Decrypts a page in-place * @inode: The corresponding inode for the page to decrypt. diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c index 4aaedf2d7f44..032efdad2e66 100644 --- a/fs/ubifs/crypto.c +++ b/fs/ubifs/crypto.c @@ -29,8 +29,8 @@ int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn, { struct ubifs_info *c = inode->i_sb->s_fs_info; void *p = &dn->data; - struct page *ret; unsigned int pad_len = round_up(in_len, UBIFS_CIPHER_BLOCK_SIZE); + int err; ubifs_assert(c, pad_len <= *out_len); dn->compr_size = cpu_to_le16(in_len); @@ -39,11 +39,11 @@ int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn, if (pad_len != in_len) memset(p + in_len, 0, pad_len - in_len); - ret = fscrypt_encrypt_page(inode, virt_to_page(&dn->data), pad_len, - offset_in_page(&dn->data), block, GFP_NOFS); - if (IS_ERR(ret)) { - ubifs_err(c, "fscrypt_encrypt_page failed: %ld", PTR_ERR(ret)); - return PTR_ERR(ret); + err = fscrypt_encrypt_block_inplace(inode, virt_to_page(p), pad_len, + offset_in_page(p), block, GFP_NOFS); + if (err) { + ubifs_err(c, "fscrypt_encrypt_block_inplace() failed: %d", err); + return err; } *out_len = pad_len; diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 1c7287f146a9..a9b2d26e615d 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -106,6 +106,10 @@ extern void fscrypt_release_ctx(struct fscrypt_ctx *); extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, unsigned int, unsigned int, u64, gfp_t); +extern int fscrypt_encrypt_block_inplace(const struct inode *inode, + struct page *page, unsigned int len, + unsigned int offs, u64 lblk_num, + gfp_t gfp_flags); extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int, unsigned int, u64); @@ -293,6 +297,15 @@ static inline struct page *fscrypt_encrypt_page(const struct inode *inode, return ERR_PTR(-EOPNOTSUPP); } +static inline int fscrypt_encrypt_block_inplace(const struct inode *inode, + struct page *page, + unsigned int len, + unsigned int offs, u64 lblk_num, + gfp_t gfp_flags) +{ + return -EOPNOTSUPP; +} + static inline int fscrypt_decrypt_page(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, -- cgit v1.2.3 From 53bc1d854c64c20d967dab15b111baca02a6d99e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 20 May 2019 09:29:44 -0700 Subject: fscrypt: support encrypting multiple filesystem blocks per page Rename fscrypt_encrypt_page() to fscrypt_encrypt_pagecache_blocks() and redefine its behavior to encrypt all filesystem blocks from the given region of the given page, rather than assuming that the region consists of just one filesystem block. Also remove the 'inode' and 'lblk_num' parameters, since they can be retrieved from the page as it's already assumed to be a pagecache page. This is in preparation for allowing encryption on ext4 filesystems with blocksize != PAGE_SIZE. This is based on work by Chandan Rajendra. Reviewed-by: Chandan Rajendra Signed-off-by: Eric Biggers --- fs/crypto/crypto.c | 67 ++++++++++++++++++++++++++++--------------------- fs/ext4/page-io.c | 4 +-- fs/f2fs/data.c | 5 ++-- include/linux/fscrypt.h | 17 +++++++------ 4 files changed, 53 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 2969a1dff10b..ff43a13c3abf 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -122,8 +122,8 @@ struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags) /** * fscrypt_free_bounce_page() - free a ciphertext bounce page * - * Free a bounce page that was allocated by fscrypt_encrypt_page(), or by - * fscrypt_alloc_bounce_page() directly. + * Free a bounce page that was allocated by fscrypt_encrypt_pagecache_blocks(), + * or by fscrypt_alloc_bounce_page() directly. */ void fscrypt_free_bounce_page(struct page *bounce_page) { @@ -198,52 +198,63 @@ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, } /** - * fscypt_encrypt_page() - Encrypts a page - * @inode: The inode for which the encryption should take place - * @page: The page to encrypt. Must be locked. - * @len: Length of data to encrypt in @page and encrypted - * data in returned page. - * @offs: Offset of data within @page and returned - * page holding encrypted data. - * @lblk_num: Logical block number. This must be unique for multiple - * calls with same inode, except when overwriting - * previously written data. - * @gfp_flags: The gfp flag for memory allocation + * fscrypt_encrypt_pagecache_blocks() - Encrypt filesystem blocks from a pagecache page + * @page: The locked pagecache page containing the block(s) to encrypt + * @len: Total size of the block(s) to encrypt. Must be a nonzero + * multiple of the filesystem's block size. + * @offs: Byte offset within @page of the first block to encrypt. Must be + * a multiple of the filesystem's block size. + * @gfp_flags: Memory allocation flags + * + * A new bounce page is allocated, and the specified block(s) are encrypted into + * it. In the bounce page, the ciphertext block(s) will be located at the same + * offsets at which the plaintext block(s) were located in the source page; any + * other parts of the bounce page will be left uninitialized. However, normally + * blocksize == PAGE_SIZE and the whole page is encrypted at once. * - * Encrypts @page. A bounce page is allocated, the data is encrypted into the - * bounce page, and the bounce page is returned. The caller is responsible for - * calling fscrypt_free_bounce_page(). + * This is for use by the filesystem's ->writepages() method. * - * Return: A page containing the encrypted data on success, else an ERR_PTR() + * Return: the new encrypted bounce page on success; an ERR_PTR() on failure */ -struct page *fscrypt_encrypt_page(const struct inode *inode, - struct page *page, - unsigned int len, - unsigned int offs, - u64 lblk_num, gfp_t gfp_flags) +struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, + unsigned int len, + unsigned int offs, + gfp_t gfp_flags) { + const struct inode *inode = page->mapping->host; + const unsigned int blockbits = inode->i_blkbits; + const unsigned int blocksize = 1 << blockbits; struct page *ciphertext_page; + u64 lblk_num = ((u64)page->index << (PAGE_SHIFT - blockbits)) + + (offs >> blockbits); + unsigned int i; int err; if (WARN_ON_ONCE(!PageLocked(page))) return ERR_PTR(-EINVAL); + if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, blocksize))) + return ERR_PTR(-EINVAL); + ciphertext_page = fscrypt_alloc_bounce_page(gfp_flags); if (!ciphertext_page) return ERR_PTR(-ENOMEM); - err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num, page, - ciphertext_page, len, offs, gfp_flags); - if (err) { - fscrypt_free_bounce_page(ciphertext_page); - return ERR_PTR(err); + for (i = offs; i < offs + len; i += blocksize, lblk_num++) { + err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num, + page, ciphertext_page, + blocksize, i, gfp_flags); + if (err) { + fscrypt_free_bounce_page(ciphertext_page); + return ERR_PTR(err); + } } SetPagePrivate(ciphertext_page); set_page_private(ciphertext_page, (unsigned long)page); return ciphertext_page; } -EXPORT_SYMBOL(fscrypt_encrypt_page); +EXPORT_SYMBOL(fscrypt_encrypt_pagecache_blocks); /** * fscrypt_encrypt_block_inplace() - Encrypt a filesystem block in-place diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 13d5ecc0af03..40ee33df5764 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -471,8 +471,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io, gfp_t gfp_flags = GFP_NOFS; retry_encrypt: - bounce_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0, - page->index, gfp_flags); + bounce_page = fscrypt_encrypt_pagecache_blocks(page, PAGE_SIZE, + 0, gfp_flags); if (IS_ERR(bounce_page)) { ret = PTR_ERR(bounce_page); if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 968ebdbcb583..a546ac8685ea 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1726,8 +1726,9 @@ static int encrypt_one_page(struct f2fs_io_info *fio) f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); retry_encrypt: - fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, - PAGE_SIZE, 0, fio->page->index, gfp_flags); + fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(fio->page, + PAGE_SIZE, 0, + gfp_flags); if (IS_ERR(fio->encrypted_page)) { /* flush pending IOs and wait for a while in the ENOMEM case */ if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index a9b2d26e615d..c7e16bd16a6c 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -103,9 +103,11 @@ static inline void fscrypt_handle_d_move(struct dentry *dentry) extern void fscrypt_enqueue_decrypt_work(struct work_struct *); extern struct fscrypt_ctx *fscrypt_get_ctx(gfp_t); extern void fscrypt_release_ctx(struct fscrypt_ctx *); -extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, - unsigned int, unsigned int, - u64, gfp_t); + +extern struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, + unsigned int len, + unsigned int offs, + gfp_t gfp_flags); extern int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num, @@ -288,11 +290,10 @@ static inline void fscrypt_release_ctx(struct fscrypt_ctx *ctx) return; } -static inline struct page *fscrypt_encrypt_page(const struct inode *inode, - struct page *page, - unsigned int len, - unsigned int offs, - u64 lblk_num, gfp_t gfp_flags) +static inline struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, + unsigned int len, + unsigned int offs, + gfp_t gfp_flags) { return ERR_PTR(-EOPNOTSUPP); } -- cgit v1.2.3 From 41adbcb7267b0060682576d523956160b5c617bd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 20 May 2019 09:29:46 -0700 Subject: fscrypt: introduce fscrypt_decrypt_block_inplace() Currently fscrypt_decrypt_page() does one of two logically distinct things depending on whether FS_CFLG_OWN_PAGES is set in the filesystem's fscrypt_operations: decrypt a pagecache page in-place, or decrypt a filesystem block in-place in any page. Currently these happen to share the same implementation, but this conflates the notion of blocks and pages. It also makes it so that all callers have to provide inode and lblk_num, when fscrypt could determine these itself for pagecache pages. Therefore, move the FS_CFLG_OWN_PAGES behavior into a new function fscrypt_decrypt_block_inplace(). This mirrors fscrypt_encrypt_block_inplace(). This is in preparation for allowing encryption on ext4 filesystems with blocksize != PAGE_SIZE. Reviewed-by: Chandan Rajendra Signed-off-by: Eric Biggers --- fs/crypto/crypto.c | 31 +++++++++++++++++++++++++++---- fs/ubifs/crypto.c | 7 ++++--- include/linux/fscrypt.h | 11 +++++++++++ 3 files changed, 42 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index ff43a13c3abf..f82c45ac285a 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -285,8 +285,7 @@ EXPORT_SYMBOL(fscrypt_encrypt_block_inplace); /** * fscrypt_decrypt_page() - Decrypts a page in-place * @inode: The corresponding inode for the page to decrypt. - * @page: The page to decrypt. Must be locked in case - * it is a writeback page (FS_CFLG_OWN_PAGES unset). + * @page: The page to decrypt. Must be locked. * @len: Number of bytes in @page to be decrypted. * @offs: Start of data in @page. * @lblk_num: Logical block number. @@ -300,8 +299,7 @@ EXPORT_SYMBOL(fscrypt_encrypt_block_inplace); int fscrypt_decrypt_page(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num) { - if (WARN_ON_ONCE(!PageLocked(page) && - !(inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES))) + if (WARN_ON_ONCE(!PageLocked(page))) return -EINVAL; return fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page, page, @@ -309,6 +307,31 @@ int fscrypt_decrypt_page(const struct inode *inode, struct page *page, } EXPORT_SYMBOL(fscrypt_decrypt_page); +/** + * fscrypt_decrypt_block_inplace() - Decrypt a filesystem block in-place + * @inode: The inode to which this block belongs + * @page: The page containing the block to decrypt + * @len: Size of block to decrypt. Doesn't need to be a multiple of the + * fs block size, but must be a multiple of FS_CRYPTO_BLOCK_SIZE. + * @offs: Byte offset within @page at which the block to decrypt begins + * @lblk_num: Filesystem logical block number of the block, i.e. the 0-based + * number of the block within the file + * + * Decrypt a possibly-compressed filesystem block that is located in an + * arbitrary page, not necessarily in the original pagecache page. The @inode + * and @lblk_num must be specified, as they can't be determined from @page. + * + * Return: 0 on success; -errno on failure + */ +int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, + unsigned int len, unsigned int offs, + u64 lblk_num) +{ + return fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page, page, + len, offs, GFP_NOFS); +} +EXPORT_SYMBOL(fscrypt_decrypt_block_inplace); + /* * Validate dentries in encrypted directories to make sure we aren't potentially * caching stale dentries after a key has been added. diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c index 032efdad2e66..22be7aeb96c4 100644 --- a/fs/ubifs/crypto.c +++ b/fs/ubifs/crypto.c @@ -64,10 +64,11 @@ int ubifs_decrypt(const struct inode *inode, struct ubifs_data_node *dn, } ubifs_assert(c, dlen <= UBIFS_BLOCK_SIZE); - err = fscrypt_decrypt_page(inode, virt_to_page(&dn->data), dlen, - offset_in_page(&dn->data), block); + err = fscrypt_decrypt_block_inplace(inode, virt_to_page(&dn->data), + dlen, offset_in_page(&dn->data), + block); if (err) { - ubifs_err(c, "fscrypt_decrypt_page failed: %i", err); + ubifs_err(c, "fscrypt_decrypt_block_inplace() failed: %d", err); return err; } *out_len = clen; diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index c7e16bd16a6c..315affc99b05 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -114,6 +114,9 @@ extern int fscrypt_encrypt_block_inplace(const struct inode *inode, gfp_t gfp_flags); extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int, unsigned int, u64); +extern int fscrypt_decrypt_block_inplace(const struct inode *inode, + struct page *page, unsigned int len, + unsigned int offs, u64 lblk_num); static inline bool fscrypt_is_bounce_page(struct page *page) { @@ -315,6 +318,14 @@ static inline int fscrypt_decrypt_page(const struct inode *inode, return -EOPNOTSUPP; } +static inline int fscrypt_decrypt_block_inplace(const struct inode *inode, + struct page *page, + unsigned int len, + unsigned int offs, u64 lblk_num) +{ + return -EOPNOTSUPP; +} + static inline bool fscrypt_is_bounce_page(struct page *page) { return false; -- cgit v1.2.3 From aa8bc1ac6ef32a332671ca25e06cfd277a3839a5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 20 May 2019 09:29:47 -0700 Subject: fscrypt: support decrypting multiple filesystem blocks per page Rename fscrypt_decrypt_page() to fscrypt_decrypt_pagecache_blocks() and redefine its behavior to decrypt all filesystem blocks in the given region of the given page, rather than assuming that the region consists of just one filesystem block. Also remove the 'inode' and 'lblk_num' parameters, since they can be retrieved from the page as it's already assumed to be a pagecache page. This is in preparation for allowing encryption on ext4 filesystems with blocksize != PAGE_SIZE. This is based on work by Chandan Rajendra. Reviewed-by: Chandan Rajendra Signed-off-by: Eric Biggers --- fs/crypto/bio.c | 3 +-- fs/crypto/crypto.c | 46 ++++++++++++++++++++++++++++++++-------------- fs/ext4/inode.c | 7 +++---- include/linux/fscrypt.h | 12 ++++++------ 4 files changed, 42 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index f9111ffa12ff..61da06fda45c 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -33,8 +33,7 @@ static void __fscrypt_decrypt_bio(struct bio *bio, bool done) bio_for_each_segment_all(bv, bio, iter_all) { struct page *page = bv->bv_page; - int ret = fscrypt_decrypt_page(page->mapping->host, page, - PAGE_SIZE, 0, page->index); + int ret = fscrypt_decrypt_pagecache_blocks(page, PAGE_SIZE, 0); if (ret) SetPageError(page); diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index f82c45ac285a..45c3d0427fb2 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -283,29 +283,47 @@ int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, EXPORT_SYMBOL(fscrypt_encrypt_block_inplace); /** - * fscrypt_decrypt_page() - Decrypts a page in-place - * @inode: The corresponding inode for the page to decrypt. - * @page: The page to decrypt. Must be locked. - * @len: Number of bytes in @page to be decrypted. - * @offs: Start of data in @page. - * @lblk_num: Logical block number. + * fscrypt_decrypt_pagecache_blocks() - Decrypt filesystem blocks in a pagecache page + * @page: The locked pagecache page containing the block(s) to decrypt + * @len: Total size of the block(s) to decrypt. Must be a nonzero + * multiple of the filesystem's block size. + * @offs: Byte offset within @page of the first block to decrypt. Must be + * a multiple of the filesystem's block size. * - * Decrypts page in-place using the ctx encryption context. + * The specified block(s) are decrypted in-place within the pagecache page, + * which must still be locked and not uptodate. Normally, blocksize == + * PAGE_SIZE and the whole page is decrypted at once. * - * Called from the read completion callback. + * This is for use by the filesystem's ->readpages() method. * - * Return: Zero on success, non-zero otherwise. + * Return: 0 on success; -errno on failure */ -int fscrypt_decrypt_page(const struct inode *inode, struct page *page, - unsigned int len, unsigned int offs, u64 lblk_num) +int fscrypt_decrypt_pagecache_blocks(struct page *page, unsigned int len, + unsigned int offs) { + const struct inode *inode = page->mapping->host; + const unsigned int blockbits = inode->i_blkbits; + const unsigned int blocksize = 1 << blockbits; + u64 lblk_num = ((u64)page->index << (PAGE_SHIFT - blockbits)) + + (offs >> blockbits); + unsigned int i; + int err; + if (WARN_ON_ONCE(!PageLocked(page))) return -EINVAL; - return fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page, page, - len, offs, GFP_NOFS); + if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, blocksize))) + return -EINVAL; + + for (i = offs; i < offs + len; i += blocksize, lblk_num++) { + err = fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page, + page, blocksize, i, GFP_NOFS); + if (err) + return err; + } + return 0; } -EXPORT_SYMBOL(fscrypt_decrypt_page); +EXPORT_SYMBOL(fscrypt_decrypt_pagecache_blocks); /** * fscrypt_decrypt_block_inplace() - Decrypt a filesystem block in-place diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c7f77c643008..8bfd8941f5ff 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1232,8 +1232,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, if (unlikely(err)) page_zero_new_buffers(page, from, to); else if (decrypt) - err = fscrypt_decrypt_page(page->mapping->host, page, - PAGE_SIZE, 0, page->index); + err = fscrypt_decrypt_pagecache_blocks(page, PAGE_SIZE, 0); return err; } #endif @@ -4066,8 +4065,8 @@ static int __ext4_block_zero_page_range(handle_t *handle, /* We expect the key to be set. */ BUG_ON(!fscrypt_has_encryption_key(inode)); BUG_ON(blocksize != PAGE_SIZE); - WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host, - page, PAGE_SIZE, 0, page->index)); + WARN_ON_ONCE(fscrypt_decrypt_pagecache_blocks( + page, PAGE_SIZE, 0)); } } if (ext4_should_journal_data(inode)) { diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 315affc99b05..bd8f207a2fb6 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -112,8 +112,9 @@ extern int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num, gfp_t gfp_flags); -extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int, - unsigned int, u64); + +extern int fscrypt_decrypt_pagecache_blocks(struct page *page, unsigned int len, + unsigned int offs); extern int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num); @@ -310,10 +311,9 @@ static inline int fscrypt_encrypt_block_inplace(const struct inode *inode, return -EOPNOTSUPP; } -static inline int fscrypt_decrypt_page(const struct inode *inode, - struct page *page, - unsigned int len, unsigned int offs, - u64 lblk_num) +static inline int fscrypt_decrypt_pagecache_blocks(struct page *page, + unsigned int len, + unsigned int offs) { return -EOPNOTSUPP; } -- cgit v1.2.3 From 54e9c9d4b506b611228890752d1cfa960e0965e1 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 28 May 2019 14:14:41 -0700 Subject: bpf: remove __rcu annotations from bpf_prog_array Drop __rcu annotations and rcu read sections from bpf_prog_array helper functions. They are not needed since all existing callers call those helpers from the rcu update side while holding a mutex. This guarantees that use-after-free could not happen. In the next patches I'll fix the callers with missing rcu_dereference_protected to make sparse/lockdep happy, the proper way to use these helpers is: struct bpf_prog_array __rcu *progs = ...; struct bpf_prog_array *p; mutex_lock(&mtx); p = rcu_dereference_protected(progs, lockdep_is_held(&mtx)); bpf_prog_array_length(p); bpf_prog_array_copy_to_user(p, ...); bpf_prog_array_delete_safe(p, ...); bpf_prog_array_copy_info(p, ...); bpf_prog_array_copy(p, ...); bpf_prog_array_free(p); mutex_unlock(&mtx); No functional changes! rcu_dereference_protected with lockdep_is_held should catch any cases where we update prog array without a mutex (I've looked at existing call sites and I think we hold a mutex everywhere). Motivation is to fix sparse warnings: kernel/bpf/core.c:1803:9: warning: incorrect type in argument 1 (different address spaces) kernel/bpf/core.c:1803:9: expected struct callback_head *head kernel/bpf/core.c:1803:9: got struct callback_head [noderef] * kernel/bpf/core.c:1877:44: warning: incorrect type in initializer (different address spaces) kernel/bpf/core.c:1877:44: expected struct bpf_prog_array_item *item kernel/bpf/core.c:1877:44: got struct bpf_prog_array_item [noderef] * kernel/bpf/core.c:1901:26: warning: incorrect type in assignment (different address spaces) kernel/bpf/core.c:1901:26: expected struct bpf_prog_array_item *existing kernel/bpf/core.c:1901:26: got struct bpf_prog_array_item [noderef] * kernel/bpf/core.c:1935:26: warning: incorrect type in assignment (different address spaces) kernel/bpf/core.c:1935:26: expected struct bpf_prog_array_item *[assigned] existing kernel/bpf/core.c:1935:26: got struct bpf_prog_array_item [noderef] * v2: * remove comment about potential race; that can't happen because all callers are in rcu-update section Cc: Roman Gushchin Acked-by: Roman Gushchin Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 12 ++++++------ kernel/bpf/core.c | 37 +++++++++++++------------------------ 2 files changed, 19 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d98141edb74b..ff3e00ff84d2 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -514,17 +514,17 @@ struct bpf_prog_array { }; struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); -void bpf_prog_array_free(struct bpf_prog_array __rcu *progs); -int bpf_prog_array_length(struct bpf_prog_array __rcu *progs); -int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, +void bpf_prog_array_free(struct bpf_prog_array *progs); +int bpf_prog_array_length(struct bpf_prog_array *progs); +int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs, __u32 __user *prog_ids, u32 cnt); -void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, +void bpf_prog_array_delete_safe(struct bpf_prog_array *progs, struct bpf_prog *old_prog); -int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, +int bpf_prog_array_copy_info(struct bpf_prog_array *array, u32 *prog_ids, u32 request_cnt, u32 *prog_cnt); -int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, +int bpf_prog_array_copy(struct bpf_prog_array *old_array, struct bpf_prog *exclude_prog, struct bpf_prog *include_prog, struct bpf_prog_array **new_array); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 3675b19ecb90..33fb292f2e30 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1795,38 +1795,33 @@ struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags) return &empty_prog_array.hdr; } -void bpf_prog_array_free(struct bpf_prog_array __rcu *progs) +void bpf_prog_array_free(struct bpf_prog_array *progs) { - if (!progs || - progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr) + if (!progs || progs == &empty_prog_array.hdr) return; kfree_rcu(progs, rcu); } -int bpf_prog_array_length(struct bpf_prog_array __rcu *array) +int bpf_prog_array_length(struct bpf_prog_array *array) { struct bpf_prog_array_item *item; u32 cnt = 0; - rcu_read_lock(); - item = rcu_dereference(array)->items; - for (; item->prog; item++) + for (item = array->items; item->prog; item++) if (item->prog != &dummy_bpf_prog.prog) cnt++; - rcu_read_unlock(); return cnt; } -static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array, +static bool bpf_prog_array_copy_core(struct bpf_prog_array *array, u32 *prog_ids, u32 request_cnt) { struct bpf_prog_array_item *item; int i = 0; - item = rcu_dereference_check(array, 1)->items; - for (; item->prog; item++) { + for (item = array->items; item->prog; item++) { if (item->prog == &dummy_bpf_prog.prog) continue; prog_ids[i] = item->prog->aux->id; @@ -1839,7 +1834,7 @@ static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array, return !!(item->prog); } -int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array, +int bpf_prog_array_copy_to_user(struct bpf_prog_array *array, __u32 __user *prog_ids, u32 cnt) { unsigned long err = 0; @@ -1850,18 +1845,12 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array, * cnt = bpf_prog_array_length(); * if (cnt > 0) * bpf_prog_array_copy_to_user(..., cnt); - * so below kcalloc doesn't need extra cnt > 0 check, but - * bpf_prog_array_length() releases rcu lock and - * prog array could have been swapped with empty or larger array, - * so always copy 'cnt' prog_ids to the user. - * In a rare race the user will see zero prog_ids + * so below kcalloc doesn't need extra cnt > 0 check. */ ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN); if (!ids) return -ENOMEM; - rcu_read_lock(); nospc = bpf_prog_array_copy_core(array, ids, cnt); - rcu_read_unlock(); err = copy_to_user(prog_ids, ids, cnt * sizeof(u32)); kfree(ids); if (err) @@ -1871,19 +1860,19 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array, return 0; } -void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *array, +void bpf_prog_array_delete_safe(struct bpf_prog_array *array, struct bpf_prog *old_prog) { - struct bpf_prog_array_item *item = array->items; + struct bpf_prog_array_item *item; - for (; item->prog; item++) + for (item = array->items; item->prog; item++) if (item->prog == old_prog) { WRITE_ONCE(item->prog, &dummy_bpf_prog.prog); break; } } -int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, +int bpf_prog_array_copy(struct bpf_prog_array *old_array, struct bpf_prog *exclude_prog, struct bpf_prog *include_prog, struct bpf_prog_array **new_array) @@ -1947,7 +1936,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, return 0; } -int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, +int bpf_prog_array_copy_info(struct bpf_prog_array *array, u32 *prog_ids, u32 request_cnt, u32 *prog_cnt) { -- cgit v1.2.3 From dbcc1ba26e43bd32cb308e50ac4cb4a29d2f5967 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 28 May 2019 14:14:43 -0700 Subject: bpf: cgroup: properly use bpf_prog_array api Now that we don't have __rcu markers on the bpf_prog_array helpers, let's use proper rcu_dereference_protected to obtain array pointer under mutex. We also don't need __rcu annotations on cgroup_bpf.inactive since it's not read/updated concurrently. v4: * drop cgroup_rcu_xyz wrappers and use rcu APIs directly; presumably should be more clear to understand which mutex/refcount protects each particular place v3: * amend cgroup_rcu_dereference to include percpu_ref_is_dying; cgroup_bpf is now reference counted and we don't hold cgroup_mutex anymore in cgroup_bpf_release v2: * replace xchg with rcu_swap_protected Cc: Roman Gushchin Signed-off-by: Stanislav Fomichev Acked-by: Roman Gushchin Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 2 +- kernel/bpf/cgroup.c | 28 +++++++++++++++++----------- 2 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 9f100fc422c3..b631ee75762d 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -72,7 +72,7 @@ struct cgroup_bpf { u32 flags[MAX_BPF_ATTACH_TYPE]; /* temp storage for effective prog array used by prog_attach/detach */ - struct bpf_prog_array __rcu *inactive; + struct bpf_prog_array *inactive; /* reference counter used to detach bpf programs after cgroup removal */ struct percpu_ref refcnt; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index d995edbe816d..ff594eb86fd7 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -38,6 +38,7 @@ static void cgroup_bpf_release(struct work_struct *work) struct cgroup *cgrp = container_of(work, struct cgroup, bpf.release_work); enum bpf_cgroup_storage_type stype; + struct bpf_prog_array *old_array; unsigned int type; for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) { @@ -54,7 +55,10 @@ static void cgroup_bpf_release(struct work_struct *work) kfree(pl); static_branch_dec(&cgroup_bpf_enabled_key); } - bpf_prog_array_free(cgrp->bpf.effective[type]); + old_array = rcu_dereference_protected( + cgrp->bpf.effective[type], + percpu_ref_is_dying(&cgrp->bpf.refcnt)); + bpf_prog_array_free(old_array); } percpu_ref_exit(&cgrp->bpf.refcnt); @@ -126,7 +130,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp, */ static int compute_effective_progs(struct cgroup *cgrp, enum bpf_attach_type type, - struct bpf_prog_array __rcu **array) + struct bpf_prog_array **array) { enum bpf_cgroup_storage_type stype; struct bpf_prog_array *progs; @@ -164,17 +168,16 @@ static int compute_effective_progs(struct cgroup *cgrp, } } while ((p = cgroup_parent(p))); - rcu_assign_pointer(*array, progs); + *array = progs; return 0; } static void activate_effective_progs(struct cgroup *cgrp, enum bpf_attach_type type, - struct bpf_prog_array __rcu *array) + struct bpf_prog_array *old_array) { - struct bpf_prog_array __rcu *old_array; - - old_array = xchg(&cgrp->bpf.effective[type], array); + rcu_swap_protected(cgrp->bpf.effective[type], old_array, + lockdep_is_held(&cgroup_mutex)); /* free prog array after grace period, since __cgroup_bpf_run_*() * might be still walking the array */ @@ -191,7 +194,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp) * that array below is variable length */ #define NR ARRAY_SIZE(cgrp->bpf.effective) - struct bpf_prog_array __rcu *arrays[NR] = {}; + struct bpf_prog_array *arrays[NR] = {}; int ret, i; ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0, @@ -477,10 +480,14 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, enum bpf_attach_type type = attr->query.attach_type; struct list_head *progs = &cgrp->bpf.progs[type]; u32 flags = cgrp->bpf.flags[type]; + struct bpf_prog_array *effective; int cnt, ret = 0, i; + effective = rcu_dereference_protected(cgrp->bpf.effective[type], + lockdep_is_held(&cgroup_mutex)); + if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) - cnt = bpf_prog_array_length(cgrp->bpf.effective[type]); + cnt = bpf_prog_array_length(effective); else cnt = prog_list_length(progs); @@ -497,8 +504,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, } if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) { - return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type], - prog_ids, cnt); + return bpf_prog_array_copy_to_user(effective, prog_ids, cnt); } else { struct bpf_prog_list *pl; u32 id; -- cgit v1.2.3 From 91ca180dbdd687d45fe4aab055b02d29c91b90df Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 6 Feb 2019 16:39:13 -0600 Subject: signal: Use force_sig_fault_to_task for the two calls that don't deliver to current In preparation for removing the task parameter from force_sig_fault introduce force_sig_fault_to_task and use it for the two cases where it matters. On mips force_fcr31_sig calls force_sig_fault and is called on either the current task, or a task that is suspended and is being switched to by the scheduler. This is safe because the task being switched to by the scheduler is guaranteed to be suspended. This ensures that task->sighand is stable while the signal is delivered to it. On parisc user_enable_single_step calls force_sig_fault and is in turn called by ptrace_request. The function ptrace_request always calls user_enable_single_step on a child that is stopped for tracing. The child being traced and not reaped ensures that child->sighand is not NULL, and that the child will not change child->sighand. Signed-off-by: "Eric W. Biederman" --- arch/mips/kernel/traps.c | 2 +- arch/parisc/kernel/ptrace.c | 6 +++--- include/linux/sched/signal.h | 4 ++++ kernel/signal.c | 12 +++++++++++- 4 files changed, 19 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index a6031b045b95..62df48b6fb46 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -733,7 +733,7 @@ void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr, else if (fcr31 & FPU_CSR_INE_X) si_code = FPE_FLTRES; - force_sig_fault(SIGFPE, si_code, fault_addr, tsk); + force_sig_fault_to_task(SIGFPE, si_code, fault_addr, tsk); } int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31) diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index a3d2fb4e6dd2..f642ba378ffa 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -88,9 +88,9 @@ void user_enable_single_step(struct task_struct *task) ptrace_disable(task); /* Don't wake up the task, but let the parent know something happened. */ - force_sig_fault(SIGTRAP, TRAP_TRACE, - (void __user *) (task_regs(task)->iaoq[0] & ~3), - task); + force_sig_fault_to_task(SIGTRAP, TRAP_TRACE, + (void __user *) (task_regs(task)->iaoq[0] & ~3), + task); /* notify_parent(task, SIGCHLD); */ return; } diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 4178bb1f7709..507af66a1fc8 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -307,6 +307,10 @@ static inline void kernel_signal_stop(void) # define ___ARCH_SI_IA64(_a1, _a2, _a3) #endif +int force_sig_fault_to_task(int sig, int code, void __user *addr + ___ARCH_SI_TRAPNO(int trapno) + ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) + , struct task_struct *t); int force_sig_fault(int sig, int code, void __user *addr ___ARCH_SI_TRAPNO(int trapno) ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) diff --git a/kernel/signal.c b/kernel/signal.c index 398489facf9f..e420489ac4c9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1620,7 +1620,7 @@ void force_sigsegv(int sig) force_sig(SIGSEGV); } -int force_sig_fault(int sig, int code, void __user *addr +int force_sig_fault_to_task(int sig, int code, void __user *addr ___ARCH_SI_TRAPNO(int trapno) ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) , struct task_struct *t) @@ -1643,6 +1643,16 @@ int force_sig_fault(int sig, int code, void __user *addr return force_sig_info(info.si_signo, &info, t); } +int force_sig_fault(int sig, int code, void __user *addr + ___ARCH_SI_TRAPNO(int trapno) + ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) + , struct task_struct *t) +{ + return force_sig_fault_to_task(sig, code, addr + ___ARCH_SI_TRAPNO(trapno) + ___ARCH_SI_IA64(imm, flags, isr), t); +} + int send_sig_fault(int sig, int code, void __user *addr ___ARCH_SI_TRAPNO(int trapno) ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) -- cgit v1.2.3 From 2e1661d2673667d886cd40ad9f414cb6db48d8da Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 23 May 2019 11:04:24 -0500 Subject: signal: Remove the task parameter from force_sig_fault As synchronous exceptions really only make sense against the current task (otherwise how are you synchronous) remove the task parameter from from force_sig_fault to make it explicit that is what is going on. The two known exceptions that deliver a synchronous exception to a stopped ptraced task have already been changed to force_sig_fault_to_task. The callers have been changed with the following emacs regular expression (with obvious variations on the architectures that take more arguments) to avoid typos: force_sig_fault[(]\([^,]+\)[,]\([^,]+\)[,]\([^,]+\)[,]\W+current[)] -> force_sig_fault(\1,\2,\3) Signed-off-by: "Eric W. Biederman" --- arch/alpha/kernel/traps.c | 2 +- arch/alpha/mm/fault.c | 4 ++-- arch/arc/kernel/traps.c | 2 +- arch/arc/mm/fault.c | 4 ++-- arch/arm/kernel/ptrace.c | 2 +- arch/arm/kernel/traps.c | 2 +- arch/arm/mm/alignment.c | 2 +- arch/arm/mm/fault.c | 2 +- arch/arm64/kernel/traps.c | 2 +- arch/c6x/kernel/traps.c | 2 +- arch/csky/abiv1/alignment.c | 2 +- arch/csky/abiv2/fpu.c | 2 +- arch/csky/kernel/traps.c | 2 +- arch/csky/mm/fault.c | 4 ++-- arch/hexagon/kernel/traps.c | 2 +- arch/hexagon/mm/vm_fault.c | 4 ++-- arch/ia64/kernel/brl_emu.c | 6 ++--- arch/ia64/kernel/traps.c | 18 +++++++------- arch/ia64/kernel/unaligned.c | 2 +- arch/ia64/mm/fault.c | 2 +- arch/m68k/kernel/traps.c | 4 ++-- arch/m68k/mm/fault.c | 4 ++-- arch/microblaze/kernel/exceptions.c | 2 +- arch/microblaze/mm/fault.c | 2 +- arch/mips/kernel/traps.c | 12 +++++----- arch/mips/mm/fault.c | 4 ++-- arch/nds32/kernel/fpu.c | 2 +- arch/nds32/kernel/traps.c | 4 ++-- arch/nds32/mm/fault.c | 4 ++-- arch/nios2/kernel/traps.c | 2 +- arch/openrisc/kernel/traps.c | 8 +++---- arch/openrisc/mm/fault.c | 4 ++-- arch/parisc/kernel/traps.c | 14 +++++------ arch/parisc/kernel/unaligned.c | 4 ++-- arch/parisc/math-emu/driver.c | 2 +- arch/parisc/mm/fault.c | 2 +- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/kernel/traps.c | 4 ++-- arch/powerpc/mm/fault.c | 2 +- arch/powerpc/platforms/cell/spufs/fault.c | 9 ++++--- arch/riscv/kernel/traps.c | 4 ++-- arch/s390/kernel/traps.c | 6 ++--- arch/s390/mm/fault.c | 6 ++--- arch/sh/kernel/hw_breakpoint.c | 2 +- arch/sh/kernel/traps_32.c | 4 ++-- arch/sh/math-emu/math.c | 2 +- arch/sh/mm/fault.c | 2 +- arch/sparc/kernel/process_64.c | 2 +- arch/sparc/kernel/sys_sparc_32.c | 2 +- arch/sparc/kernel/sys_sparc_64.c | 2 +- arch/sparc/kernel/traps_32.c | 4 ++-- arch/sparc/kernel/traps_64.c | 39 ++++++++++++++----------------- arch/sparc/mm/fault_32.c | 2 +- arch/sparc/mm/fault_64.c | 2 +- arch/um/kernel/ptrace.c | 3 +-- arch/um/kernel/trap.c | 12 ++++------ arch/unicore32/kernel/traps.c | 2 +- arch/unicore32/mm/fault.c | 2 +- arch/x86/entry/vsyscall/vsyscall_64.c | 2 +- arch/x86/kernel/ptrace.c | 2 +- arch/x86/kernel/traps.c | 4 ++-- arch/x86/kernel/umip.c | 2 +- arch/x86/mm/fault.c | 7 +++--- arch/xtensa/kernel/traps.c | 2 +- arch/xtensa/mm/fault.c | 4 ++-- include/linux/sched/signal.h | 3 +-- kernel/signal.c | 5 ++-- 67 files changed, 137 insertions(+), 151 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index bc9627698796..f6b9664ac504 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -402,7 +402,7 @@ do_entDbg(struct pt_regs *regs) { die_if_kernel("Instruction fault", regs, 0, NULL); - force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc, 0, current); + force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc, 0); } diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 188fc9256baf..741e61ef9d3f 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -221,13 +221,13 @@ retry: up_read(&mm->mmap_sem); /* Send a sigbus, regardless of whether we were in kernel or user mode. */ - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *) address, 0, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *) address, 0); if (!user_mode(regs)) goto no_context; return; do_sigsegv: - force_sig_fault(SIGSEGV, si_code, (void __user *) address, 0, current); + force_sig_fault(SIGSEGV, si_code, (void __user *) address, 0); return; #ifdef CONFIG_ALPHA_LARGE_VMALLOC diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index e618fbb3e28d..fc56efc25488 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -50,7 +50,7 @@ unhandled_exception(const char *str, struct pt_regs *regs, tsk->thread.fault_address = (__force unsigned int)addr; - force_sig_fault(signo, si_code, addr, current); + force_sig_fault(signo, si_code, addr); } else { /* If not due to copy_(to|from)_user, we are doomed */ diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index d5d4758d7e75..5001f6418e92 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -202,7 +202,7 @@ bad_area_nosemaphore: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { tsk->thread.fault_address = address; - force_sig_fault(SIGSEGV, si_code, (void __user *)address, current); + force_sig_fault(SIGSEGV, si_code, (void __user *)address); return; } @@ -237,5 +237,5 @@ do_sigbus: goto no_context; tsk->thread.fault_address = address; - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); } diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index f9cbd08a9075..1512d6b5e1cf 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -204,7 +204,7 @@ void ptrace_disable(struct task_struct *child) void ptrace_break(struct pt_regs *regs) { force_sig_fault(SIGTRAP, TRAP_BRKPT, - (void __user *)instruction_pointer(regs), current); + (void __user *)instruction_pointer(regs)); } static int break_trap(struct pt_regs *regs, unsigned int instr) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 288989c7355d..a32342fa3e4a 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -372,7 +372,7 @@ void arm_notify_die(const char *str, struct pt_regs *regs, current->thread.error_code = err; current->thread.trap_no = trap; - force_sig_fault(signo, si_code, addr, current); + force_sig_fault(signo, si_code, addr); } else { die(str, regs, err); } diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index e376883ab35b..a6fffd788c9c 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -948,7 +948,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) goto fixup; if (ai_usermode & UM_SIGNAL) { - force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr, current); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr); } else { /* * We're about to disable the alignment trap and return to diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 03007ea4cc72..49e8ec2e9e7b 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -184,7 +184,7 @@ __do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig, tsk->thread.address = addr; tsk->thread.error_code = fsr; tsk->thread.trap_no = 14; - force_sig_fault(sig, code, (void __user *)addr, current); + force_sig_fault(sig, code, (void __user *)addr); } void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index c76a64c1bcb3..a490a4a32e77 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -259,7 +259,7 @@ void arm64_force_sig_fault(int signo, int code, void __user *addr, if (signo == SIGKILL) force_sig(SIGKILL); else - force_sig_fault(signo, code, addr, current); + force_sig_fault(signo, code, addr); } void arm64_force_sig_mceerr(int code, void __user *addr, short lsb, diff --git a/arch/c6x/kernel/traps.c b/arch/c6x/kernel/traps.c index 5c60aea3b75a..ca54d1dd2aee 100644 --- a/arch/c6x/kernel/traps.c +++ b/arch/c6x/kernel/traps.c @@ -253,7 +253,7 @@ static void do_trap(struct exception_info *except_info, struct pt_regs *regs) die_if_kernel(except_info->kernel_str, regs, addr); force_sig_fault(except_info->signo, except_info->code, - (void __user *)addr, current); + (void __user *)addr); } /* diff --git a/arch/csky/abiv1/alignment.c b/arch/csky/abiv1/alignment.c index d789be36eb4f..27ef5b2c43ab 100644 --- a/arch/csky/abiv1/alignment.c +++ b/arch/csky/abiv1/alignment.c @@ -283,7 +283,7 @@ bad_area: do_exit(SIGKILL); } - force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr, current); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr); } static struct ctl_table alignment_tbl[4] = { diff --git a/arch/csky/abiv2/fpu.c b/arch/csky/abiv2/fpu.c index e7e11344005a..86d187d4e5af 100644 --- a/arch/csky/abiv2/fpu.c +++ b/arch/csky/abiv2/fpu.c @@ -124,7 +124,7 @@ void fpu_fpe(struct pt_regs *regs) code = FPE_FLTRES; } - force_sig_fault(sig, code, (void __user *)regs->pc, current); + force_sig_fault(sig, code, (void __user *)regs->pc); } #define FMFVR_FPU_REGS(vrx, vry) \ diff --git a/arch/csky/kernel/traps.c b/arch/csky/kernel/traps.c index f487a9b996ae..2792e9601ac5 100644 --- a/arch/csky/kernel/traps.c +++ b/arch/csky/kernel/traps.c @@ -106,7 +106,7 @@ void buserr(struct pt_regs *regs) pr_err("User mode Bus Error\n"); show_regs(regs); - force_sig_fault(SIGSEGV, 0, (void __user *)regs->pc, current); + force_sig_fault(SIGSEGV, 0, (void __user *)regs->pc); } #define USR_BKPT 0x1464 diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c index 18041f46ded1..f76618b630f9 100644 --- a/arch/csky/mm/fault.c +++ b/arch/csky/mm/fault.c @@ -179,7 +179,7 @@ bad_area: bad_area_nosemaphore: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { - force_sig_fault(SIGSEGV, si_code, (void __user *)address, current); + force_sig_fault(SIGSEGV, si_code, (void __user *)address); return; } @@ -212,5 +212,5 @@ do_sigbus: if (!user_mode(regs)) goto no_context; - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); } diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index e634414361df..b8a69b2e3f3d 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -420,7 +420,7 @@ void do_trap0(struct pt_regs *regs) * may want to use a different trap0 flavor. */ force_sig_fault(SIGTRAP, TRAP_BRKPT, - (void __user *) pt_elr(regs), current); + (void __user *) pt_elr(regs)); } else { #ifdef CONFIG_KGDB kgdb_handle_exception(pt_cause(regs), SIGTRAP, diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index eb263e61daf4..2b3e22509cdf 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -148,14 +148,14 @@ good_area: si_signo = SIGSEGV; si_code = SEGV_ACCERR; } - force_sig_fault(si_signo, si_code, (void __user *)address, current); + force_sig_fault(si_signo, si_code, (void __user *)address); return; bad_area: up_read(&mm->mmap_sem); if (user_mode(regs)) { - force_sig_fault(SIGSEGV, si_code, (void __user *)address, current); + force_sig_fault(SIGSEGV, si_code, (void __user *)address); return; } /* Kernel-mode fault falls through */ diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c index c0239bf77a09..782c481d7052 100644 --- a/arch/ia64/kernel/brl_emu.c +++ b/arch/ia64/kernel/brl_emu.c @@ -197,21 +197,21 @@ ia64_emulate_brl (struct pt_regs *regs, unsigned long ar_ec) */ printk(KERN_DEBUG "Woah! Unimplemented Instruction Address Trap!\n"); force_sig_fault(SIGILL, ILL_BADIADDR, (void __user *)NULL, - 0, 0, 0, current); + 0, 0, 0); } else if (ia64_psr(regs)->tb) { /* * Branch Tracing is enabled. * Force a taken branch signal. */ force_sig_fault(SIGTRAP, TRAP_BRANCH, (void __user *)NULL, - 0, 0, 0, current); + 0, 0, 0); } else if (ia64_psr(regs)->ss) { /* * Single Step is enabled. * Force a trace signal. */ force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)NULL, - 0, 0, 0, current); + 0, 0, 0); } return rv; } diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 0a3adbfebc2a..e13cb905930f 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -176,7 +176,7 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs) } force_sig_fault(sig, code, (void __user *) (regs->cr_iip + ia64_psr(regs)->ri), - break_num, 0 /* clear __ISR_VALID */, 0, current); + break_num, 0 /* clear __ISR_VALID */, 0); } /* @@ -353,7 +353,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) } force_sig_fault(SIGFPE, si_code, (void __user *) (regs->cr_iip + ia64_psr(regs)->ri), - 0, __ISR_VALID, isr, current); + 0, __ISR_VALID, isr); } } else { if (exception == -1) { @@ -373,7 +373,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) } force_sig_fault(SIGFPE, si_code, (void __user *) (regs->cr_iip + ia64_psr(regs)->ri), - 0, __ISR_VALID, isr, current); + 0, __ISR_VALID, isr); } } return 0; @@ -408,7 +408,7 @@ ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3, force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *) (regs.cr_iip + ia64_psr(®s)->ri), - 0, 0, 0, current); + 0, 0, 0); return rv; } @@ -483,7 +483,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, + ia64_psr(®s)->ri); } force_sig_fault(sig, code, addr, - vector, __ISR_VALID, isr, current); + vector, __ISR_VALID, isr); return; } else if (ia64_done_with_exception(®s)) return; @@ -493,7 +493,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, case 31: /* Unsupported Data Reference */ if (user_mode(®s)) { force_sig_fault(SIGILL, ILL_ILLOPN, (void __user *) iip, - vector, __ISR_VALID, isr, current); + vector, __ISR_VALID, isr); return; } sprintf(buf, "Unsupported data reference"); @@ -542,7 +542,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, == NOTIFY_STOP) return; force_sig_fault(SIGTRAP, si_code, (void __user *) ifa, - 0, __ISR_VALID, isr, current); + 0, __ISR_VALID, isr); return; case 32: /* fp fault */ @@ -550,7 +550,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, result = handle_fpu_swa((vector == 32) ? 1 : 0, ®s, isr); if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) { force_sig_fault(SIGFPE, FPE_FLTINV, (void __user *) iip, - 0, __ISR_VALID, isr, current); + 0, __ISR_VALID, isr); } return; @@ -578,7 +578,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, if (user_mode(®s)) { force_sig_fault(SIGILL, ILL_BADIADDR, (void __user *) iip, - 0, 0, 0, current); + 0, 0, 0); return; } sprintf(buf, "Unimplemented Instruction Address fault"); diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index a167a3824b35..eb7d5df59fa3 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -1537,6 +1537,6 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) } force_sigbus: force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) ifa, - 0, 0, 0, current); + 0, 0, 0); goto done; } diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 5baeb022f474..3c3a283d3172 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -249,7 +249,7 @@ retry: } if (user_mode(regs)) { force_sig_fault(signal, code, (void __user *) address, - 0, __ISR_VALID, isr, current); + 0, __ISR_VALID, isr); return; } diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index 2b6e143abd73..344f93d36a9a 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -1127,7 +1127,7 @@ asmlinkage void trap_c(struct frame *fp) addr = (void __user*) fp->un.fmtb.daddr; break; } - force_sig_fault(sig, si_code, addr, current); + force_sig_fault(sig, si_code, addr); } void die_if_kernel (char *str, struct pt_regs *fp, int nr) @@ -1159,6 +1159,6 @@ asmlinkage void fpsp040_die(void) #ifdef CONFIG_M68KFPU_EMU asmlinkage void fpemu_signal(int signal, int code, void *addr) { - force_sig_fault(signal, code, addr, current); + force_sig_fault(signal, code, addr); } #endif diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 9b6163c05a75..e9b1d7585b43 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -30,13 +30,13 @@ int send_fault_sig(struct pt_regs *regs) pr_debug("send_fault_sig: %p,%d,%d\n", addr, signo, si_code); if (user_mode(regs)) { - force_sig_fault(signo, si_code, addr, current); + force_sig_fault(signo, si_code, addr); } else { if (fixup_exception(regs)) return -1; //if (signo == SIGBUS) - // force_sig_fault(si_signo, si_code, addr, current); + // force_sig_fault(si_signo, si_code, addr); /* * Oops. The kernel tried to access some bad page. We'll have to diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c index eafff21fcb0e..cf99c411503e 100644 --- a/arch/microblaze/kernel/exceptions.c +++ b/arch/microblaze/kernel/exceptions.c @@ -63,7 +63,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) if (kernel_mode(regs)) die("Exception in kernel mode", regs, signr); - force_sig_fault(signr, code, (void __user *)addr, current); + force_sig_fault(signr, code, (void __user *)addr); } asmlinkage void full_exception(struct pt_regs *regs, unsigned int type, diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index 202ad6a494f5..e6a810b0c7ad 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -289,7 +289,7 @@ out_of_memory: do_sigbus: up_read(&mm->mmap_sem); if (user_mode(regs)) { - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); return; } bad_page_fault(regs, address, SIGBUS); diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 62df48b6fb46..be4a7b25269c 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -705,7 +705,7 @@ asmlinkage void do_ov(struct pt_regs *regs) prev_state = exception_enter(); die_if_kernel("Integer overflow", regs); - force_sig_fault(SIGFPE, FPE_INTOVF, (void __user *)regs->cp0_epc, current); + force_sig_fault(SIGFPE, FPE_INTOVF, (void __user *)regs->cp0_epc); exception_exit(prev_state); } @@ -750,7 +750,7 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31) return 1; case SIGBUS: - force_sig_fault(SIGBUS, BUS_ADRERR, fault_addr, current); + force_sig_fault(SIGBUS, BUS_ADRERR, fault_addr); return 1; case SIGSEGV: @@ -761,7 +761,7 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31) else si_code = SEGV_MAPERR; up_read(¤t->mm->mmap_sem); - force_sig_fault(SIGSEGV, si_code, fault_addr, current); + force_sig_fault(SIGSEGV, si_code, fault_addr); return 1; default: @@ -943,7 +943,7 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code, die_if_kernel(b, regs); force_sig_fault(SIGFPE, code == BRK_DIVZERO ? FPE_INTDIV : FPE_INTOVF, - (void __user *) regs->cp0_epc, current); + (void __user *) regs->cp0_epc); break; case BRK_BUG: die_if_kernel("Kernel bug detected", regs); @@ -968,7 +968,7 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code, scnprintf(b, sizeof(b), "%s instruction in kernel code", str); die_if_kernel(b, regs); if (si_code) { - force_sig_fault(SIGTRAP, si_code, NULL, current); + force_sig_fault(SIGTRAP, si_code, NULL); } else { force_sig(SIGTRAP); } @@ -1521,7 +1521,7 @@ asmlinkage void do_watch(struct pt_regs *regs) if (test_tsk_thread_flag(current, TIF_LOAD_WATCH)) { mips_read_watch_registers(); local_irq_enable(); - force_sig_fault(SIGTRAP, TRAP_HWBKPT, NULL, current); + force_sig_fault(SIGTRAP, TRAP_HWBKPT, NULL); } else { mips_clear_watch_registers(); local_irq_enable(); diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index e63abd492f65..f589aa8f47d9 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -223,7 +223,7 @@ bad_area_nosemaphore: pr_cont("\n"); } current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f; - force_sig_fault(SIGSEGV, si_code, (void __user *)address, current); + force_sig_fault(SIGSEGV, si_code, (void __user *)address); return; } @@ -279,7 +279,7 @@ do_sigbus: #endif current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f; tsk->thread.cp0_badvaddr = address; - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); return; #ifndef CONFIG_64BIT diff --git a/arch/nds32/kernel/fpu.c b/arch/nds32/kernel/fpu.c index fddd40c7a16f..1f8694c6bd5a 100644 --- a/arch/nds32/kernel/fpu.c +++ b/arch/nds32/kernel/fpu.c @@ -246,7 +246,7 @@ inline void handle_fpu_exception(struct pt_regs *regs) } force_sig_fault(si_signo, si_code, - (void __user *)instruction_pointer(regs), current); + (void __user *)instruction_pointer(regs)); done: own_fpu(); } diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index a16e97f7bc75..f4d386b52622 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -205,7 +205,7 @@ int bad_syscall(int n, struct pt_regs *regs) } force_sig_fault(SIGILL, ILL_ILLTRP, - (void __user *)instruction_pointer(regs) - 4, current); + (void __user *)instruction_pointer(regs) - 4); die_if_kernel("Oops - bad syscall", regs, n); return regs->uregs[0]; } @@ -263,7 +263,7 @@ static void send_sigtrap(struct pt_regs *regs, int error_code, int si_code) tsk->thread.error_code = error_code; force_sig_fault(SIGTRAP, si_code, - (void __user *)instruction_pointer(regs), current); + (void __user *)instruction_pointer(regs)); } void do_debug_trap(unsigned long entry, unsigned long addr, diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c index 38441113c202..064ae5d2159d 100644 --- a/arch/nds32/mm/fault.c +++ b/arch/nds32/mm/fault.c @@ -271,7 +271,7 @@ bad_area_nosemaphore: tsk->thread.address = addr; tsk->thread.error_code = error_code; tsk->thread.trap_no = entry; - force_sig_fault(SIGSEGV, si_code, (void __user *)addr, current); + force_sig_fault(SIGSEGV, si_code, (void __user *)addr); return; } @@ -340,7 +340,7 @@ do_sigbus: tsk->thread.address = addr; tsk->thread.error_code = error_code; tsk->thread.trap_no = entry; - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr); return; diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c index 3bc3cd22b750..486db793923c 100644 --- a/arch/nios2/kernel/traps.c +++ b/arch/nios2/kernel/traps.c @@ -26,7 +26,7 @@ static DEFINE_SPINLOCK(die_lock); static void _send_sig(int signo, int code, unsigned long addr) { - force_sig_fault(signo, code, (void __user *) addr, current); + force_sig_fault(signo, code, (void __user *) addr); } void die(const char *str, struct pt_regs *regs, long err) diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 0fad2e46ff43..a4cc6e59c57f 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -249,7 +249,7 @@ void __init trap_init(void) asmlinkage void do_trap(struct pt_regs *regs, unsigned long address) { - force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)address, current); + force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)address); regs->pc += 4; } @@ -258,7 +258,7 @@ asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address) { if (user_mode(regs)) { /* Send a SIGBUS */ - force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)address, current); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)address); } else { printk("KERNEL: Unaligned Access 0x%.8lx\n", address); show_registers(regs); @@ -271,7 +271,7 @@ asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address) { if (user_mode(regs)) { /* Send a SIGBUS */ - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); } else { /* Kernel mode */ printk("KERNEL: Bus error (SIGBUS) 0x%.8lx\n", address); show_registers(regs); @@ -466,7 +466,7 @@ asmlinkage void do_illegal_instruction(struct pt_regs *regs, if (user_mode(regs)) { /* Send a SIGILL */ - force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)address, current); + force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)address); } else { /* Kernel mode */ printk("KERNEL: Illegal instruction (SIGILL) 0x%.8lx\n", address); diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index f8b3a5a6ba3a..ae9468c22c9d 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -213,7 +213,7 @@ bad_area_nosemaphore: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { - force_sig_fault(SIGSEGV, si_code, (void __user *)address, current); + force_sig_fault(SIGSEGV, si_code, (void __user *)address); return; } @@ -278,7 +278,7 @@ do_sigbus: * Send a sigbus, regardless of whether we were in kernel * or user mode. */ - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 096e319adeb3..58dcf445e32f 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -275,7 +275,7 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err) static void handle_gdb_break(struct pt_regs *regs, int wot) { force_sig_fault(SIGTRAP, wot, - (void __user *) (regs->iaoq[0] & ~3), current); + (void __user *) (regs->iaoq[0] & ~3)); } static void handle_break(struct pt_regs *regs) @@ -609,13 +609,13 @@ void notrace handle_interruption(int code, struct pt_regs *regs) si_code = ILL_PRVREG; give_sigill: force_sig_fault(SIGILL, si_code, - (void __user *) regs->iaoq[0], current); + (void __user *) regs->iaoq[0]); return; case 12: /* Overflow Trap, let the userland signal handler do the cleanup */ force_sig_fault(SIGFPE, FPE_INTOVF, - (void __user *) regs->iaoq[0], current); + (void __user *) regs->iaoq[0]); return; case 13: @@ -627,7 +627,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) * to by si_addr. */ force_sig_fault(SIGFPE, FPE_CONDTRAP, - (void __user *) regs->iaoq[0], current); + (void __user *) regs->iaoq[0]); return; } /* The kernel doesn't want to handle condition codes */ @@ -739,7 +739,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) force_sig_fault(SIGSEGV, SEGV_MAPERR, (code == 7)? ((void __user *) regs->iaoq[0]) : - ((void __user *) regs->ior), current); + ((void __user *) regs->ior)); return; case 28: @@ -754,7 +754,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) task_pid_nr(current), current->comm); /* SIGBUS, for lack of a better one. */ force_sig_fault(SIGBUS, BUS_OBJERR, - (void __user *)regs->ior, current); + (void __user *)regs->ior); return; } pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC); @@ -770,7 +770,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) code, fault_space, task_pid_nr(current), current->comm); force_sig_fault(SIGSEGV, SEGV_MAPERR, - (void __user *)regs->ior, current); + (void __user *)regs->ior); return; } } diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 932bfc0b7cd8..3ccc3a69469c 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -690,14 +690,14 @@ void handle_unaligned(struct pt_regs *regs) if (ret == ERR_PAGEFAULT) { force_sig_fault(SIGSEGV, SEGV_MAPERR, - (void __user *)regs->ior, current); + (void __user *)regs->ior); } else { force_sigbus: /* couldn't handle it ... */ force_sig_fault(SIGBUS, BUS_ADRALN, - (void __user *)regs->ior, current); + (void __user *)regs->ior); } return; diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c index 0590e05571d1..f3e0bddcbb38 100644 --- a/arch/parisc/math-emu/driver.c +++ b/arch/parisc/math-emu/driver.c @@ -117,7 +117,7 @@ handle_fpe(struct pt_regs *regs) memcpy(regs->fr, frcopy, sizeof regs->fr); if (signalcode != 0) { force_sig_fault(signalcode >> 24, signalcode & 0xffffff, - (void __user *) regs->iaoq[0], current); + (void __user *) regs->iaoq[0]); return -1; } diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 56ceacb3401d..6dd4669ce7a5 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -409,7 +409,7 @@ bad_area: #endif show_signal_msg(regs, code, address, tsk, vma); - force_sig_fault(signo, si_code, (void __user *) address, current); + force_sig_fault(signo, si_code, (void __user *) address); return; } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 87da40129927..1b5b1477afa2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -643,7 +643,7 @@ void do_break (struct pt_regs *regs, unsigned long address, hw_breakpoint_disable(); /* Deliver the signal to userspace */ - force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address, current); + force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address); } #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 83e59fdaa62d..dfc61f2f69a0 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -301,7 +301,7 @@ NOKPROBE_SYMBOL(die); void user_single_step_report(struct pt_regs *regs) { - force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip, current); + force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip); } static void show_signal_msg(int signr, struct pt_regs *regs, int code, @@ -367,7 +367,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) if (!exception_common(signr, regs, code, addr)) return; - force_sig_fault(signr, code, (void __user *)addr, current); + force_sig_fault(signr, code, (void __user *)addr); } /* diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 6ed6c341c670..02c70fa535ef 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -187,7 +187,7 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address, } #endif - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); return 0; } diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index 971ac43b5d60..6634c0c5ed9e 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -44,22 +44,21 @@ static void spufs_handle_event(struct spu_context *ctx, switch (type) { case SPE_EVENT_INVALID_DMA: - force_sig_fault(SIGBUS, BUS_OBJERR, NULL, current); + force_sig_fault(SIGBUS, BUS_OBJERR, NULL); break; case SPE_EVENT_SPE_DATA_STORAGE: ctx->ops->restart_dma(ctx); - force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *)ea, - current); + force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *)ea); break; case SPE_EVENT_DMA_ALIGNMENT: /* DAR isn't set for an alignment fault :( */ - force_sig_fault(SIGBUS, BUS_ADRALN, NULL, current); + force_sig_fault(SIGBUS, BUS_ADRALN, NULL); break; case SPE_EVENT_SPE_ERROR: force_sig_fault( SIGILL, ILL_ILLOPC, (void __user *)(unsigned long) - ctx->ops->npc_read(ctx) - 4, current); + ctx->ops->npc_read(ctx) - 4); break; } } diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 6d67892dfc82..859ab550d52a 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -76,7 +76,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) show_regs(regs); } - force_sig_fault(signo, code, (void __user *)addr, current); + force_sig_fault(signo, code, (void __user *)addr); } static void do_trap_error(struct pt_regs *regs, int signo, int code, @@ -149,7 +149,7 @@ asmlinkage void do_trap_break(struct pt_regs *regs) } #endif /* CONFIG_GENERIC_BUG */ - force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)(regs->sepc), current); + force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)(regs->sepc)); } #ifdef CONFIG_GENERIC_BUG diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 82e81a9f7112..ac44dbfc4a7e 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -45,7 +45,7 @@ int is_valid_bugaddr(unsigned long addr) void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) { if (user_mode(regs)) { - force_sig_fault(si_signo, si_code, get_trap_ip(regs), current); + force_sig_fault(si_signo, si_code, get_trap_ip(regs)); report_user_fault(regs, si_signo, 0); } else { const struct exception_table_entry *fixup; @@ -79,7 +79,7 @@ void do_per_trap(struct pt_regs *regs) if (!current->ptrace) return; force_sig_fault(SIGTRAP, TRAP_HWBKPT, - (void __force __user *) current->thread.per_event.address, current); + (void __force __user *) current->thread.per_event.address); } NOKPROBE_SYMBOL(do_per_trap); @@ -165,7 +165,7 @@ void illegal_op(struct pt_regs *regs) return; if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { if (current->ptrace) - force_sig_fault(SIGTRAP, TRAP_BRKPT, location, current); + force_sig_fault(SIGTRAP, TRAP_BRKPT, location); else signal = SIGILL; #ifdef CONFIG_UPROBES diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index c220399ae196..79afed544cac 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -243,8 +243,7 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code) { report_user_fault(regs, SIGSEGV, 1); force_sig_fault(SIGSEGV, si_code, - (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK), - current); + (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK)); } const struct exception_table_entry *s390_search_extables(unsigned long addr) @@ -305,8 +304,7 @@ static noinline void do_sigbus(struct pt_regs *regs) * or user mode. */ force_sig_fault(SIGBUS, BUS_ADRERR, - (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK), - current); + (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK)); } static noinline int signal_return(struct pt_regs *regs) diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c index bc96b16288c1..3bd010b4c55f 100644 --- a/arch/sh/kernel/hw_breakpoint.c +++ b/arch/sh/kernel/hw_breakpoint.c @@ -338,7 +338,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) /* Deliver the signal to userspace */ if (!arch_check_bp_in_kernelspace(&bp->hw.info)) { force_sig_fault(SIGTRAP, TRAP_HWBKPT, - (void __user *)NULL, current); + (void __user *)NULL); } rcu_read_unlock(); diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index bd5568c8e7f0..058c6181bb30 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -533,7 +533,7 @@ uspace_segv: "access (PC %lx PR %lx)\n", current->comm, regs->pc, regs->pr); - force_sig_fault(SIGBUS, si_code, (void __user *)address, current); + force_sig_fault(SIGBUS, si_code, (void __user *)address); } else { inc_unaligned_kernel_access(); @@ -603,7 +603,7 @@ asmlinkage void do_divide_error(unsigned long r4) /* Let gcc know unhandled cases don't make it past here */ return; } - force_sig_fault(SIGFPE, code, NULL, current); + force_sig_fault(SIGFPE, code, NULL); } #endif diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c index fe261b0983cc..e8be0eca0444 100644 --- a/arch/sh/math-emu/math.c +++ b/arch/sh/math-emu/math.c @@ -560,7 +560,7 @@ static int ieee_fpe_handler(struct pt_regs *regs) task_thread_info(tsk)->status |= TS_USEDFPU; } else { force_sig_fault(SIGFPE, FPE_FLTINV, - (void __user *)regs->pc, current); + (void __user *)regs->pc); } regs->pc = nextpc; diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 851a3cbb2b9c..3093bc372138 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -41,7 +41,7 @@ static inline int notify_page_fault(struct pt_regs *regs, int trap) static void force_sig_info_fault(int si_signo, int si_code, unsigned long address) { - force_sig_fault(si_signo, si_code, (void __user *)address, current); + force_sig_fault(si_signo, si_code, (void __user *)address); } /* diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index c4bccd97f3cf..4282116e28e7 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -519,7 +519,7 @@ void synchronize_user_stack(void) static void stack_unaligned(unsigned long sp) { - force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) sp, 0, current); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) sp, 0); } static const char uwfault32[] = KERN_INFO \ diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index 452e4d080855..be77538bc038 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -151,7 +151,7 @@ sparc_breakpoint (struct pt_regs *regs) #ifdef DEBUG_SPARC_BREAKPOINT printk ("TRAP: Entering kernel PC=%x, nPC=%x\n", regs->pc, regs->npc); #endif - force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0, current); + force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0); #ifdef DEBUG_SPARC_BREAKPOINT printk ("TRAP: Returning to space: PC=%x nPC=%x\n", regs->pc, regs->npc); diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 9825ca6a6020..ccc88926bc00 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -511,7 +511,7 @@ asmlinkage void sparc_breakpoint(struct pt_regs *regs) #ifdef DEBUG_SPARC_BREAKPOINT printk ("TRAP: Entering kernel PC=%lx, nPC=%lx\n", regs->tpc, regs->tnpc); #endif - force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->tpc, 0, current); + force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->tpc, 0); #ifdef DEBUG_SPARC_BREAKPOINT printk ("TRAP: Returning to space: PC=%lx nPC=%lx\n", regs->tpc, regs->tnpc); #endif diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index bcdfc6168dd5..4ceecad556a9 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -103,7 +103,7 @@ void do_hw_interrupt(struct pt_regs *regs, unsigned long type) die_if_kernel("Kernel bad trap", regs); force_sig_fault(SIGILL, ILL_ILLTRP, - (void __user *)regs->pc, type - 0x80, current); + (void __user *)regs->pc, type - 0x80); } void do_illegal_instruction(struct pt_regs *regs, unsigned long pc, unsigned long npc, @@ -327,7 +327,7 @@ void handle_reg_access(struct pt_regs *regs, unsigned long pc, unsigned long npc printk("Register Access Exception at PC %08lx NPC %08lx PSR %08lx\n", pc, npc, psr); #endif - force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)pc, 0, current); + force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)pc, 0); } void handle_cp_disabled(struct pt_regs *regs, unsigned long pc, unsigned long npc, diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 12bfc7e215ca..614d92c18506 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -107,7 +107,7 @@ void bad_trap(struct pt_regs *regs, long lvl) regs->tnpc &= 0xffffffff; } force_sig_fault(SIGILL, ILL_ILLTRP, - (void __user *)regs->tpc, lvl, current); + (void __user *)regs->tpc, lvl); } void bad_trap_tl1(struct pt_regs *regs, long lvl) @@ -201,7 +201,7 @@ void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, un regs->tnpc &= 0xffffffff; } force_sig_fault(SIGSEGV, SEGV_MAPERR, - (void __user *)regs->tpc, 0, current); + (void __user *)regs->tpc, 0); out: exception_exit(prev_state); } @@ -236,7 +236,7 @@ void sun4v_insn_access_exception(struct pt_regs *regs, unsigned long addr, unsig regs->tpc &= 0xffffffff; regs->tnpc &= 0xffffffff; } - force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *) addr, 0, current); + force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *) addr, 0); } void sun4v_insn_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) @@ -321,7 +321,7 @@ void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, un if (is_no_fault_exception(regs)) return; - force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)sfar, 0, current); + force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)sfar, 0); out: exception_exit(prev_state); } @@ -385,16 +385,13 @@ void sun4v_data_access_exception(struct pt_regs *regs, unsigned long addr, unsig */ switch (type) { case HV_FAULT_TYPE_INV_ASI: - force_sig_fault(SIGILL, ILL_ILLADR, (void __user *)addr, 0, - current); + force_sig_fault(SIGILL, ILL_ILLADR, (void __user *)addr, 0); break; case HV_FAULT_TYPE_MCD_DIS: - force_sig_fault(SIGSEGV, SEGV_ACCADI, (void __user *)addr, 0, - current); + force_sig_fault(SIGSEGV, SEGV_ACCADI, (void __user *)addr, 0); break; default: - force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)addr, 0, - current); + force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)addr, 0); break; } } @@ -571,7 +568,7 @@ static void spitfire_ue_log(unsigned long afsr, unsigned long afar, unsigned lon regs->tpc &= 0xffffffff; regs->tnpc &= 0xffffffff; } - force_sig_fault(SIGBUS, BUS_OBJERR, (void *)0, 0, current); + force_sig_fault(SIGBUS, BUS_OBJERR, (void *)0, 0); } void spitfire_access_error(struct pt_regs *regs, unsigned long status_encoded, unsigned long afar) @@ -2073,7 +2070,7 @@ void do_mcd_err(struct pt_regs *regs, struct sun4v_error_entry ent) * code */ force_sig_fault(SIGSEGV, SEGV_ADIDERR, (void __user *)ent.err_raddr, - 0, current); + 0); } /* We run with %pil set to PIL_NORMAL_MAX and PSTATE_IE enabled in %pstate. @@ -2187,7 +2184,7 @@ bool sun4v_nonresum_error_user_handled(struct pt_regs *regs, } if (attrs & SUN4V_ERR_ATTRS_PIO) { force_sig_fault(SIGBUS, BUS_ADRERR, - (void __user *)sun4v_get_vaddr(regs), 0, current); + (void __user *)sun4v_get_vaddr(regs), 0); return true; } @@ -2344,7 +2341,7 @@ static void do_fpe_common(struct pt_regs *regs) code = FPE_FLTRES; } force_sig_fault(SIGFPE, code, - (void __user *)regs->tpc, 0, current); + (void __user *)regs->tpc, 0); } } @@ -2399,7 +2396,7 @@ void do_tof(struct pt_regs *regs) regs->tnpc &= 0xffffffff; } force_sig_fault(SIGEMT, EMT_TAGOVF, - (void __user *)regs->tpc, 0, current); + (void __user *)regs->tpc, 0); out: exception_exit(prev_state); } @@ -2419,7 +2416,7 @@ void do_div0(struct pt_regs *regs) regs->tnpc &= 0xffffffff; } force_sig_fault(SIGFPE, FPE_INTDIV, - (void __user *)regs->tpc, 0, current); + (void __user *)regs->tpc, 0); out: exception_exit(prev_state); } @@ -2615,7 +2612,7 @@ void do_illegal_instruction(struct pt_regs *regs) } } } - force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)pc, 0, current); + force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)pc, 0); out: exception_exit(prev_state); } @@ -2635,7 +2632,7 @@ void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned lo if (is_no_fault_exception(regs)) return; - force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)sfar, 0, current); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)sfar, 0); out: exception_exit(prev_state); } @@ -2653,7 +2650,7 @@ void sun4v_do_mna(struct pt_regs *regs, unsigned long addr, unsigned long type_c if (is_no_fault_exception(regs)) return; - force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) addr, 0, current); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) addr, 0); } /* sun4v_mem_corrupt_detect_precise() - Handle precise exception on an ADI @@ -2700,7 +2697,7 @@ void sun4v_mem_corrupt_detect_precise(struct pt_regs *regs, unsigned long addr, regs->tpc &= 0xffffffff; regs->tnpc &= 0xffffffff; } - force_sig_fault(SIGSEGV, SEGV_ADIPERR, (void __user *)addr, 0, current); + force_sig_fault(SIGSEGV, SEGV_ADIPERR, (void __user *)addr, 0); } void do_privop(struct pt_regs *regs) @@ -2716,7 +2713,7 @@ void do_privop(struct pt_regs *regs) regs->tnpc &= 0xffffffff; } force_sig_fault(SIGILL, ILL_PRVOPC, - (void __user *)regs->tpc, 0, current); + (void __user *)regs->tpc, 0); out: exception_exit(prev_state); } diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index 2731faf415ba..8d69de111470 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -131,7 +131,7 @@ static void __do_fault_siginfo(int code, int sig, struct pt_regs *regs, show_signal_msg(regs, sig, code, addr, current); - force_sig_fault(sig, code, (void __user *) addr, 0, current); + force_sig_fault(sig, code, (void __user *) addr, 0); } static unsigned long compute_si_addr(struct pt_regs *regs, int text_fault) diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 8f8a604c1300..83fda4d9c3b2 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -187,7 +187,7 @@ static void do_fault_siginfo(int code, int sig, struct pt_regs *regs, if (unlikely(show_unhandled_signals)) show_signal_msg(regs, sig, code, addr, current); - force_sig_fault(sig, code, (void __user *) addr, 0, current); + force_sig_fault(sig, code, (void __user *) addr, 0); } static unsigned int get_fault_insn(struct pt_regs *regs, unsigned int insn) diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index 1797dfe9ce6d..da1e96b1ec3e 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -117,8 +117,7 @@ static void send_sigtrap(struct uml_pt_regs *regs, int error_code) /* Send us the fake SIGTRAP */ force_sig_fault(SIGTRAP, TRAP_BRKPT, /* User-mode eip? */ - UPT_IS_USER(regs) ? (void __user *) UPT_IP(regs) : NULL, - current); + UPT_IS_USER(regs) ? (void __user *) UPT_IP(regs) : NULL); } /* diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 1c943c66063f..58fe36856182 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -163,8 +163,7 @@ static void show_segv_info(struct uml_pt_regs *regs) static void bad_segv(struct faultinfo fi, unsigned long ip) { current->thread.arch.faultinfo = fi; - force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *) FAULT_ADDRESS(fi), - current); + force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *) FAULT_ADDRESS(fi)); } void fatal_sigsegv(void) @@ -268,13 +267,11 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, if (err == -EACCES) { current->thread.arch.faultinfo = fi; - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, - current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); } else { BUG_ON(err != -EFAULT); current->thread.arch.faultinfo = fi; - force_sig_fault(SIGSEGV, si_code, (void __user *) address, - current); + force_sig_fault(SIGSEGV, si_code, (void __user *) address); } out: @@ -304,8 +301,7 @@ void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs) if ((err == 0) && (siginfo_layout(sig, code) == SIL_FAULT)) { struct faultinfo *fi = UPT_FAULTINFO(regs); current->thread.arch.faultinfo = *fi; - force_sig_fault(sig, code, (void __user *)FAULT_ADDRESS(*fi), - current); + force_sig_fault(sig, code, (void __user *)FAULT_ADDRESS(*fi)); } else { printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d) with errno %d\n", sig, code, err); diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index fb376d83e043..a0878035cda7 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -248,7 +248,7 @@ void uc32_notify_die(const char *str, struct pt_regs *regs, current->thread.error_code = err; current->thread.trap_no = trap; - force_sig_fault(sig, code, addr, current); + force_sig_fault(sig, code, addr); } else die(str, regs, err); } diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index 313547a93513..c85ba5339c1f 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -124,7 +124,7 @@ static void __do_user_fault(unsigned long addr, unsigned int fsr, tsk->thread.address = addr; tsk->thread.error_code = fsr; tsk->thread.trap_no = 14; - force_sig_fault(sig, code, (void __user *)addr, current); + force_sig_fault(sig, code, (void __user *)addr); } void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 7ea87f4ad0b7..2f31faf339d5 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -106,7 +106,7 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) thread->cr2 = ptr; thread->trap_nr = X86_TRAP_PF; - force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)ptr, current); + force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)ptr); return false; } else { return true; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 34d27b2dc7a1..8f8f197389db 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1369,7 +1369,7 @@ void send_sigtrap(struct pt_regs *regs, int error_code, int si_code) /* Send us the fake SIGTRAP */ force_sig_fault(SIGTRAP, si_code, - user_mode(regs) ? (void __user *)regs->ip : NULL, current); + user_mode(regs) ? (void __user *)regs->ip : NULL); } void user_single_step_report(struct pt_regs *regs) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 945b9a0719dd..87095a477154 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -256,7 +256,7 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, if (!sicode) force_sig(signr); else - force_sig_fault(signr, sicode, addr, current); + force_sig_fault(signr, sicode, addr); } NOKPROBE_SYMBOL(do_trap); @@ -856,7 +856,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) return; force_sig_fault(SIGFPE, si_code, - (void __user *)uprobe_get_trap_addr(regs), current); + (void __user *)uprobe_get_trap_addr(regs)); } dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c index 68cdcd717c85..5b345add550f 100644 --- a/arch/x86/kernel/umip.c +++ b/arch/x86/kernel/umip.c @@ -277,7 +277,7 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs) tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE; tsk->thread.trap_nr = X86_TRAP_PF; - force_sig_fault(SIGSEGV, SEGV_MAPERR, addr, current); + force_sig_fault(SIGSEGV, SEGV_MAPERR, addr); if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV))) return; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 16a5d1b615a7..46ac96aa7c81 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -756,8 +756,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, set_signal_archinfo(address, error_code); /* XXX: hwpoison faults will set the wrong code. */ - force_sig_fault(signal, si_code, (void __user *)address, - current); + force_sig_fault(signal, si_code, (void __user *)address); } /* @@ -918,7 +917,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, if (si_code == SEGV_PKUERR) force_sig_pkuerr((void __user *)address, pkey); - force_sig_fault(SIGSEGV, si_code, (void __user *)address, current); + force_sig_fault(SIGSEGV, si_code, (void __user *)address); return; } @@ -1044,7 +1043,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, return; } #endif - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); } static noinline void diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 6f26b254091b..f060348c1b23 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -330,7 +330,7 @@ do_unaligned_user (struct pt_regs *regs) "(pid = %d, pc = %#010lx)\n", regs->excvaddr, current->comm, task_pid_nr(current), regs->pc); - force_sig_fault(SIGBUS, BUS_ADRALN, (void *) regs->excvaddr, current); + force_sig_fault(SIGBUS, BUS_ADRALN, (void *) regs->excvaddr); } #endif diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 2ab0e0dcd166..f81b1478da61 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -157,7 +157,7 @@ bad_area: if (user_mode(regs)) { current->thread.bad_vaddr = address; current->thread.error_code = is_write; - force_sig_fault(SIGSEGV, code, (void *) address, current); + force_sig_fault(SIGSEGV, code, (void *) address); return; } bad_page_fault(regs, address, SIGSEGV); @@ -182,7 +182,7 @@ do_sigbus: * or user mode. */ current->thread.bad_vaddr = address; - force_sig_fault(SIGBUS, BUS_ADRERR, (void *) address, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void *) address); /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 507af66a1fc8..7f872506e1de 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -313,8 +313,7 @@ int force_sig_fault_to_task(int sig, int code, void __user *addr , struct task_struct *t); int force_sig_fault(int sig, int code, void __user *addr ___ARCH_SI_TRAPNO(int trapno) - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) - , struct task_struct *t); + ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)); int send_sig_fault(int sig, int code, void __user *addr ___ARCH_SI_TRAPNO(int trapno) ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) diff --git a/kernel/signal.c b/kernel/signal.c index e420489ac4c9..d92b636b4e9d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1645,12 +1645,11 @@ int force_sig_fault_to_task(int sig, int code, void __user *addr int force_sig_fault(int sig, int code, void __user *addr ___ARCH_SI_TRAPNO(int trapno) - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) - , struct task_struct *t) + ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)) { return force_sig_fault_to_task(sig, code, addr ___ARCH_SI_TRAPNO(trapno) - ___ARCH_SI_IA64(imm, flags, isr), t); + ___ARCH_SI_IA64(imm, flags, isr), current); } int send_sig_fault(int sig, int code, void __user *addr -- cgit v1.2.3 From a89e9b8abf82725e4ac96100e07c8104dbe8a240 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 15 May 2019 10:11:09 -0500 Subject: signal: Remove the signal number and task parameters from force_sig_info force_sig_info always delivers to the current task and the signal parameter always matches info.si_signo. So remove those parameters to make it a simpler less error prone interface, and to make it clear that none of the callers are doing anything clever. This guarantees that force_sig_info will not grow any new buggy callers that attempt to call force_sig on a non-current task, or that pass an signal number that does not match info.si_signo. Signed-off-by: "Eric W. Biederman" --- include/linux/ptrace.h | 2 +- include/linux/sched/signal.h | 2 +- kernel/seccomp.c | 2 +- kernel/signal.c | 14 +++++++------- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index d5084ebd9f03..2a9df80ea887 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -355,7 +355,7 @@ static inline void user_single_step_report(struct pt_regs *regs) info.si_code = SI_USER; info.si_pid = 0; info.si_uid = 0; - force_sig_info(info.si_signo, &info, current); + force_sig_info(&info); } #endif diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 7f872506e1de..532458698bde 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -329,7 +329,7 @@ int force_sig_ptrace_errno_trap(int errno, void __user *addr); extern int send_sig_info(int, struct kernel_siginfo *, struct task_struct *); extern void force_sigsegv(int sig); -extern int force_sig_info(int, struct kernel_siginfo *, struct task_struct *); +extern int force_sig_info(struct kernel_siginfo *); extern int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp); extern int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid); extern int kill_pid_usb_asyncio(int sig, int errno, sigval_t addr, struct pid *, diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 811b4a86cdf6..dba52a7db5e8 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -609,7 +609,7 @@ static void seccomp_send_sigsys(int syscall, int reason) { struct kernel_siginfo info; seccomp_init_siginfo(&info, syscall, reason); - force_sig_info(SIGSYS, &info, current); + force_sig_info(&info); } #endif /* CONFIG_SECCOMP_FILTER */ diff --git a/kernel/signal.c b/kernel/signal.c index 0984158cd41a..ff6944e4964e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1325,9 +1325,9 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t) return ret; } -int force_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *t) +int force_sig_info(struct kernel_siginfo *info) { - return force_sig_info_to_task(info, t); + return force_sig_info_to_task(info, current); } /* @@ -1619,7 +1619,7 @@ void force_sig(int sig) info.si_code = SI_KERNEL; info.si_pid = 0; info.si_uid = 0; - force_sig_info(info.si_signo, &info, current); + force_sig_info(&info); } EXPORT_SYMBOL(force_sig); @@ -1708,7 +1708,7 @@ int force_sig_mceerr(int code, void __user *addr, short lsb) info.si_code = code; info.si_addr = addr; info.si_addr_lsb = lsb; - return force_sig_info(info.si_signo, &info, current); + return force_sig_info(&info); } int send_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct *t) @@ -1737,7 +1737,7 @@ int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper) info.si_addr = addr; info.si_lower = lower; info.si_upper = upper; - return force_sig_info(info.si_signo, &info, current); + return force_sig_info(&info); } #ifdef SEGV_PKUERR @@ -1751,7 +1751,7 @@ int force_sig_pkuerr(void __user *addr, u32 pkey) info.si_code = SEGV_PKUERR; info.si_addr = addr; info.si_pkey = pkey; - return force_sig_info(info.si_signo, &info, current); + return force_sig_info(&info); } #endif @@ -1767,7 +1767,7 @@ int force_sig_ptrace_errno_trap(int errno, void __user *addr) info.si_errno = errno; info.si_code = TRAP_HWBKPT; info.si_addr = addr; - return force_sig_info(info.si_signo, &info, current); + return force_sig_info(&info); } int kill_pgrp(struct pid *pid, int sig, int priv) -- cgit v1.2.3 From 279758f8001f0014b15656a4ef130a20852f6df6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 28 May 2019 15:02:31 +0800 Subject: rhashtable: Add rht_ptr_rcu and improve rht_ptr This patch moves common code between rht_ptr and rht_ptr_exclusive into __rht_ptr. It also adds a new helper rht_ptr_rcu exclusively for the RCU case. This way rht_ptr becomes a lock-only construct so we can use the lighter rcu_dereference_protected primitive. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 9f8bc06d4136..beb9a9da1699 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -352,37 +352,38 @@ static inline void rht_unlock(struct bucket_table *tbl, static inline struct rhash_head __rcu *__rht_ptr( struct rhash_lock_head *const *bkt) { - return (struct rhash_head __rcu *)((unsigned long)*bkt & ~BIT(0)); + return (struct rhash_head __rcu *) + ((unsigned long)*bkt & ~BIT(0) ?: + (unsigned long)RHT_NULLS_MARKER(bkt)); } /* * Where 'bkt' is a bucket and might be locked: - * rht_ptr() dereferences that pointer and clears the lock bit. + * rht_ptr_rcu() dereferences that pointer and clears the lock bit. + * rht_ptr() dereferences in a context where the bucket is locked. * rht_ptr_exclusive() dereferences in a context where exclusive * access is guaranteed, such as when destroying the table. */ +static inline struct rhash_head *rht_ptr_rcu( + struct rhash_lock_head *const *bkt) +{ + struct rhash_head __rcu *p = __rht_ptr(bkt); + + return rcu_dereference(p); +} + static inline struct rhash_head *rht_ptr( struct rhash_lock_head *const *bkt, struct bucket_table *tbl, unsigned int hash) { - struct rhash_head __rcu *p = __rht_ptr(bkt); - - if (!p) - return RHT_NULLS_MARKER(bkt); - - return rht_dereference_bucket_rcu(p, tbl, hash); + return rht_dereference_bucket(__rht_ptr(bkt), tbl, hash); } static inline struct rhash_head *rht_ptr_exclusive( struct rhash_lock_head *const *bkt) { - struct rhash_head __rcu *p = __rht_ptr(bkt); - - if (!p) - return RHT_NULLS_MARKER(bkt); - - return rcu_dereference_protected(p, 1); + return rcu_dereference_protected(__rht_ptr(bkt), 1); } static inline void rht_assign_locked(struct rhash_lock_head **bkt, @@ -509,7 +510,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, */ #define rht_for_each_rcu(pos, tbl, hash) \ for (({barrier(); }), \ - pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash); \ + pos = rht_ptr_rcu(rht_bucket(tbl, hash)); \ !rht_is_a_nulls(pos); \ pos = rcu_dereference_raw(pos->next)) @@ -546,8 +547,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, */ #define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ rht_for_each_entry_rcu_from(tpos, pos, \ - rht_ptr(rht_bucket(tbl, hash), \ - tbl, hash), \ + rht_ptr_rcu(rht_bucket(tbl, hash)), \ tbl, hash, member) /** @@ -603,7 +603,7 @@ restart: hash = rht_key_hashfn(ht, tbl, key, params); bkt = rht_bucket(tbl, hash); do { - rht_for_each_rcu_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { + rht_for_each_rcu_from(he, rht_ptr_rcu(bkt), tbl, hash) { if (params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, he)) : rhashtable_compare(&arg, rht_obj(ht, he))) -- cgit v1.2.3 From 44cc27e43fa3b8977373915a8e7f515a9d263343 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 28 May 2019 20:38:12 +0300 Subject: net: phylink: Add struct phylink_config to PHYLINK API The phylink_config structure will encapsulate a pointer to a struct device and the operation type requested for this instance of PHYLINK. This patch does not make any functional changes, it just transitions the PHYLINK internals and all its users to the new API. A pointer to a phylink_config structure will be passed to phylink_create() instead of the net_device directly. Also, the same phylink_config pointer will be passed back to all phylink_mac_ops callbacks instead of the net_device. Using this mechanism, a PHYLINK user can get the original net_device using a structure such as 'to_net_dev(config->dev)' or directly the structure containing the phylink_config using a container_of call. At the moment, only the PHYLINK_NETDEV is defined as a valid operation type for PHYLINK. In this mode, a valid reference to a struct device linked to the original net_device should be passed to PHYLINK through the phylink_config structure. This API changes is mainly driven by the necessity of adding a new operation type in PHYLINK that disconnects the phy_device from the net_device and also works when the net_device is lacking. Signed-off-by: Ioana Ciornei Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Reviewed-by: Maxime Chevallier Tested-by: Maxime Chevallier Signed-off-by: David S. Miller --- Documentation/networking/sfp-phylink.rst | 5 ++- drivers/net/ethernet/marvell/mvneta.c | 36 ++++++++++------ drivers/net/ethernet/marvell/mvpp2/mvpp2.h | 1 + drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 43 +++++++++++-------- drivers/net/phy/phylink.c | 26 ++++++++---- include/linux/phylink.h | 56 ++++++++++++++++--------- include/net/dsa.h | 2 + net/dsa/slave.c | 31 ++++++++------ 8 files changed, 128 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/sfp-phylink.rst b/Documentation/networking/sfp-phylink.rst index 5bd26cb07244..91446b431b70 100644 --- a/Documentation/networking/sfp-phylink.rst +++ b/Documentation/networking/sfp-phylink.rst @@ -98,6 +98,7 @@ this documentation. 4. Add:: struct phylink *phylink; + struct phylink_config phylink_config; to the driver's private data structure. We shall refer to the driver's private data pointer as ``priv`` below, and the driver's @@ -223,8 +224,10 @@ this documentation. .. code-block:: c struct phylink *phylink; + priv->phylink_config.dev = &dev.dev; + priv->phylink_config.type = PHYLINK_NETDEV; - phylink = phylink_create(dev, node, phy_mode, &phylink_ops); + phylink = phylink_create(&priv->phylink_config, node, phy_mode, &phylink_ops); if (IS_ERR(phylink)) { err = PTR_ERR(phylink); fail probe; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index e758650b2c26..adbbcdde73e6 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -437,6 +437,7 @@ struct mvneta_port { struct device_node *dn; unsigned int tx_csum_limit; struct phylink *phylink; + struct phylink_config phylink_config; struct phy *comphy; struct mvneta_bm *bm_priv; @@ -3356,9 +3357,11 @@ static int mvneta_set_mac_addr(struct net_device *dev, void *addr) return 0; } -static void mvneta_validate(struct net_device *ndev, unsigned long *supported, +static void mvneta_validate(struct phylink_config *config, + unsigned long *supported, struct phylink_link_state *state) { + struct net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; @@ -3408,9 +3411,10 @@ static void mvneta_validate(struct net_device *ndev, unsigned long *supported, phylink_helper_basex_speed(state); } -static int mvneta_mac_link_state(struct net_device *ndev, +static int mvneta_mac_link_state(struct phylink_config *config, struct phylink_link_state *state) { + struct net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); u32 gmac_stat; @@ -3438,8 +3442,9 @@ static int mvneta_mac_link_state(struct net_device *ndev, return 1; } -static void mvneta_mac_an_restart(struct net_device *ndev) +static void mvneta_mac_an_restart(struct phylink_config *config) { + struct net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); u32 gmac_an = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); @@ -3449,9 +3454,10 @@ static void mvneta_mac_an_restart(struct net_device *ndev) gmac_an & ~MVNETA_GMAC_INBAND_RESTART_AN); } -static void mvneta_mac_config(struct net_device *ndev, unsigned int mode, - const struct phylink_link_state *state) +static void mvneta_mac_config(struct phylink_config *config, unsigned int mode, + const struct phylink_link_state *state) { + struct net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); u32 new_ctrl0, gmac_ctrl0 = mvreg_read(pp, MVNETA_GMAC_CTRL_0); u32 new_ctrl2, gmac_ctrl2 = mvreg_read(pp, MVNETA_GMAC_CTRL_2); @@ -3581,9 +3587,10 @@ static void mvneta_set_eee(struct mvneta_port *pp, bool enable) mvreg_write(pp, MVNETA_LPI_CTRL_1, lpi_ctl1); } -static void mvneta_mac_link_down(struct net_device *ndev, unsigned int mode, - phy_interface_t interface) +static void mvneta_mac_link_down(struct phylink_config *config, + unsigned int mode, phy_interface_t interface) { + struct net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); u32 val; @@ -3600,10 +3607,11 @@ static void mvneta_mac_link_down(struct net_device *ndev, unsigned int mode, mvneta_set_eee(pp, false); } -static void mvneta_mac_link_up(struct net_device *ndev, unsigned int mode, +static void mvneta_mac_link_up(struct phylink_config *config, unsigned int mode, phy_interface_t interface, struct phy_device *phy) { + struct net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); u32 val; @@ -4500,8 +4508,14 @@ static int mvneta_probe(struct platform_device *pdev) comphy = NULL; } - phylink = phylink_create(dev, pdev->dev.fwnode, phy_mode, - &mvneta_phylink_ops); + pp = netdev_priv(dev); + spin_lock_init(&pp->lock); + + pp->phylink_config.dev = &dev->dev; + pp->phylink_config.type = PHYLINK_NETDEV; + + phylink = phylink_create(&pp->phylink_config, pdev->dev.fwnode, + phy_mode, &mvneta_phylink_ops); if (IS_ERR(phylink)) { err = PTR_ERR(phylink); goto err_free_irq; @@ -4513,8 +4527,6 @@ static int mvneta_probe(struct platform_device *pdev) dev->ethtool_ops = &mvneta_eth_tool_ops; - pp = netdev_priv(dev); - spin_lock_init(&pp->lock); pp->phylink = phylink; pp->comphy = comphy; pp->phy_interface = phy_mode; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index 18ae8d06b692..d67c970f02e5 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -915,6 +915,7 @@ struct mvpp2_port { phy_interface_t phy_interface; struct phylink *phylink; + struct phylink_config phylink_config; struct phy *comphy; struct mvpp2_bm_pool *pool_long; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 3ed713b8dea5..757f8e31645e 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -56,9 +56,9 @@ static struct { /* The prototype is added here to be used in start_dev when using ACPI. This * will be removed once phylink is used for all modes (dt+ACPI). */ -static void mvpp2_mac_config(struct net_device *dev, unsigned int mode, +static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state); -static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode, +static void mvpp2_mac_link_up(struct phylink_config *config, unsigned int mode, phy_interface_t interface, struct phy_device *phy); /* Queue modes */ @@ -3239,9 +3239,9 @@ static void mvpp2_start_dev(struct mvpp2_port *port) struct phylink_link_state state = { .interface = port->phy_interface, }; - mvpp2_mac_config(port->dev, MLO_AN_INBAND, &state); - mvpp2_mac_link_up(port->dev, MLO_AN_INBAND, port->phy_interface, - NULL); + mvpp2_mac_config(&port->phylink_config, MLO_AN_INBAND, &state); + mvpp2_mac_link_up(&port->phylink_config, MLO_AN_INBAND, + port->phy_interface, NULL); } netif_tx_start_all_queues(port->dev); @@ -4463,11 +4463,12 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv, eth_hw_addr_random(dev); } -static void mvpp2_phylink_validate(struct net_device *dev, +static void mvpp2_phylink_validate(struct phylink_config *config, unsigned long *supported, struct phylink_link_state *state) { - struct mvpp2_port *port = netdev_priv(dev); + struct mvpp2_port *port = container_of(config, struct mvpp2_port, + phylink_config); __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; /* Invalid combinations */ @@ -4591,10 +4592,11 @@ static void mvpp2_gmac_link_state(struct mvpp2_port *port, state->pause |= MLO_PAUSE_TX; } -static int mvpp2_phylink_mac_link_state(struct net_device *dev, +static int mvpp2_phylink_mac_link_state(struct phylink_config *config, struct phylink_link_state *state) { - struct mvpp2_port *port = netdev_priv(dev); + struct mvpp2_port *port = container_of(config, struct mvpp2_port, + phylink_config); if (port->priv->hw_version == MVPP22 && port->gop_id == 0) { u32 mode = readl(port->base + MVPP22_XLG_CTRL3_REG); @@ -4610,9 +4612,10 @@ static int mvpp2_phylink_mac_link_state(struct net_device *dev, return 1; } -static void mvpp2_mac_an_restart(struct net_device *dev) +static void mvpp2_mac_an_restart(struct phylink_config *config) { - struct mvpp2_port *port = netdev_priv(dev); + struct mvpp2_port *port = container_of(config, struct mvpp2_port, + phylink_config); u32 val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); writel(val | MVPP2_GMAC_IN_BAND_RESTART_AN, @@ -4797,9 +4800,10 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode, } } -static void mvpp2_mac_config(struct net_device *dev, unsigned int mode, +static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) { + struct net_device *dev = to_net_dev(config->dev); struct mvpp2_port *port = netdev_priv(dev); bool change_interface = port->phy_interface != state->interface; @@ -4839,9 +4843,10 @@ static void mvpp2_mac_config(struct net_device *dev, unsigned int mode, mvpp2_port_enable(port); } -static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode, +static void mvpp2_mac_link_up(struct phylink_config *config, unsigned int mode, phy_interface_t interface, struct phy_device *phy) { + struct net_device *dev = to_net_dev(config->dev); struct mvpp2_port *port = netdev_priv(dev); u32 val; @@ -4866,9 +4871,10 @@ static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode, netif_tx_wake_all_queues(dev); } -static void mvpp2_mac_link_down(struct net_device *dev, unsigned int mode, - phy_interface_t interface) +static void mvpp2_mac_link_down(struct phylink_config *config, + unsigned int mode, phy_interface_t interface) { + struct net_device *dev = to_net_dev(config->dev); struct mvpp2_port *port = netdev_priv(dev); u32 val; @@ -5125,8 +5131,11 @@ static int mvpp2_port_probe(struct platform_device *pdev, /* Phylink isn't used w/ ACPI as of now */ if (port_node) { - phylink = phylink_create(dev, port_fwnode, phy_mode, - &mvpp2_phylink_ops); + port->phylink_config.dev = &dev->dev; + port->phylink_config.type = PHYLINK_NETDEV; + + phylink = phylink_create(&port->phylink_config, port_fwnode, + phy_mode, &mvpp2_phylink_ops); if (IS_ERR(phylink)) { err = PTR_ERR(phylink); goto err_free_port_pcpu; diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 83ab83c3edba..5a283bf9d402 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -41,6 +41,7 @@ struct phylink { /* private: */ struct net_device *netdev; const struct phylink_mac_ops *ops; + struct phylink_config *config; unsigned long phylink_disable_state; /* bitmask of disables */ struct phy_device *phydev; @@ -111,7 +112,7 @@ static const char *phylink_an_mode_str(unsigned int mode) static int phylink_validate(struct phylink *pl, unsigned long *supported, struct phylink_link_state *state) { - pl->ops->validate(pl->netdev, supported, state); + pl->ops->validate(pl->config, supported, state); return phylink_is_empty_linkmode(supported) ? -EINVAL : 0; } @@ -299,7 +300,7 @@ static void phylink_mac_config(struct phylink *pl, __ETHTOOL_LINK_MODE_MASK_NBITS, state->advertising, state->pause, state->link, state->an_enabled); - pl->ops->mac_config(pl->netdev, pl->link_an_mode, state); + pl->ops->mac_config(pl->config, pl->link_an_mode, state); } static void phylink_mac_config_up(struct phylink *pl, @@ -313,12 +314,11 @@ static void phylink_mac_an_restart(struct phylink *pl) { if (pl->link_config.an_enabled && phy_interface_mode_is_8023z(pl->link_config.interface)) - pl->ops->mac_an_restart(pl->netdev); + pl->ops->mac_an_restart(pl->config); } static int phylink_get_mac_state(struct phylink *pl, struct phylink_link_state *state) { - struct net_device *ndev = pl->netdev; linkmode_copy(state->advertising, pl->link_config.advertising); linkmode_zero(state->lp_advertising); @@ -330,7 +330,7 @@ static int phylink_get_mac_state(struct phylink *pl, struct phylink_link_state * state->an_complete = 0; state->link = 1; - return pl->ops->mac_link_state(ndev, state); + return pl->ops->mac_link_state(pl->config, state); } /* The fixed state is... fixed except for the link state, @@ -400,7 +400,7 @@ static void phylink_mac_link_up(struct phylink *pl, { struct net_device *ndev = pl->netdev; - pl->ops->mac_link_up(ndev, pl->link_an_mode, + pl->ops->mac_link_up(pl->config, pl->link_an_mode, pl->phy_state.interface, pl->phydev); @@ -418,7 +418,7 @@ static void phylink_mac_link_down(struct phylink *pl) struct net_device *ndev = pl->netdev; netif_carrier_off(ndev); - pl->ops->mac_link_down(ndev, pl->link_an_mode, + pl->ops->mac_link_down(pl->config, pl->link_an_mode, pl->phy_state.interface); netdev_info(ndev, "Link is Down\n"); } @@ -553,7 +553,7 @@ static int phylink_register_sfp(struct phylink *pl, * Returns a pointer to a &struct phylink, or an error-pointer value. Users * must use IS_ERR() to check for errors from this function. */ -struct phylink *phylink_create(struct net_device *ndev, +struct phylink *phylink_create(struct phylink_config *config, struct fwnode_handle *fwnode, phy_interface_t iface, const struct phylink_mac_ops *ops) @@ -567,7 +567,15 @@ struct phylink *phylink_create(struct net_device *ndev, mutex_init(&pl->state_mutex); INIT_WORK(&pl->resolve, phylink_resolve); - pl->netdev = ndev; + + pl->config = config; + if (config->type == PHYLINK_NETDEV) { + pl->netdev = to_net_dev(config->dev); + } else { + kfree(pl); + return ERR_PTR(-EINVAL); + } + pl->phy_state.interface = iface; pl->link_interface = iface; if (iface == PHY_INTERFACE_MODE_MOCA) diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 6411c624f63a..67f35f07ac4b 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -54,6 +54,20 @@ struct phylink_link_state { unsigned int an_complete:1; }; +enum phylink_op_type { + PHYLINK_NETDEV = 0, +}; + +/** + * struct phylink_config - PHYLINK configuration structure + * @dev: a pointer to a struct device associated with the MAC + * @type: operation type of PHYLINK instance + */ +struct phylink_config { + struct device *dev; + enum phylink_op_type type; +}; + /** * struct phylink_mac_ops - MAC operations structure. * @validate: Validate and update the link configuration. @@ -66,16 +80,17 @@ struct phylink_link_state { * The individual methods are described more fully below. */ struct phylink_mac_ops { - void (*validate)(struct net_device *ndev, unsigned long *supported, + void (*validate)(struct phylink_config *config, + unsigned long *supported, struct phylink_link_state *state); - int (*mac_link_state)(struct net_device *ndev, + int (*mac_link_state)(struct phylink_config *config, struct phylink_link_state *state); - void (*mac_config)(struct net_device *ndev, unsigned int mode, + void (*mac_config)(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state); - void (*mac_an_restart)(struct net_device *ndev); - void (*mac_link_down)(struct net_device *ndev, unsigned int mode, + void (*mac_an_restart)(struct phylink_config *config); + void (*mac_link_down)(struct phylink_config *config, unsigned int mode, phy_interface_t interface); - void (*mac_link_up)(struct net_device *ndev, unsigned int mode, + void (*mac_link_up)(struct phylink_config *config, unsigned int mode, phy_interface_t interface, struct phy_device *phy); }; @@ -83,7 +98,7 @@ struct phylink_mac_ops { #if 0 /* For kernel-doc purposes only. */ /** * validate - Validate and update the link configuration - * @ndev: a pointer to a &struct net_device for the MAC. + * @config: a pointer to a &struct phylink_config. * @supported: ethtool bitmask for supported link modes. * @state: a pointer to a &struct phylink_link_state. * @@ -100,12 +115,12 @@ struct phylink_mac_ops { * based on @state->advertising and/or @state->speed and update * @state->interface accordingly. */ -void validate(struct net_device *ndev, unsigned long *supported, +void validate(struct phylink_config *config, unsigned long *supported, struct phylink_link_state *state); /** * mac_link_state() - Read the current link state from the hardware - * @ndev: a pointer to a &struct net_device for the MAC. + * @config: a pointer to a &struct phylink_config. * @state: a pointer to a &struct phylink_link_state. * * Read the current link state from the MAC, reporting the current @@ -114,12 +129,12 @@ void validate(struct net_device *ndev, unsigned long *supported, * negotiation completion state in @state->an_complete, and link * up state in @state->link. */ -int mac_link_state(struct net_device *ndev, +int mac_link_state(struct phylink_config *config, struct phylink_link_state *state); /** * mac_config() - configure the MAC for the selected mode and state - * @ndev: a pointer to a &struct net_device for the MAC. + * @config: a pointer to a &struct phylink_config. * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND. * @state: a pointer to a &struct phylink_link_state. * @@ -157,18 +172,18 @@ int mac_link_state(struct net_device *ndev, * down. This "update" behaviour is critical to avoid bouncing the * link up status. */ -void mac_config(struct net_device *ndev, unsigned int mode, +void mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state); /** * mac_an_restart() - restart 802.3z BaseX autonegotiation - * @ndev: a pointer to a &struct net_device for the MAC. + * @config: a pointer to a &struct phylink_config. */ -void mac_an_restart(struct net_device *ndev); +void mac_an_restart(struct phylink_config *config); /** * mac_link_down() - take the link down - * @ndev: a pointer to a &struct net_device for the MAC. + * @config: a pointer to a &struct phylink_config. * @mode: link autonegotiation mode * @interface: link &typedef phy_interface_t mode * @@ -177,12 +192,12 @@ void mac_an_restart(struct net_device *ndev); * Energy Efficient Ethernet MAC configuration. Interface type * selection must be done in mac_config(). */ -void mac_link_down(struct net_device *ndev, unsigned int mode, +void mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface); /** * mac_link_up() - allow the link to come up - * @ndev: a pointer to a &struct net_device for the MAC. + * @config: a pointer to a &struct phylink_config. * @mode: link autonegotiation mode * @interface: link &typedef phy_interface_t mode * @phy: any attached phy @@ -193,13 +208,14 @@ void mac_link_down(struct net_device *ndev, unsigned int mode, * phy_init_eee() and perform appropriate MAC configuration for EEE. * Interface type selection must be done in mac_config(). */ -void mac_link_up(struct net_device *ndev, unsigned int mode, +void mac_link_up(struct phylink_config *config, unsigned int mode, phy_interface_t interface, struct phy_device *phy); #endif -struct phylink *phylink_create(struct net_device *, struct fwnode_handle *, - phy_interface_t iface, const struct phylink_mac_ops *ops); +struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *, + phy_interface_t iface, + const struct phylink_mac_ops *ops); void phylink_destroy(struct phylink *); int phylink_connect_phy(struct phylink *, struct phy_device *); diff --git a/include/net/dsa.h b/include/net/dsa.h index 685294817712..a7f36219904f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -193,6 +194,7 @@ struct dsa_port { struct net_device *bridge_dev; struct devlink_port devlink_port; struct phylink *pl; + struct phylink_config pl_config; struct work_struct xmit_work; struct sk_buff_head xmit_queue; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 9892ca1f6859..48e017637d4f 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1164,11 +1164,11 @@ static struct device_type dsa_type = { .name = "dsa", }; -static void dsa_slave_phylink_validate(struct net_device *dev, +static void dsa_slave_phylink_validate(struct phylink_config *config, unsigned long *supported, struct phylink_link_state *state) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); struct dsa_switch *ds = dp->ds; if (!ds->ops->phylink_validate) @@ -1177,10 +1177,10 @@ static void dsa_slave_phylink_validate(struct net_device *dev, ds->ops->phylink_validate(ds, dp->index, supported, state); } -static int dsa_slave_phylink_mac_link_state(struct net_device *dev, +static int dsa_slave_phylink_mac_link_state(struct phylink_config *config, struct phylink_link_state *state) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); struct dsa_switch *ds = dp->ds; /* Only called for SGMII and 802.3z */ @@ -1190,11 +1190,11 @@ static int dsa_slave_phylink_mac_link_state(struct net_device *dev, return ds->ops->phylink_mac_link_state(ds, dp->index, state); } -static void dsa_slave_phylink_mac_config(struct net_device *dev, +static void dsa_slave_phylink_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); struct dsa_switch *ds = dp->ds; if (!ds->ops->phylink_mac_config) @@ -1203,9 +1203,9 @@ static void dsa_slave_phylink_mac_config(struct net_device *dev, ds->ops->phylink_mac_config(ds, dp->index, mode, state); } -static void dsa_slave_phylink_mac_an_restart(struct net_device *dev) +static void dsa_slave_phylink_mac_an_restart(struct phylink_config *config) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); struct dsa_switch *ds = dp->ds; if (!ds->ops->phylink_mac_an_restart) @@ -1214,11 +1214,12 @@ static void dsa_slave_phylink_mac_an_restart(struct net_device *dev) ds->ops->phylink_mac_an_restart(ds, dp->index); } -static void dsa_slave_phylink_mac_link_down(struct net_device *dev, +static void dsa_slave_phylink_mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); + struct net_device *dev = dp->slave; struct dsa_switch *ds = dp->ds; if (!ds->ops->phylink_mac_link_down) { @@ -1230,12 +1231,13 @@ static void dsa_slave_phylink_mac_link_down(struct net_device *dev, ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface); } -static void dsa_slave_phylink_mac_link_up(struct net_device *dev, +static void dsa_slave_phylink_mac_link_up(struct phylink_config *config, unsigned int mode, phy_interface_t interface, struct phy_device *phydev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); + struct net_device *dev = dp->slave; struct dsa_switch *ds = dp->ds; if (!ds->ops->phylink_mac_link_up) { @@ -1303,7 +1305,10 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) if (mode < 0) mode = PHY_INTERFACE_MODE_NA; - dp->pl = phylink_create(slave_dev, of_fwnode_handle(port_dn), mode, + dp->pl_config.dev = &slave_dev->dev; + dp->pl_config.type = PHYLINK_NETDEV; + + dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(port_dn), mode, &dsa_slave_phylink_mac_ops); if (IS_ERR(dp->pl)) { netdev_err(slave_dev, -- cgit v1.2.3 From 43de61959b999279bafb031c0c9bdf0f6cd1c501 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 28 May 2019 20:38:13 +0300 Subject: net: phylink: Add PHYLINK_DEV operation type In the PHYLINK_DEV operation type, the PHYLINK infrastructure can work without an attached net_device. For printing usecases, instead, a struct device * should be passed to PHYLINK using the phylink_config structure. Also, netif_carrier_* calls ar guarded by the presence of a valid net_device. When using the PHYLINK_DEV operation type, we cannot check link status using the netif_carrier_ok() API so instead, keep an internal state of the MAC and call mac_link_{down,up} only when the link changed. Signed-off-by: Ioana Ciornei Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 25 ++++++++++++++++++++----- include/linux/phylink.h | 1 + 2 files changed, 21 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 5a283bf9d402..5f6120f3fa3f 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -42,6 +42,8 @@ struct phylink { struct net_device *netdev; const struct phylink_mac_ops *ops; struct phylink_config *config; + struct device *dev; + unsigned int old_link_state:1; unsigned long phylink_disable_state; /* bitmask of disables */ struct phy_device *phydev; @@ -404,7 +406,8 @@ static void phylink_mac_link_up(struct phylink *pl, pl->phy_state.interface, pl->phydev); - netif_carrier_on(ndev); + if (ndev) + netif_carrier_on(ndev); netdev_info(ndev, "Link is Up - %s/%s - flow control %s\n", @@ -417,7 +420,8 @@ static void phylink_mac_link_down(struct phylink *pl) { struct net_device *ndev = pl->netdev; - netif_carrier_off(ndev); + if (ndev) + netif_carrier_off(ndev); pl->ops->mac_link_down(pl->config, pl->link_an_mode, pl->phy_state.interface); netdev_info(ndev, "Link is Down\n"); @@ -428,6 +432,7 @@ static void phylink_resolve(struct work_struct *w) struct phylink *pl = container_of(w, struct phylink, resolve); struct phylink_link_state link_state; struct net_device *ndev = pl->netdev; + int link_changed; mutex_lock(&pl->state_mutex); if (pl->phylink_disable_state) { @@ -470,7 +475,13 @@ static void phylink_resolve(struct work_struct *w) } } - if (link_state.link != netif_carrier_ok(ndev)) { + if (pl->netdev) + link_changed = (link_state.link != netif_carrier_ok(ndev)); + else + link_changed = (link_state.link != pl->old_link_state); + + if (link_changed) { + pl->old_link_state = link_state.link; if (!link_state.link) phylink_mac_link_down(pl); else @@ -571,6 +582,8 @@ struct phylink *phylink_create(struct phylink_config *config, pl->config = config; if (config->type == PHYLINK_NETDEV) { pl->netdev = to_net_dev(config->dev); + } else if (config->type == PHYLINK_DEV) { + pl->dev = config->dev; } else { kfree(pl); return ERR_PTR(-EINVAL); @@ -910,7 +923,8 @@ void phylink_start(struct phylink *pl) phy_modes(pl->link_config.interface)); /* Always set the carrier off */ - netif_carrier_off(pl->netdev); + if (pl->netdev) + netif_carrier_off(pl->netdev); /* Apply the link configuration to the MAC when starting. This allows * a fixed-link to start with the correct parameters, and also @@ -1255,7 +1269,8 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl, switch (pl->link_an_mode) { case MLO_AN_PHY: /* Silently mark the carrier down, and then trigger a resolve */ - netif_carrier_off(pl->netdev); + if (pl->netdev) + netif_carrier_off(pl->netdev); phylink_run_resolve(pl); break; diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 67f35f07ac4b..0f6f65bb9d44 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -56,6 +56,7 @@ struct phylink_link_state { enum phylink_op_type { PHYLINK_NETDEV = 0, + PHYLINK_DEV, }; /** -- cgit v1.2.3 From 84ede58dfcd1db6f04f71dd3ccd5328271b346da Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 20 May 2019 09:54:46 -0700 Subject: crypto: hash - remove CRYPTO_ALG_TYPE_DIGEST Remove the unnecessary constant CRYPTO_ALG_TYPE_DIGEST, which has the same value as CRYPTO_ALG_TYPE_HASH. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- Documentation/crypto/architecture.rst | 4 +--- crypto/cryptd.c | 2 +- include/linux/crypto.h | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/crypto/architecture.rst b/Documentation/crypto/architecture.rst index ee8ff0762d7f..3eae1ae7f798 100644 --- a/Documentation/crypto/architecture.rst +++ b/Documentation/crypto/architecture.rst @@ -208,9 +208,7 @@ the aforementioned cipher types: - CRYPTO_ALG_TYPE_KPP Key-agreement Protocol Primitive (KPP) such as an ECDH or DH implementation -- CRYPTO_ALG_TYPE_DIGEST Raw message digest - -- CRYPTO_ALG_TYPE_HASH Alias for CRYPTO_ALG_TYPE_DIGEST +- CRYPTO_ALG_TYPE_HASH Raw message digest - CRYPTO_ALG_TYPE_SHASH Synchronous multi-block hash diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 1bf777b76512..c34d10309b1b 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -925,7 +925,7 @@ static int cryptd_create(struct crypto_template *tmpl, struct rtattr **tb) switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_BLKCIPHER: return cryptd_create_skcipher(tmpl, tb, &queue); - case CRYPTO_ALG_TYPE_DIGEST: + case CRYPTO_ALG_TYPE_HASH: return cryptd_create_hash(tmpl, tb, &queue); case CRYPTO_ALG_TYPE_AEAD: return cryptd_create_aead(tmpl, tb, &queue); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index f2565a103158..311237b1dab0 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -54,7 +54,6 @@ #define CRYPTO_ALG_TYPE_SCOMPRESS 0x0000000b #define CRYPTO_ALG_TYPE_RNG 0x0000000c #define CRYPTO_ALG_TYPE_AKCIPHER 0x0000000d -#define CRYPTO_ALG_TYPE_DIGEST 0x0000000e #define CRYPTO_ALG_TYPE_HASH 0x0000000e #define CRYPTO_ALG_TYPE_SHASH 0x0000000e #define CRYPTO_ALG_TYPE_AHASH 0x0000000f -- cgit v1.2.3 From 1e91a2e5d827e643cbabad66d133f155a7fcb0de Mon Sep 17 00:00:00 2001 From: Ruslan Babayev Date: Tue, 28 May 2019 16:02:32 -0700 Subject: i2c: acpi: export i2c_acpi_find_adapter_by_handle This allows drivers to lookup i2c adapters on ACPI based systems similar to of_get_i2c_adapter_by_node() with DT based systems. Signed-off-by: Ruslan Babayev Cc: xe-linux-external@cisco.com Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/i2c/i2c-core-acpi.c | 3 ++- include/linux/i2c.h | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index 272800692088..964687534754 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -337,7 +337,7 @@ static int i2c_acpi_find_match_device(struct device *dev, void *data) return ACPI_COMPANION(dev) == data; } -static struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle) +struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle) { struct device *dev; @@ -345,6 +345,7 @@ static struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle) i2c_acpi_find_match_adapter); return dev ? i2c_verify_adapter(dev) : NULL; } +EXPORT_SYMBOL_GPL(i2c_acpi_find_adapter_by_handle); static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev) { diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 1308126fc384..e982b8913b73 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -14,6 +14,7 @@ #ifndef _LINUX_I2C_H #define _LINUX_I2C_H +#include /* for acpi_handle */ #include #include /* for struct device */ #include /* for completion */ @@ -981,6 +982,7 @@ bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, u32 i2c_acpi_find_bus_speed(struct device *dev); struct i2c_client *i2c_acpi_new_device(struct device *dev, int index, struct i2c_board_info *info); +struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle); #else static inline bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c) @@ -996,6 +998,10 @@ static inline struct i2c_client *i2c_acpi_new_device(struct device *dev, { return NULL; } +static inline struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle) +{ + return NULL; +} #endif /* CONFIG_ACPI */ #endif /* _LINUX_I2C_H */ -- cgit v1.2.3 From 764dd163ac922f8683b5bcd3007251ce7b26cd33 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 29 May 2019 13:25:38 +0200 Subject: netfilter: nf_conntrack_bridge: add support for IPv6 br_defrag() and br_fragment() indirections are added in case that IPv6 support comes as a module, to avoid pulling innecessary dependencies in. The new fraglist iterator and fragment transformer APIs are used to implement the refragmentation code. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netfilter_ipv6.h | 50 ++++++++++++ net/bridge/netfilter/nf_conntrack_bridge.c | 59 +++++++++++++- net/ipv6/netfilter.c | 123 +++++++++++++++++++++++++++++ 3 files changed, 230 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 12113e502656..a21b8c9623ee 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -19,6 +19,7 @@ struct ip6_rt_info { }; struct nf_queue_entry; +struct nf_ct_bridge_frag_data; /* * Hook functions for ipv6 to allow xt_* modules to be built-in even @@ -39,6 +40,15 @@ struct nf_ipv6_ops { int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry); +#if IS_MODULE(CONFIG_IPV6) + int (*br_defrag)(struct net *net, struct sk_buff *skb, u32 user); + int (*br_fragment)(struct net *net, struct sock *sk, + struct sk_buff *skb, + struct nf_ct_bridge_frag_data *data, + int (*output)(struct net *, struct sock *sk, + const struct nf_ct_bridge_frag_data *data, + struct sk_buff *)); +#endif }; #ifdef CONFIG_NETFILTER @@ -86,6 +96,46 @@ static inline int nf_ip6_route(struct net *net, struct dst_entry **dst, #endif } +static inline int nf_ipv6_br_defrag(struct net *net, struct sk_buff *skb, + u32 user) +{ +#if IS_MODULE(CONFIG_IPV6) + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (!v6_ops) + return 1; + + return v6_ops->br_defrag(net, skb, user); +#else + return nf_ct_frag6_gather(net, skb, user); +#endif +} + +int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + struct nf_ct_bridge_frag_data *data, + int (*output)(struct net *, struct sock *sk, + const struct nf_ct_bridge_frag_data *data, + struct sk_buff *)); + +static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk, + struct sk_buff *skb, + struct nf_ct_bridge_frag_data *data, + int (*output)(struct net *, struct sock *sk, + const struct nf_ct_bridge_frag_data *data, + struct sk_buff *)) +{ +#if IS_MODULE(CONFIG_IPV6) + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (!v6_ops) + return 1; + + return v6_ops->br_fragment(net, sk, skb, data, output); +#else + return br_ip6_fragment(net, sk, skb, data, output); +#endif +} + int ip6_route_me_harder(struct net *net, struct sk_buff *skb); static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb) diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 2571528ed582..b675cd7c1a82 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -163,6 +163,31 @@ static unsigned int nf_ct_br_defrag4(struct sk_buff *skb, return NF_STOLEN; } +static unsigned int nf_ct_br_defrag6(struct sk_buff *skb, + const struct nf_hook_state *state) +{ + u16 zone_id = NF_CT_DEFAULT_ZONE_ID; + enum ip_conntrack_info ctinfo; + struct br_input_skb_cb cb; + const struct nf_conn *ct; + int err; + + ct = nf_ct_get(skb, &ctinfo); + if (ct) + zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); + + br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm)); + + err = nf_ipv6_br_defrag(state->net, skb, + IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); + /* queued */ + if (err == -EINPROGRESS) + return NF_STOLEN; + + br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size); + return err == 0 ? NF_ACCEPT : NF_DROP; +} + static int nf_ct_br_ip_check(const struct sk_buff *skb) { const struct iphdr *iph; @@ -177,6 +202,23 @@ static int nf_ct_br_ip_check(const struct sk_buff *skb) len = ntohs(iph->tot_len); if (skb->len < nhoff + len || len < (iph->ihl * 4)) + return -1; + + return 0; +} + +static int nf_ct_br_ipv6_check(const struct sk_buff *skb) +{ + const struct ipv6hdr *hdr; + int nhoff, len; + + nhoff = skb_network_offset(skb); + hdr = ipv6_hdr(skb); + if (hdr->version != 6) + return -1; + + len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff; + if (skb->len < len) return -1; return 0; @@ -212,7 +254,19 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, ret = nf_ct_br_defrag4(skb, &bridge_state); break; case htons(ETH_P_IPV6): - /* fall through */ + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + return NF_ACCEPT; + + len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); + if (pskb_trim_rcsum(skb, len)) + return NF_ACCEPT; + + if (nf_ct_br_ipv6_check(skb)) + return NF_ACCEPT; + + bridge_state.pf = NFPROTO_IPV6; + ret = nf_ct_br_defrag6(skb, &bridge_state); + break; default: nf_ct_set(skb, NULL, IP_CT_UNTRACKED); return NF_ACCEPT; @@ -254,7 +308,8 @@ nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state, nf_br_ip_fragment(state->net, state->sk, skb, &data, output); break; case htons(ETH_P_IPV6): - return NF_ACCEPT; + nf_br_ip6_fragment(state->net, state->sk, skb, &data, output); + break; default: WARN_ON_ONCE(1); return NF_DROP; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 1240ccd57f39..c6665382acb5 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -16,6 +16,9 @@ #include #include #include +#include +#include +#include "../bridge/br_private.h" int ip6_route_me_harder(struct net *net, struct sk_buff *skb) { @@ -109,6 +112,122 @@ int __nf_ip6_route(struct net *net, struct dst_entry **dst, } EXPORT_SYMBOL_GPL(__nf_ip6_route); +int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + struct nf_ct_bridge_frag_data *data, + int (*output)(struct net *, struct sock *sk, + const struct nf_ct_bridge_frag_data *data, + struct sk_buff *)) +{ + int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; + struct ip6_frag_state state; + u8 *prevhdr, nexthdr = 0; + unsigned int mtu, hlen; + int hroom, err = 0; + __be32 frag_id; + + err = ip6_find_1stfragopt(skb, &prevhdr); + if (err < 0) + goto blackhole; + hlen = err; + nexthdr = *prevhdr; + + mtu = skb->dev->mtu; + if (frag_max_size > mtu || + frag_max_size < IPV6_MIN_MTU) + goto blackhole; + + mtu = frag_max_size; + if (mtu < hlen + sizeof(struct frag_hdr) + 8) + goto blackhole; + mtu -= hlen + sizeof(struct frag_hdr); + + frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, + &ipv6_hdr(skb)->saddr); + + if (skb->ip_summed == CHECKSUM_PARTIAL && + (err = skb_checksum_help(skb))) + goto blackhole; + + hroom = LL_RESERVED_SPACE(skb->dev); + if (skb_has_frag_list(skb)) { + unsigned int first_len = skb_pagelen(skb); + struct ip6_fraglist_iter iter; + struct sk_buff *frag2; + + if (first_len - hlen > mtu || + skb_headroom(skb) < (hroom + sizeof(struct frag_hdr))) + goto blackhole; + + if (skb_cloned(skb)) + goto slow_path; + + skb_walk_frags(skb, frag2) { + if (frag2->len > mtu || + skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr))) + goto blackhole; + + /* Partially cloned skb? */ + if (skb_shared(frag2)) + goto slow_path; + } + + err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id, + &iter); + if (err < 0) + goto blackhole; + + for (;;) { + /* Prepare header of the next frame, + * before previous one went down. + */ + if (iter.frag) + ip6_fraglist_prepare(skb, &iter); + + err = output(net, sk, data, skb); + if (err || !iter.frag) + break; + + skb = ip6_fraglist_next(&iter); + } + + kfree(iter.tmp_hdr); + if (!err) + return 0; + + kfree_skb_list(iter.frag_list); + return err; + } +slow_path: + /* This is a linearized skbuff, the original geometry is lost for us. + * This may also be a clone skbuff, we could preserve the geometry for + * the copies but probably not worth the effort. + */ + ip6_frag_init(skb, hlen, mtu, skb->dev->needed_tailroom, + LL_RESERVED_SPACE(skb->dev), prevhdr, nexthdr, frag_id, + &state); + + while (state.left > 0) { + struct sk_buff *skb2; + + skb2 = ip6_frag_next(skb, &state); + if (IS_ERR(skb2)) { + err = PTR_ERR(skb2); + goto blackhole; + } + + err = output(net, sk, data, skb2); + if (err) + goto blackhole; + } + consume_skb(skb); + return err; + +blackhole: + kfree_skb(skb); + return 0; +} +EXPORT_SYMBOL_GPL(br_ip6_fragment); + static const struct nf_ipv6_ops ipv6ops = { #if IS_MODULE(CONFIG_IPV6) .chk_addr = ipv6_chk_addr, @@ -119,6 +238,10 @@ static const struct nf_ipv6_ops ipv6ops = { .route_input = ip6_route_input, .fragment = ip6_fragment, .reroute = nf_ip6_reroute, +#if IS_MODULE(CONFIG_NF_CONNTRACK_BRIDGE) + .br_defrag = nf_ct_frag6_gather, + .br_fragment = br_ip6_fragment, +#endif }; int __init ipv6_netfilter_init(void) -- cgit v1.2.3 From ed0ac5c7ec3763e3261c48e3c5d4b7528b60fd85 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 20 May 2019 21:51:50 +0100 Subject: keys: Add a keyctl to move a key between keyrings Add a keyctl to atomically move a link to a key from one keyring to another. The key must exist in "from" keyring and a flag can be given to cause the operation to fail if there's a matching key already in the "to" keyring. This can be done with: keyctl(KEYCTL_MOVE, key_serial_t key, key_serial_t from_keyring, key_serial_t to_keyring, unsigned int flags); The key being moved must grant Link permission and both keyrings must grant Write permission. flags should be 0 or KEYCTL_MOVE_EXCL, with the latter preventing displacement of a matching key from the "to" keyring. Signed-off-by: David Howells --- Documentation/security/keys/core.rst | 21 +++++++ include/linux/key.h | 5 ++ include/uapi/linux/keyctl.h | 3 + security/keys/compat.c | 3 + security/keys/internal.h | 3 + security/keys/keyctl.c | 52 +++++++++++++++++ security/keys/keyring.c | 108 +++++++++++++++++++++++++++++++++++ 7 files changed, 195 insertions(+) (limited to 'include/linux') diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst index 9521c4207f01..823d29bf44f7 100644 --- a/Documentation/security/keys/core.rst +++ b/Documentation/security/keys/core.rst @@ -577,6 +577,27 @@ The keyctl syscall functions are: added. + * Move a key from one keyring to another:: + + long keyctl(KEYCTL_MOVE, + key_serial_t id, + key_serial_t from_ring_id, + key_serial_t to_ring_id, + unsigned int flags); + + Move the key specified by "id" from the keyring specified by + "from_ring_id" to the keyring specified by "to_ring_id". If the two + keyrings are the same, nothing is done. + + "flags" can have KEYCTL_MOVE_EXCL set in it to cause the operation to fail + with EEXIST if a matching key exists in the destination keyring, otherwise + such a key will be replaced. + + A process must have link permission on the key for this function to be + successful and write permission on both keyrings. Any errors that can + occur from KEYCTL_LINK also apply on the destination keyring here. + + * Unlink a key or keyring from another keyring:: long keyctl(KEYCTL_UNLINK, key_serial_t keyring, key_serial_t key); diff --git a/include/linux/key.h b/include/linux/key.h index 1f09aad1c98c..612e1cf84049 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -310,6 +310,11 @@ extern int key_update(key_ref_t key, extern int key_link(struct key *keyring, struct key *key); +extern int key_move(struct key *key, + struct key *from_keyring, + struct key *to_keyring, + unsigned int flags); + extern int key_unlink(struct key *keyring, struct key *key); diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index f45ee0f69c0c..fd9fb11b312b 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -67,6 +67,7 @@ #define KEYCTL_PKEY_SIGN 27 /* Create a public key signature */ #define KEYCTL_PKEY_VERIFY 28 /* Verify a public key signature */ #define KEYCTL_RESTRICT_KEYRING 29 /* Restrict keys allowed to link to a keyring */ +#define KEYCTL_MOVE 30 /* Move keys between keyrings */ /* keyctl structures */ struct keyctl_dh_params { @@ -112,4 +113,6 @@ struct keyctl_pkey_params { __u32 __spare[7]; }; +#define KEYCTL_MOVE_EXCL 0x00000001 /* Do not displace from the to-keyring */ + #endif /* _LINUX_KEYCTL_H */ diff --git a/security/keys/compat.c b/security/keys/compat.c index 9482df601dc3..b326bc4f84d7 100644 --- a/security/keys/compat.c +++ b/security/keys/compat.c @@ -159,6 +159,9 @@ COMPAT_SYSCALL_DEFINE5(keyctl, u32, option, return keyctl_pkey_verify(compat_ptr(arg2), compat_ptr(arg3), compat_ptr(arg4), compat_ptr(arg5)); + case KEYCTL_MOVE: + return keyctl_keyring_move(arg2, arg3, arg4, arg5); + default: return -EOPNOTSUPP; } diff --git a/security/keys/internal.h b/security/keys/internal.h index 25cdd0cbdc06..b54a58c025ae 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -95,6 +95,8 @@ extern void key_type_put(struct key_type *ktype); extern int __key_link_lock(struct key *keyring, const struct keyring_index_key *index_key); +extern int __key_move_lock(struct key *l_keyring, struct key *u_keyring, + const struct keyring_index_key *index_key); extern int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_key, struct assoc_array_edit **_edit); @@ -217,6 +219,7 @@ extern long keyctl_update_key(key_serial_t, const void __user *, size_t); extern long keyctl_revoke_key(key_serial_t); extern long keyctl_keyring_clear(key_serial_t); extern long keyctl_keyring_link(key_serial_t, key_serial_t); +extern long keyctl_keyring_move(key_serial_t, key_serial_t, key_serial_t, unsigned int); extern long keyctl_keyring_unlink(key_serial_t, key_serial_t); extern long keyctl_describe_key(key_serial_t, char __user *, size_t); extern long keyctl_keyring_search(key_serial_t, const char __user *, diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 0f947bcbad46..bbfe7d92d41c 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -572,6 +572,52 @@ error: return ret; } +/* + * Move a link to a key from one keyring to another, displacing any matching + * key from the destination keyring. + * + * The key must grant the caller Link permission and both keyrings must grant + * the caller Write permission. There must also be a link in the from keyring + * to the key. If both keyrings are the same, nothing is done. + * + * If successful, 0 will be returned. + */ +long keyctl_keyring_move(key_serial_t id, key_serial_t from_ringid, + key_serial_t to_ringid, unsigned int flags) +{ + key_ref_t key_ref, from_ref, to_ref; + long ret; + + if (flags & ~KEYCTL_MOVE_EXCL) + return -EINVAL; + + key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE, KEY_NEED_LINK); + if (IS_ERR(key_ref)) + return PTR_ERR(key_ref); + + from_ref = lookup_user_key(from_ringid, 0, KEY_NEED_WRITE); + if (IS_ERR(from_ref)) { + ret = PTR_ERR(from_ref); + goto error2; + } + + to_ref = lookup_user_key(to_ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE); + if (IS_ERR(to_ref)) { + ret = PTR_ERR(to_ref); + goto error3; + } + + ret = key_move(key_ref_to_ptr(key_ref), key_ref_to_ptr(from_ref), + key_ref_to_ptr(to_ref), flags); + + key_ref_put(to_ref); +error3: + key_ref_put(from_ref); +error2: + key_ref_put(key_ref); + return ret; +} + /* * Return a description of a key to userspace. * @@ -1772,6 +1818,12 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3, (const void __user *)arg4, (const void __user *)arg5); + case KEYCTL_MOVE: + return keyctl_keyring_move((key_serial_t)arg2, + (key_serial_t)arg3, + (key_serial_t)arg4, + (unsigned int)arg5); + default: return -EOPNOTSUPP; } diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 12acad3db6cf..67066bb58b83 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -1221,6 +1221,40 @@ int __key_link_lock(struct key *keyring, return 0; } +/* + * Lock keyrings for move (link/unlink combination). + */ +int __key_move_lock(struct key *l_keyring, struct key *u_keyring, + const struct keyring_index_key *index_key) + __acquires(&l_keyring->sem) + __acquires(&u_keyring->sem) + __acquires(&keyring_serialise_link_lock) +{ + if (l_keyring->type != &key_type_keyring || + u_keyring->type != &key_type_keyring) + return -ENOTDIR; + + /* We have to be very careful here to take the keyring locks in the + * right order, lest we open ourselves to deadlocking against another + * move operation. + */ + if (l_keyring < u_keyring) { + down_write(&l_keyring->sem); + down_write_nested(&u_keyring->sem, 1); + } else { + down_write(&u_keyring->sem); + down_write_nested(&l_keyring->sem, 1); + } + + /* Serialise link/link calls to prevent parallel calls causing a cycle + * when linking two keyring in opposite orders. + */ + if (index_key->type == &key_type_keyring) + mutex_lock(&keyring_serialise_link_lock); + + return 0; +} + /* * Preallocate memory so that a key can be linked into to a keyring. */ @@ -1494,6 +1528,80 @@ int key_unlink(struct key *keyring, struct key *key) } EXPORT_SYMBOL(key_unlink); +/** + * key_move - Move a key from one keyring to another + * @key: The key to move + * @from_keyring: The keyring to remove the link from. + * @to_keyring: The keyring to make the link in. + * @flags: Qualifying flags, such as KEYCTL_MOVE_EXCL. + * + * Make a link in @to_keyring to a key, such that the keyring holds a reference + * on that key and the key can potentially be found by searching that keyring + * whilst simultaneously removing a link to the key from @from_keyring. + * + * This function will write-lock both keyring's semaphores and will consume + * some of the user's key data quota to hold the link on @to_keyring. + * + * Returns 0 if successful, -ENOTDIR if either keyring isn't a keyring, + * -EKEYREVOKED if either keyring has been revoked, -ENFILE if the second + * keyring is full, -EDQUOT if there is insufficient key data quota remaining + * to add another link or -ENOMEM if there's insufficient memory. If + * KEYCTL_MOVE_EXCL is set, then -EEXIST will be returned if there's already a + * matching key in @to_keyring. + * + * It is assumed that the caller has checked that it is permitted for a link to + * be made (the keyring should have Write permission and the key Link + * permission). + */ +int key_move(struct key *key, + struct key *from_keyring, + struct key *to_keyring, + unsigned int flags) +{ + struct assoc_array_edit *from_edit = NULL, *to_edit = NULL; + int ret; + + kenter("%d,%d,%d", key->serial, from_keyring->serial, to_keyring->serial); + + if (from_keyring == to_keyring) + return 0; + + key_check(key); + key_check(from_keyring); + key_check(to_keyring); + + ret = __key_move_lock(from_keyring, to_keyring, &key->index_key); + if (ret < 0) + goto out; + ret = __key_unlink_begin(from_keyring, key, &from_edit); + if (ret < 0) + goto error; + ret = __key_link_begin(to_keyring, &key->index_key, &to_edit); + if (ret < 0) + goto error; + + ret = -EEXIST; + if (to_edit->dead_leaf && (flags & KEYCTL_MOVE_EXCL)) + goto error; + + ret = __key_link_check_restriction(to_keyring, key); + if (ret < 0) + goto error; + ret = __key_link_check_live_key(to_keyring, key); + if (ret < 0) + goto error; + + __key_unlink(from_keyring, key, &from_edit); + __key_link(key, &to_edit); +error: + __key_link_end(to_keyring, &key->index_key, to_edit); + __key_unlink_end(from_keyring, key, from_edit); +out: + kleave(" = %d", ret); + return ret; +} +EXPORT_SYMBOL(key_move); + /** * keyring_clear - Clear a keyring * @keyring: The keyring to clear. -- cgit v1.2.3 From 07b0928918c694c845a387cc16256a8b63ced4fc Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 30 May 2019 15:09:15 +0200 Subject: net: phy: enable interrupts when PHY is attached already This patch is a step towards allowing PHY drivers to handle more interrupt sources than just link change. E.g. several PHY's have built-in temperature monitoring and can raise an interrupt if a temperature threshold is exceeded. We may be interested in such interrupts also if the phylib state machine isn't started. Therefore move enabling interrupts to phy_request_interrupt(). v2: - patch added to series Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 36 ++++++++++++++++++++++-------------- drivers/net/phy/phy_device.c | 2 +- include/linux/phy.h | 1 + 3 files changed, 24 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index e8885429293a..4ba71dc3aee7 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -799,10 +799,10 @@ static int phy_enable_interrupts(struct phy_device *phydev) } /** - * phy_request_interrupt - request interrupt for a PHY device + * phy_request_interrupt - request and enable interrupt for a PHY device * @phydev: target phy_device struct * - * Description: Request the interrupt for the given PHY. + * Description: Request and enable the interrupt for the given PHY. * If this fails, then we set irq to PHY_POLL. * This should only be called with a valid IRQ number. */ @@ -817,10 +817,30 @@ void phy_request_interrupt(struct phy_device *phydev) phydev_warn(phydev, "Error %d requesting IRQ %d, falling back to polling\n", err, phydev->irq); phydev->irq = PHY_POLL; + } else { + if (phy_enable_interrupts(phydev)) { + phydev_warn(phydev, "Can't enable interrupt, falling back to polling\n"); + phy_free_interrupt(phydev); + phydev->irq = PHY_POLL; + } } } EXPORT_SYMBOL(phy_request_interrupt); +/** + * phy_free_interrupt - disable and free interrupt for a PHY device + * @phydev: target phy_device struct + * + * Description: Disable and free the interrupt for the given PHY. + * This should only be called with a valid IRQ number. + */ +void phy_free_interrupt(struct phy_device *phydev) +{ + phy_disable_interrupts(phydev); + free_irq(phydev->irq, phydev); +} +EXPORT_SYMBOL(phy_free_interrupt); + /** * phy_stop - Bring down the PHY link, and stop checking the status * @phydev: target phy_device struct @@ -835,9 +855,6 @@ void phy_stop(struct phy_device *phydev) mutex_lock(&phydev->lock); - if (phy_interrupt_is_valid(phydev)) - phy_disable_interrupts(phydev); - phydev->state = PHY_HALTED; mutex_unlock(&phydev->lock); @@ -864,8 +881,6 @@ EXPORT_SYMBOL(phy_stop); */ void phy_start(struct phy_device *phydev) { - int err; - mutex_lock(&phydev->lock); if (phydev->state != PHY_READY && phydev->state != PHY_HALTED) { @@ -877,13 +892,6 @@ void phy_start(struct phy_device *phydev) /* if phy was suspended, bring the physical link up again */ __phy_resume(phydev); - /* make sure interrupts are enabled for the PHY */ - if (phy_interrupt_is_valid(phydev)) { - err = phy_enable_interrupts(phydev); - if (err < 0) - goto out; - } - phydev->state = PHY_UP; phy_start_machine(phydev); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 8b4fc3b4f269..2c879ba01f35 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1016,7 +1016,7 @@ void phy_disconnect(struct phy_device *phydev) phy_stop(phydev); if (phy_interrupt_is_valid(phydev)) - free_irq(phydev->irq, phydev); + phy_free_interrupt(phydev); phydev->adjust_link = NULL; diff --git a/include/linux/phy.h b/include/linux/phy.h index 7180b1d1e5e3..72e1196f9799 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1147,6 +1147,7 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev, const struct ethtool_link_ksettings *cmd); int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); void phy_request_interrupt(struct phy_device *phydev); +void phy_free_interrupt(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); -- cgit v1.2.3 From 49644e68f472c6480e015253fa4d7448c6cfa2aa Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 30 May 2019 15:10:06 +0200 Subject: net: phy: add callback for custom interrupt handler to struct phy_driver The phylib interrupt handler handles link change events only currently. However PHY drivers may want to use other interrupt sources too, e.g. to report temperature monitoring events. Therefore add a callback to struct phy_driver allowing PHY drivers to implement a custom interrupt handler. Signed-off-by: Heiner Kallweit Suggested-by: Russell King - ARM Linux admin Acked-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 9 +++++++-- include/linux/phy.h | 3 +++ 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 4ba71dc3aee7..c6b0010a6d20 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -772,8 +772,13 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) if (phydev->drv->did_interrupt && !phydev->drv->did_interrupt(phydev)) return IRQ_NONE; - /* reschedule state queue work to run as soon as possible */ - phy_trigger_machine(phydev); + if (phydev->drv->handle_interrupt) { + if (phydev->drv->handle_interrupt(phydev)) + goto phy_err; + } else { + /* reschedule state queue work to run as soon as possible */ + phy_trigger_machine(phydev); + } if (phy_clear_interrupt(phydev)) goto phy_err; diff --git a/include/linux/phy.h b/include/linux/phy.h index 72e1196f9799..16cd33915496 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -537,6 +537,9 @@ struct phy_driver { */ int (*did_interrupt)(struct phy_device *phydev); + /* Override default interrupt handling */ + int (*handle_interrupt)(struct phy_device *phydev); + /* Clears up any memory if needed */ void (*remove)(struct phy_device *phydev); -- cgit v1.2.3 From 97b33bdf9bddb6bebc2e87148df3e30aa7a13b2d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 30 May 2019 15:11:06 +0200 Subject: net: phy: export phy_queue_state_machine We face the issue that link change interrupt and link status may be reported by different PHY layers. As a result the link change interrupt may occur before the link status changes. Export phy_queue_state_machine to allow PHY drivers to specify a delay between link status change interrupt and link status check. v2: - change jiffies parameter type to unsigned long Signed-off-by: Heiner Kallweit Suggested-by: Russell King Acked-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 8 +++++--- include/linux/phy.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index c6b0010a6d20..84671d868a80 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -29,6 +29,8 @@ #include #include +#define PHY_STATE_TIME HZ + #define PHY_STATE_STR(_state) \ case PHY_##_state: \ return __stringify(_state); \ @@ -478,12 +480,12 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) } EXPORT_SYMBOL(phy_mii_ioctl); -static void phy_queue_state_machine(struct phy_device *phydev, - unsigned int secs) +void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies) { mod_delayed_work(system_power_efficient_wq, &phydev->state_queue, - secs * HZ); + jiffies); } +EXPORT_SYMBOL(phy_queue_state_machine); static void phy_trigger_machine(struct phy_device *phydev) { diff --git a/include/linux/phy.h b/include/linux/phy.h index 16cd33915496..dc4b51060ebc 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -188,7 +188,6 @@ static inline const char *phy_modes(phy_interface_t interface) #define PHY_INIT_TIMEOUT 100000 -#define PHY_STATE_TIME 1 #define PHY_FORCE_TIMEOUT 10 #define PHY_MAX_ADDR 32 @@ -1140,6 +1139,7 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner); int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner); void phy_state_machine(struct work_struct *work); +void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies); void phy_mac_interrupt(struct phy_device *phydev); void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); -- cgit v1.2.3 From 229b4e0728e0a6ddca2645e73696d5b104fbbbfb Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 14 May 2019 22:47:24 +0800 Subject: Documentation: PCI: convert pci.txt to reST Convert plain text documentation to reStructuredText format and add it to Sphinx TOC tree. No essential content change. Move the description of struct pci_driver and struct pci_device_id into in-source comments. Signed-off-by: Changbin Du [bhelgaas: fix kernel-doc warnings related to moving descriptions to linux/pci.h, fix "space tab" whitespace errors in mod_devicetable.h] Signed-off-by: Bjorn Helgaas Reviewed-by: Mauro Carvalho Chehab --- Documentation/PCI/index.rst | 2 + Documentation/PCI/pci.rst | 578 ++++++++++++++++++++++++++++++++++++ Documentation/PCI/pci.txt | 636 ---------------------------------------- include/linux/mod_devicetable.h | 29 +- include/linux/pci.h | 48 ++- 5 files changed, 651 insertions(+), 642 deletions(-) create mode 100644 Documentation/PCI/pci.rst delete mode 100644 Documentation/PCI/pci.txt (limited to 'include/linux') diff --git a/Documentation/PCI/index.rst b/Documentation/PCI/index.rst index c2f8728d11cf..7babf43709b0 100644 --- a/Documentation/PCI/index.rst +++ b/Documentation/PCI/index.rst @@ -7,3 +7,5 @@ Linux PCI Bus Subsystem .. toctree:: :maxdepth: 2 :numbered: + + pci diff --git a/Documentation/PCI/pci.rst b/Documentation/PCI/pci.rst new file mode 100644 index 000000000000..6864f9a70f5f --- /dev/null +++ b/Documentation/PCI/pci.rst @@ -0,0 +1,578 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================== +How To Write Linux PCI Drivers +============================== + +:Authors: - Martin Mares + - Grant Grundler + +The world of PCI is vast and full of (mostly unpleasant) surprises. +Since each CPU architecture implements different chip-sets and PCI devices +have different requirements (erm, "features"), the result is the PCI support +in the Linux kernel is not as trivial as one would wish. This short paper +tries to introduce all potential driver authors to Linux APIs for +PCI device drivers. + +A more complete resource is the third edition of "Linux Device Drivers" +by Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman. +LDD3 is available for free (under Creative Commons License) from: +http://lwn.net/Kernel/LDD3/. + +However, keep in mind that all documents are subject to "bit rot". +Refer to the source code if things are not working as described here. + +Please send questions/comments/patches about Linux PCI API to the +"Linux PCI" mailing list. + + +Structure of PCI drivers +======================== +PCI drivers "discover" PCI devices in a system via pci_register_driver(). +Actually, it's the other way around. When the PCI generic code discovers +a new device, the driver with a matching "description" will be notified. +Details on this below. + +pci_register_driver() leaves most of the probing for devices to +the PCI layer and supports online insertion/removal of devices [thus +supporting hot-pluggable PCI, CardBus, and Express-Card in a single driver]. +pci_register_driver() call requires passing in a table of function +pointers and thus dictates the high level structure of a driver. + +Once the driver knows about a PCI device and takes ownership, the +driver generally needs to perform the following initialization: + + - Enable the device + - Request MMIO/IOP resources + - Set the DMA mask size (for both coherent and streaming DMA) + - Allocate and initialize shared control data (pci_allocate_coherent()) + - Access device configuration space (if needed) + - Register IRQ handler (request_irq()) + - Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip) + - Enable DMA/processing engines + +When done using the device, and perhaps the module needs to be unloaded, +the driver needs to take the follow steps: + + - Disable the device from generating IRQs + - Release the IRQ (free_irq()) + - Stop all DMA activity + - Release DMA buffers (both streaming and coherent) + - Unregister from other subsystems (e.g. scsi or netdev) + - Release MMIO/IOP resources + - Disable the device + +Most of these topics are covered in the following sections. +For the rest look at LDD3 or . + +If the PCI subsystem is not configured (CONFIG_PCI is not set), most of +the PCI functions described below are defined as inline functions either +completely empty or just returning an appropriate error codes to avoid +lots of ifdefs in the drivers. + + +pci_register_driver() call +========================== + +PCI device drivers call ``pci_register_driver()`` during their +initialization with a pointer to a structure describing the driver +(``struct pci_driver``): + +.. kernel-doc:: include/linux/pci.h + :functions: pci_driver + +The ID table is an array of ``struct pci_device_id`` entries ending with an +all-zero entry. Definitions with static const are generally preferred. + +.. kernel-doc:: include/linux/mod_devicetable.h + :functions: pci_device_id + +Most drivers only need ``PCI_DEVICE()`` or ``PCI_DEVICE_CLASS()`` to set up +a pci_device_id table. + +New PCI IDs may be added to a device driver pci_ids table at runtime +as shown below:: + + echo "vendor device subvendor subdevice class class_mask driver_data" > \ + /sys/bus/pci/drivers/{driver}/new_id + +All fields are passed in as hexadecimal values (no leading 0x). +The vendor and device fields are mandatory, the others are optional. Users +need pass only as many optional fields as necessary: + + - subvendor and subdevice fields default to PCI_ANY_ID (FFFFFFFF) + - class and classmask fields default to 0 + - driver_data defaults to 0UL. + +Note that driver_data must match the value used by any of the pci_device_id +entries defined in the driver. This makes the driver_data field mandatory +if all the pci_device_id entries have a non-zero driver_data value. + +Once added, the driver probe routine will be invoked for any unclaimed +PCI devices listed in its (newly updated) pci_ids list. + +When the driver exits, it just calls pci_unregister_driver() and the PCI layer +automatically calls the remove hook for all devices handled by the driver. + + +"Attributes" for driver functions/data +-------------------------------------- + +Please mark the initialization and cleanup functions where appropriate +(the corresponding macros are defined in ): + + ====== ================================================= + __init Initialization code. Thrown away after the driver + initializes. + __exit Exit code. Ignored for non-modular drivers. + ====== ================================================= + +Tips on when/where to use the above attributes: + - The module_init()/module_exit() functions (and all + initialization functions called _only_ from these) + should be marked __init/__exit. + + - Do not mark the struct pci_driver. + + - Do NOT mark a function if you are not sure which mark to use. + Better to not mark the function than mark the function wrong. + + +How to find PCI devices manually +================================ + +PCI drivers should have a really good reason for not using the +pci_register_driver() interface to search for PCI devices. +The main reason PCI devices are controlled by multiple drivers +is because one PCI device implements several different HW services. +E.g. combined serial/parallel port/floppy controller. + +A manual search may be performed using the following constructs: + +Searching by vendor and device ID:: + + struct pci_dev *dev = NULL; + while (dev = pci_get_device(VENDOR_ID, DEVICE_ID, dev)) + configure_device(dev); + +Searching by class ID (iterate in a similar way):: + + pci_get_class(CLASS_ID, dev) + +Searching by both vendor/device and subsystem vendor/device ID:: + + pci_get_subsys(VENDOR_ID,DEVICE_ID, SUBSYS_VENDOR_ID, SUBSYS_DEVICE_ID, dev). + +You can use the constant PCI_ANY_ID as a wildcard replacement for +VENDOR_ID or DEVICE_ID. This allows searching for any device from a +specific vendor, for example. + +These functions are hotplug-safe. They increment the reference count on +the pci_dev that they return. You must eventually (possibly at module unload) +decrement the reference count on these devices by calling pci_dev_put(). + + +Device Initialization Steps +=========================== + +As noted in the introduction, most PCI drivers need the following steps +for device initialization: + + - Enable the device + - Request MMIO/IOP resources + - Set the DMA mask size (for both coherent and streaming DMA) + - Allocate and initialize shared control data (pci_allocate_coherent()) + - Access device configuration space (if needed) + - Register IRQ handler (request_irq()) + - Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip) + - Enable DMA/processing engines. + +The driver can access PCI config space registers at any time. +(Well, almost. When running BIST, config space can go away...but +that will just result in a PCI Bus Master Abort and config reads +will return garbage). + + +Enable the PCI device +--------------------- +Before touching any device registers, the driver needs to enable +the PCI device by calling pci_enable_device(). This will: + + - wake up the device if it was in suspended state, + - allocate I/O and memory regions of the device (if BIOS did not), + - allocate an IRQ (if BIOS did not). + +.. note:: + pci_enable_device() can fail! Check the return value. + +.. warning:: + OS BUG: we don't check resource allocations before enabling those + resources. The sequence would make more sense if we called + pci_request_resources() before calling pci_enable_device(). + Currently, the device drivers can't detect the bug when when two + devices have been allocated the same range. This is not a common + problem and unlikely to get fixed soon. + + This has been discussed before but not changed as of 2.6.19: + http://lkml.org/lkml/2006/3/2/194 + + +pci_set_master() will enable DMA by setting the bus master bit +in the PCI_COMMAND register. It also fixes the latency timer value if +it's set to something bogus by the BIOS. pci_clear_master() will +disable DMA by clearing the bus master bit. + +If the PCI device can use the PCI Memory-Write-Invalidate transaction, +call pci_set_mwi(). This enables the PCI_COMMAND bit for Mem-Wr-Inval +and also ensures that the cache line size register is set correctly. +Check the return value of pci_set_mwi() as not all architectures +or chip-sets may support Memory-Write-Invalidate. Alternatively, +if Mem-Wr-Inval would be nice to have but is not required, call +pci_try_set_mwi() to have the system do its best effort at enabling +Mem-Wr-Inval. + + +Request MMIO/IOP resources +-------------------------- +Memory (MMIO), and I/O port addresses should NOT be read directly +from the PCI device config space. Use the values in the pci_dev structure +as the PCI "bus address" might have been remapped to a "host physical" +address by the arch/chip-set specific kernel support. + +See Documentation/io-mapping.txt for how to access device registers +or device memory. + +The device driver needs to call pci_request_region() to verify +no other device is already using the same address resource. +Conversely, drivers should call pci_release_region() AFTER +calling pci_disable_device(). +The idea is to prevent two devices colliding on the same address range. + +.. tip:: + See OS BUG comment above. Currently (2.6.19), The driver can only + determine MMIO and IO Port resource availability _after_ calling + pci_enable_device(). + +Generic flavors of pci_request_region() are request_mem_region() +(for MMIO ranges) and request_region() (for IO Port ranges). +Use these for address resources that are not described by "normal" PCI +BARs. + +Also see pci_request_selected_regions() below. + + +Set the DMA mask size +--------------------- +.. note:: + If anything below doesn't make sense, please refer to + Documentation/DMA-API.txt. This section is just a reminder that + drivers need to indicate DMA capabilities of the device and is not + an authoritative source for DMA interfaces. + +While all drivers should explicitly indicate the DMA capability +(e.g. 32 or 64 bit) of the PCI bus master, devices with more than +32-bit bus master capability for streaming data need the driver +to "register" this capability by calling pci_set_dma_mask() with +appropriate parameters. In general this allows more efficient DMA +on systems where System RAM exists above 4G _physical_ address. + +Drivers for all PCI-X and PCIe compliant devices must call +pci_set_dma_mask() as they are 64-bit DMA devices. + +Similarly, drivers must also "register" this capability if the device +can directly address "consistent memory" in System RAM above 4G physical +address by calling pci_set_consistent_dma_mask(). +Again, this includes drivers for all PCI-X and PCIe compliant devices. +Many 64-bit "PCI" devices (before PCI-X) and some PCI-X devices are +64-bit DMA capable for payload ("streaming") data but not control +("consistent") data. + + +Setup shared control data +------------------------- +Once the DMA masks are set, the driver can allocate "consistent" (a.k.a. shared) +memory. See Documentation/DMA-API.txt for a full description of +the DMA APIs. This section is just a reminder that it needs to be done +before enabling DMA on the device. + + +Initialize device registers +--------------------------- +Some drivers will need specific "capability" fields programmed +or other "vendor specific" register initialized or reset. +E.g. clearing pending interrupts. + + +Register IRQ handler +-------------------- +While calling request_irq() is the last step described here, +this is often just another intermediate step to initialize a device. +This step can often be deferred until the device is opened for use. + +All interrupt handlers for IRQ lines should be registered with IRQF_SHARED +and use the devid to map IRQs to devices (remember that all PCI IRQ lines +can be shared). + +request_irq() will associate an interrupt handler and device handle +with an interrupt number. Historically interrupt numbers represent +IRQ lines which run from the PCI device to the Interrupt controller. +With MSI and MSI-X (more below) the interrupt number is a CPU "vector". + +request_irq() also enables the interrupt. Make sure the device is +quiesced and does not have any interrupts pending before registering +the interrupt handler. + +MSI and MSI-X are PCI capabilities. Both are "Message Signaled Interrupts" +which deliver interrupts to the CPU via a DMA write to a Local APIC. +The fundamental difference between MSI and MSI-X is how multiple +"vectors" get allocated. MSI requires contiguous blocks of vectors +while MSI-X can allocate several individual ones. + +MSI capability can be enabled by calling pci_alloc_irq_vectors() with the +PCI_IRQ_MSI and/or PCI_IRQ_MSIX flags before calling request_irq(). This +causes the PCI support to program CPU vector data into the PCI device +capability registers. Many architectures, chip-sets, or BIOSes do NOT +support MSI or MSI-X and a call to pci_alloc_irq_vectors with just +the PCI_IRQ_MSI and PCI_IRQ_MSIX flags will fail, so try to always +specify PCI_IRQ_LEGACY as well. + +Drivers that have different interrupt handlers for MSI/MSI-X and +legacy INTx should chose the right one based on the msi_enabled +and msix_enabled flags in the pci_dev structure after calling +pci_alloc_irq_vectors. + +There are (at least) two really good reasons for using MSI: + +1) MSI is an exclusive interrupt vector by definition. + This means the interrupt handler doesn't have to verify + its device caused the interrupt. + +2) MSI avoids DMA/IRQ race conditions. DMA to host memory is guaranteed + to be visible to the host CPU(s) when the MSI is delivered. This + is important for both data coherency and avoiding stale control data. + This guarantee allows the driver to omit MMIO reads to flush + the DMA stream. + +See drivers/infiniband/hw/mthca/ or drivers/net/tg3.c for examples +of MSI/MSI-X usage. + + +PCI device shutdown +=================== + +When a PCI device driver is being unloaded, most of the following +steps need to be performed: + + - Disable the device from generating IRQs + - Release the IRQ (free_irq()) + - Stop all DMA activity + - Release DMA buffers (both streaming and consistent) + - Unregister from other subsystems (e.g. scsi or netdev) + - Disable device from responding to MMIO/IO Port addresses + - Release MMIO/IO Port resource(s) + + +Stop IRQs on the device +----------------------- +How to do this is chip/device specific. If it's not done, it opens +the possibility of a "screaming interrupt" if (and only if) +the IRQ is shared with another device. + +When the shared IRQ handler is "unhooked", the remaining devices +using the same IRQ line will still need the IRQ enabled. Thus if the +"unhooked" device asserts IRQ line, the system will respond assuming +it was one of the remaining devices asserted the IRQ line. Since none +of the other devices will handle the IRQ, the system will "hang" until +it decides the IRQ isn't going to get handled and masks the IRQ (100,000 +iterations later). Once the shared IRQ is masked, the remaining devices +will stop functioning properly. Not a nice situation. + +This is another reason to use MSI or MSI-X if it's available. +MSI and MSI-X are defined to be exclusive interrupts and thus +are not susceptible to the "screaming interrupt" problem. + + +Release the IRQ +--------------- +Once the device is quiesced (no more IRQs), one can call free_irq(). +This function will return control once any pending IRQs are handled, +"unhook" the drivers IRQ handler from that IRQ, and finally release +the IRQ if no one else is using it. + + +Stop all DMA activity +--------------------- +It's extremely important to stop all DMA operations BEFORE attempting +to deallocate DMA control data. Failure to do so can result in memory +corruption, hangs, and on some chip-sets a hard crash. + +Stopping DMA after stopping the IRQs can avoid races where the +IRQ handler might restart DMA engines. + +While this step sounds obvious and trivial, several "mature" drivers +didn't get this step right in the past. + + +Release DMA buffers +------------------- +Once DMA is stopped, clean up streaming DMA first. +I.e. unmap data buffers and return buffers to "upstream" +owners if there is one. + +Then clean up "consistent" buffers which contain the control data. + +See Documentation/DMA-API.txt for details on unmapping interfaces. + + +Unregister from other subsystems +-------------------------------- +Most low level PCI device drivers support some other subsystem +like USB, ALSA, SCSI, NetDev, Infiniband, etc. Make sure your +driver isn't losing resources from that other subsystem. +If this happens, typically the symptom is an Oops (panic) when +the subsystem attempts to call into a driver that has been unloaded. + + +Disable Device from responding to MMIO/IO Port addresses +-------------------------------------------------------- +io_unmap() MMIO or IO Port resources and then call pci_disable_device(). +This is the symmetric opposite of pci_enable_device(). +Do not access device registers after calling pci_disable_device(). + + +Release MMIO/IO Port Resource(s) +-------------------------------- +Call pci_release_region() to mark the MMIO or IO Port range as available. +Failure to do so usually results in the inability to reload the driver. + + +How to access PCI config space +============================== + +You can use `pci_(read|write)_config_(byte|word|dword)` to access the config +space of a device represented by `struct pci_dev *`. All these functions return +0 when successful or an error code (`PCIBIOS_...`) which can be translated to a +text string by pcibios_strerror. Most drivers expect that accesses to valid PCI +devices don't fail. + +If you don't have a struct pci_dev available, you can call +`pci_bus_(read|write)_config_(byte|word|dword)` to access a given device +and function on that bus. + +If you access fields in the standard portion of the config header, please +use symbolic names of locations and bits declared in . + +If you need to access Extended PCI Capability registers, just call +pci_find_capability() for the particular capability and it will find the +corresponding register block for you. + + +Other interesting functions +=========================== + +============================= ================================================ +pci_get_domain_bus_and_slot() Find pci_dev corresponding to given domain, + bus and slot and number. If the device is + found, its reference count is increased. +pci_set_power_state() Set PCI Power Management state (0=D0 ... 3=D3) +pci_find_capability() Find specified capability in device's capability + list. +pci_resource_start() Returns bus start address for a given PCI region +pci_resource_end() Returns bus end address for a given PCI region +pci_resource_len() Returns the byte length of a PCI region +pci_set_drvdata() Set private driver data pointer for a pci_dev +pci_get_drvdata() Return private driver data pointer for a pci_dev +pci_set_mwi() Enable Memory-Write-Invalidate transactions. +pci_clear_mwi() Disable Memory-Write-Invalidate transactions. +============================= ================================================ + + +Miscellaneous hints +=================== + +When displaying PCI device names to the user (for example when a driver wants +to tell the user what card has it found), please use pci_name(pci_dev). + +Always refer to the PCI devices by a pointer to the pci_dev structure. +All PCI layer functions use this identification and it's the only +reasonable one. Don't use bus/slot/function numbers except for very +special purposes -- on systems with multiple primary buses their semantics +can be pretty complex. + +Don't try to turn on Fast Back to Back writes in your driver. All devices +on the bus need to be capable of doing it, so this is something which needs +to be handled by platform and generic code, not individual drivers. + + +Vendor and device identifications +================================= + +Do not add new device or vendor IDs to include/linux/pci_ids.h unless they +are shared across multiple drivers. You can add private definitions in +your driver if they're helpful, or just use plain hex constants. + +The device IDs are arbitrary hex numbers (vendor controlled) and normally used +only in a single location, the pci_device_id table. + +Please DO submit new vendor/device IDs to http://pci-ids.ucw.cz/. +There are mirrors of the pci.ids file at http://pciids.sourceforge.net/ +and https://github.com/pciutils/pciids. + + +Obsolete functions +================== + +There are several functions which you might come across when trying to +port an old driver to the new PCI interface. They are no longer present +in the kernel as they aren't compatible with hotplug or PCI domains or +having sane locking. + +================= =========================================== +pci_find_device() Superseded by pci_get_device() +pci_find_subsys() Superseded by pci_get_subsys() +pci_find_slot() Superseded by pci_get_domain_bus_and_slot() +pci_get_slot() Superseded by pci_get_domain_bus_and_slot() +================= =========================================== + +The alternative is the traditional PCI device driver that walks PCI +device lists. This is still possible but discouraged. + + +MMIO Space and "Write Posting" +============================== + +Converting a driver from using I/O Port space to using MMIO space +often requires some additional changes. Specifically, "write posting" +needs to be handled. Many drivers (e.g. tg3, acenic, sym53c8xx_2) +already do this. I/O Port space guarantees write transactions reach the PCI +device before the CPU can continue. Writes to MMIO space allow the CPU +to continue before the transaction reaches the PCI device. HW weenies +call this "Write Posting" because the write completion is "posted" to +the CPU before the transaction has reached its destination. + +Thus, timing sensitive code should add readl() where the CPU is +expected to wait before doing other work. The classic "bit banging" +sequence works fine for I/O Port space:: + + for (i = 8; --i; val >>= 1) { + outb(val & 1, ioport_reg); /* write bit */ + udelay(10); + } + +The same sequence for MMIO space should be:: + + for (i = 8; --i; val >>= 1) { + writeb(val & 1, mmio_reg); /* write bit */ + readb(safe_mmio_reg); /* flush posted write */ + udelay(10); + } + +It is important that "safe_mmio_reg" not have any side effects that +interferes with the correct operation of the device. + +Another case to watch out for is when resetting a PCI device. Use PCI +Configuration space reads to flush the writel(). This will gracefully +handle the PCI master abort on all platforms if the PCI device is +expected to not respond to a readl(). Most x86 platforms will allow +MMIO reads to master abort (a.k.a. "Soft Fail") and return garbage +(e.g. ~0). But many RISC platforms will crash (a.k.a."Hard Fail"). diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt deleted file mode 100644 index badb26ac33dc..000000000000 --- a/Documentation/PCI/pci.txt +++ /dev/null @@ -1,636 +0,0 @@ - - How To Write Linux PCI Drivers - - by Martin Mares on 07-Feb-2000 - updated by Grant Grundler on 23-Dec-2006 - -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The world of PCI is vast and full of (mostly unpleasant) surprises. -Since each CPU architecture implements different chip-sets and PCI devices -have different requirements (erm, "features"), the result is the PCI support -in the Linux kernel is not as trivial as one would wish. This short paper -tries to introduce all potential driver authors to Linux APIs for -PCI device drivers. - -A more complete resource is the third edition of "Linux Device Drivers" -by Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman. -LDD3 is available for free (under Creative Commons License) from: - - http://lwn.net/Kernel/LDD3/ - -However, keep in mind that all documents are subject to "bit rot". -Refer to the source code if things are not working as described here. - -Please send questions/comments/patches about Linux PCI API to the -"Linux PCI" mailing list. - - - -0. Structure of PCI drivers -~~~~~~~~~~~~~~~~~~~~~~~~~~~ -PCI drivers "discover" PCI devices in a system via pci_register_driver(). -Actually, it's the other way around. When the PCI generic code discovers -a new device, the driver with a matching "description" will be notified. -Details on this below. - -pci_register_driver() leaves most of the probing for devices to -the PCI layer and supports online insertion/removal of devices [thus -supporting hot-pluggable PCI, CardBus, and Express-Card in a single driver]. -pci_register_driver() call requires passing in a table of function -pointers and thus dictates the high level structure of a driver. - -Once the driver knows about a PCI device and takes ownership, the -driver generally needs to perform the following initialization: - - Enable the device - Request MMIO/IOP resources - Set the DMA mask size (for both coherent and streaming DMA) - Allocate and initialize shared control data (pci_allocate_coherent()) - Access device configuration space (if needed) - Register IRQ handler (request_irq()) - Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip) - Enable DMA/processing engines - -When done using the device, and perhaps the module needs to be unloaded, -the driver needs to take the follow steps: - Disable the device from generating IRQs - Release the IRQ (free_irq()) - Stop all DMA activity - Release DMA buffers (both streaming and coherent) - Unregister from other subsystems (e.g. scsi or netdev) - Release MMIO/IOP resources - Disable the device - -Most of these topics are covered in the following sections. -For the rest look at LDD3 or . - -If the PCI subsystem is not configured (CONFIG_PCI is not set), most of -the PCI functions described below are defined as inline functions either -completely empty or just returning an appropriate error codes to avoid -lots of ifdefs in the drivers. - - - -1. pci_register_driver() call -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -PCI device drivers call pci_register_driver() during their -initialization with a pointer to a structure describing the driver -(struct pci_driver): - - field name Description - ---------- ------------------------------------------------------ - id_table Pointer to table of device ID's the driver is - interested in. Most drivers should export this - table using MODULE_DEVICE_TABLE(pci,...). - - probe This probing function gets called (during execution - of pci_register_driver() for already existing - devices or later if a new device gets inserted) for - all PCI devices which match the ID table and are not - "owned" by the other drivers yet. This function gets - passed a "struct pci_dev *" for each device whose - entry in the ID table matches the device. The probe - function returns zero when the driver chooses to - take "ownership" of the device or an error code - (negative number) otherwise. - The probe function always gets called from process - context, so it can sleep. - - remove The remove() function gets called whenever a device - being handled by this driver is removed (either during - deregistration of the driver or when it's manually - pulled out of a hot-pluggable slot). - The remove function always gets called from process - context, so it can sleep. - - suspend Put device into low power state. - suspend_late Put device into low power state. - - resume_early Wake device from low power state. - resume Wake device from low power state. - - (Please see Documentation/power/pci.txt for descriptions - of PCI Power Management and the related functions.) - - shutdown Hook into reboot_notifier_list (kernel/sys.c). - Intended to stop any idling DMA operations. - Useful for enabling wake-on-lan (NIC) or changing - the power state of a device before reboot. - e.g. drivers/net/e100.c. - - err_handler See Documentation/PCI/pci-error-recovery.txt - - -The ID table is an array of struct pci_device_id entries ending with an -all-zero entry. Definitions with static const are generally preferred. - -Each entry consists of: - - vendor,device Vendor and device ID to match (or PCI_ANY_ID) - - subvendor, Subsystem vendor and device ID to match (or PCI_ANY_ID) - subdevice, - - class Device class, subclass, and "interface" to match. - See Appendix D of the PCI Local Bus Spec or - include/linux/pci_ids.h for a full list of classes. - Most drivers do not need to specify class/class_mask - as vendor/device is normally sufficient. - - class_mask limit which sub-fields of the class field are compared. - See drivers/scsi/sym53c8xx_2/ for example of usage. - - driver_data Data private to the driver. - Most drivers don't need to use driver_data field. - Best practice is to use driver_data as an index - into a static list of equivalent device types, - instead of using it as a pointer. - - -Most drivers only need PCI_DEVICE() or PCI_DEVICE_CLASS() to set up -a pci_device_id table. - -New PCI IDs may be added to a device driver pci_ids table at runtime -as shown below: - -echo "vendor device subvendor subdevice class class_mask driver_data" > \ -/sys/bus/pci/drivers/{driver}/new_id - -All fields are passed in as hexadecimal values (no leading 0x). -The vendor and device fields are mandatory, the others are optional. Users -need pass only as many optional fields as necessary: - o subvendor and subdevice fields default to PCI_ANY_ID (FFFFFFFF) - o class and classmask fields default to 0 - o driver_data defaults to 0UL. - -Note that driver_data must match the value used by any of the pci_device_id -entries defined in the driver. This makes the driver_data field mandatory -if all the pci_device_id entries have a non-zero driver_data value. - -Once added, the driver probe routine will be invoked for any unclaimed -PCI devices listed in its (newly updated) pci_ids list. - -When the driver exits, it just calls pci_unregister_driver() and the PCI layer -automatically calls the remove hook for all devices handled by the driver. - - -1.1 "Attributes" for driver functions/data - -Please mark the initialization and cleanup functions where appropriate -(the corresponding macros are defined in ): - - __init Initialization code. Thrown away after the driver - initializes. - __exit Exit code. Ignored for non-modular drivers. - -Tips on when/where to use the above attributes: - o The module_init()/module_exit() functions (and all - initialization functions called _only_ from these) - should be marked __init/__exit. - - o Do not mark the struct pci_driver. - - o Do NOT mark a function if you are not sure which mark to use. - Better to not mark the function than mark the function wrong. - - - -2. How to find PCI devices manually -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -PCI drivers should have a really good reason for not using the -pci_register_driver() interface to search for PCI devices. -The main reason PCI devices are controlled by multiple drivers -is because one PCI device implements several different HW services. -E.g. combined serial/parallel port/floppy controller. - -A manual search may be performed using the following constructs: - -Searching by vendor and device ID: - - struct pci_dev *dev = NULL; - while (dev = pci_get_device(VENDOR_ID, DEVICE_ID, dev)) - configure_device(dev); - -Searching by class ID (iterate in a similar way): - - pci_get_class(CLASS_ID, dev) - -Searching by both vendor/device and subsystem vendor/device ID: - - pci_get_subsys(VENDOR_ID,DEVICE_ID, SUBSYS_VENDOR_ID, SUBSYS_DEVICE_ID, dev). - -You can use the constant PCI_ANY_ID as a wildcard replacement for -VENDOR_ID or DEVICE_ID. This allows searching for any device from a -specific vendor, for example. - -These functions are hotplug-safe. They increment the reference count on -the pci_dev that they return. You must eventually (possibly at module unload) -decrement the reference count on these devices by calling pci_dev_put(). - - - -3. Device Initialization Steps -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -As noted in the introduction, most PCI drivers need the following steps -for device initialization: - - Enable the device - Request MMIO/IOP resources - Set the DMA mask size (for both coherent and streaming DMA) - Allocate and initialize shared control data (pci_allocate_coherent()) - Access device configuration space (if needed) - Register IRQ handler (request_irq()) - Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip) - Enable DMA/processing engines. - -The driver can access PCI config space registers at any time. -(Well, almost. When running BIST, config space can go away...but -that will just result in a PCI Bus Master Abort and config reads -will return garbage). - - -3.1 Enable the PCI device -~~~~~~~~~~~~~~~~~~~~~~~~~ -Before touching any device registers, the driver needs to enable -the PCI device by calling pci_enable_device(). This will: - o wake up the device if it was in suspended state, - o allocate I/O and memory regions of the device (if BIOS did not), - o allocate an IRQ (if BIOS did not). - -NOTE: pci_enable_device() can fail! Check the return value. - -[ OS BUG: we don't check resource allocations before enabling those - resources. The sequence would make more sense if we called - pci_request_resources() before calling pci_enable_device(). - Currently, the device drivers can't detect the bug when when two - devices have been allocated the same range. This is not a common - problem and unlikely to get fixed soon. - - This has been discussed before but not changed as of 2.6.19: - http://lkml.org/lkml/2006/3/2/194 -] - -pci_set_master() will enable DMA by setting the bus master bit -in the PCI_COMMAND register. It also fixes the latency timer value if -it's set to something bogus by the BIOS. pci_clear_master() will -disable DMA by clearing the bus master bit. - -If the PCI device can use the PCI Memory-Write-Invalidate transaction, -call pci_set_mwi(). This enables the PCI_COMMAND bit for Mem-Wr-Inval -and also ensures that the cache line size register is set correctly. -Check the return value of pci_set_mwi() as not all architectures -or chip-sets may support Memory-Write-Invalidate. Alternatively, -if Mem-Wr-Inval would be nice to have but is not required, call -pci_try_set_mwi() to have the system do its best effort at enabling -Mem-Wr-Inval. - - -3.2 Request MMIO/IOP resources -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Memory (MMIO), and I/O port addresses should NOT be read directly -from the PCI device config space. Use the values in the pci_dev structure -as the PCI "bus address" might have been remapped to a "host physical" -address by the arch/chip-set specific kernel support. - -See Documentation/io-mapping.txt for how to access device registers -or device memory. - -The device driver needs to call pci_request_region() to verify -no other device is already using the same address resource. -Conversely, drivers should call pci_release_region() AFTER -calling pci_disable_device(). -The idea is to prevent two devices colliding on the same address range. - -[ See OS BUG comment above. Currently (2.6.19), The driver can only - determine MMIO and IO Port resource availability _after_ calling - pci_enable_device(). ] - -Generic flavors of pci_request_region() are request_mem_region() -(for MMIO ranges) and request_region() (for IO Port ranges). -Use these for address resources that are not described by "normal" PCI -BARs. - -Also see pci_request_selected_regions() below. - - -3.3 Set the DMA mask size -~~~~~~~~~~~~~~~~~~~~~~~~~ -[ If anything below doesn't make sense, please refer to - Documentation/DMA-API.txt. This section is just a reminder that - drivers need to indicate DMA capabilities of the device and is not - an authoritative source for DMA interfaces. ] - -While all drivers should explicitly indicate the DMA capability -(e.g. 32 or 64 bit) of the PCI bus master, devices with more than -32-bit bus master capability for streaming data need the driver -to "register" this capability by calling pci_set_dma_mask() with -appropriate parameters. In general this allows more efficient DMA -on systems where System RAM exists above 4G _physical_ address. - -Drivers for all PCI-X and PCIe compliant devices must call -pci_set_dma_mask() as they are 64-bit DMA devices. - -Similarly, drivers must also "register" this capability if the device -can directly address "consistent memory" in System RAM above 4G physical -address by calling pci_set_consistent_dma_mask(). -Again, this includes drivers for all PCI-X and PCIe compliant devices. -Many 64-bit "PCI" devices (before PCI-X) and some PCI-X devices are -64-bit DMA capable for payload ("streaming") data but not control -("consistent") data. - - -3.4 Setup shared control data -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Once the DMA masks are set, the driver can allocate "consistent" (a.k.a. shared) -memory. See Documentation/DMA-API.txt for a full description of -the DMA APIs. This section is just a reminder that it needs to be done -before enabling DMA on the device. - - -3.5 Initialize device registers -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Some drivers will need specific "capability" fields programmed -or other "vendor specific" register initialized or reset. -E.g. clearing pending interrupts. - - -3.6 Register IRQ handler -~~~~~~~~~~~~~~~~~~~~~~~~ -While calling request_irq() is the last step described here, -this is often just another intermediate step to initialize a device. -This step can often be deferred until the device is opened for use. - -All interrupt handlers for IRQ lines should be registered with IRQF_SHARED -and use the devid to map IRQs to devices (remember that all PCI IRQ lines -can be shared). - -request_irq() will associate an interrupt handler and device handle -with an interrupt number. Historically interrupt numbers represent -IRQ lines which run from the PCI device to the Interrupt controller. -With MSI and MSI-X (more below) the interrupt number is a CPU "vector". - -request_irq() also enables the interrupt. Make sure the device is -quiesced and does not have any interrupts pending before registering -the interrupt handler. - -MSI and MSI-X are PCI capabilities. Both are "Message Signaled Interrupts" -which deliver interrupts to the CPU via a DMA write to a Local APIC. -The fundamental difference between MSI and MSI-X is how multiple -"vectors" get allocated. MSI requires contiguous blocks of vectors -while MSI-X can allocate several individual ones. - -MSI capability can be enabled by calling pci_alloc_irq_vectors() with the -PCI_IRQ_MSI and/or PCI_IRQ_MSIX flags before calling request_irq(). This -causes the PCI support to program CPU vector data into the PCI device -capability registers. Many architectures, chip-sets, or BIOSes do NOT -support MSI or MSI-X and a call to pci_alloc_irq_vectors with just -the PCI_IRQ_MSI and PCI_IRQ_MSIX flags will fail, so try to always -specify PCI_IRQ_LEGACY as well. - -Drivers that have different interrupt handlers for MSI/MSI-X and -legacy INTx should chose the right one based on the msi_enabled -and msix_enabled flags in the pci_dev structure after calling -pci_alloc_irq_vectors. - -There are (at least) two really good reasons for using MSI: -1) MSI is an exclusive interrupt vector by definition. - This means the interrupt handler doesn't have to verify - its device caused the interrupt. - -2) MSI avoids DMA/IRQ race conditions. DMA to host memory is guaranteed - to be visible to the host CPU(s) when the MSI is delivered. This - is important for both data coherency and avoiding stale control data. - This guarantee allows the driver to omit MMIO reads to flush - the DMA stream. - -See drivers/infiniband/hw/mthca/ or drivers/net/tg3.c for examples -of MSI/MSI-X usage. - - - -4. PCI device shutdown -~~~~~~~~~~~~~~~~~~~~~~~ - -When a PCI device driver is being unloaded, most of the following -steps need to be performed: - - Disable the device from generating IRQs - Release the IRQ (free_irq()) - Stop all DMA activity - Release DMA buffers (both streaming and consistent) - Unregister from other subsystems (e.g. scsi or netdev) - Disable device from responding to MMIO/IO Port addresses - Release MMIO/IO Port resource(s) - - -4.1 Stop IRQs on the device -~~~~~~~~~~~~~~~~~~~~~~~~~~~ -How to do this is chip/device specific. If it's not done, it opens -the possibility of a "screaming interrupt" if (and only if) -the IRQ is shared with another device. - -When the shared IRQ handler is "unhooked", the remaining devices -using the same IRQ line will still need the IRQ enabled. Thus if the -"unhooked" device asserts IRQ line, the system will respond assuming -it was one of the remaining devices asserted the IRQ line. Since none -of the other devices will handle the IRQ, the system will "hang" until -it decides the IRQ isn't going to get handled and masks the IRQ (100,000 -iterations later). Once the shared IRQ is masked, the remaining devices -will stop functioning properly. Not a nice situation. - -This is another reason to use MSI or MSI-X if it's available. -MSI and MSI-X are defined to be exclusive interrupts and thus -are not susceptible to the "screaming interrupt" problem. - - -4.2 Release the IRQ -~~~~~~~~~~~~~~~~~~~ -Once the device is quiesced (no more IRQs), one can call free_irq(). -This function will return control once any pending IRQs are handled, -"unhook" the drivers IRQ handler from that IRQ, and finally release -the IRQ if no one else is using it. - - -4.3 Stop all DMA activity -~~~~~~~~~~~~~~~~~~~~~~~~~ -It's extremely important to stop all DMA operations BEFORE attempting -to deallocate DMA control data. Failure to do so can result in memory -corruption, hangs, and on some chip-sets a hard crash. - -Stopping DMA after stopping the IRQs can avoid races where the -IRQ handler might restart DMA engines. - -While this step sounds obvious and trivial, several "mature" drivers -didn't get this step right in the past. - - -4.4 Release DMA buffers -~~~~~~~~~~~~~~~~~~~~~~~ -Once DMA is stopped, clean up streaming DMA first. -I.e. unmap data buffers and return buffers to "upstream" -owners if there is one. - -Then clean up "consistent" buffers which contain the control data. - -See Documentation/DMA-API.txt for details on unmapping interfaces. - - -4.5 Unregister from other subsystems -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Most low level PCI device drivers support some other subsystem -like USB, ALSA, SCSI, NetDev, Infiniband, etc. Make sure your -driver isn't losing resources from that other subsystem. -If this happens, typically the symptom is an Oops (panic) when -the subsystem attempts to call into a driver that has been unloaded. - - -4.6 Disable Device from responding to MMIO/IO Port addresses -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -io_unmap() MMIO or IO Port resources and then call pci_disable_device(). -This is the symmetric opposite of pci_enable_device(). -Do not access device registers after calling pci_disable_device(). - - -4.7 Release MMIO/IO Port Resource(s) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Call pci_release_region() to mark the MMIO or IO Port range as available. -Failure to do so usually results in the inability to reload the driver. - - - -5. How to access PCI config space -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You can use pci_(read|write)_config_(byte|word|dword) to access the config -space of a device represented by struct pci_dev *. All these functions return 0 -when successful or an error code (PCIBIOS_...) which can be translated to a text -string by pcibios_strerror. Most drivers expect that accesses to valid PCI -devices don't fail. - -If you don't have a struct pci_dev available, you can call -pci_bus_(read|write)_config_(byte|word|dword) to access a given device -and function on that bus. - -If you access fields in the standard portion of the config header, please -use symbolic names of locations and bits declared in . - -If you need to access Extended PCI Capability registers, just call -pci_find_capability() for the particular capability and it will find the -corresponding register block for you. - - - -6. Other interesting functions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -pci_get_domain_bus_and_slot() Find pci_dev corresponding to given domain, - bus and slot and number. If the device is - found, its reference count is increased. -pci_set_power_state() Set PCI Power Management state (0=D0 ... 3=D3) -pci_find_capability() Find specified capability in device's capability - list. -pci_resource_start() Returns bus start address for a given PCI region -pci_resource_end() Returns bus end address for a given PCI region -pci_resource_len() Returns the byte length of a PCI region -pci_set_drvdata() Set private driver data pointer for a pci_dev -pci_get_drvdata() Return private driver data pointer for a pci_dev -pci_set_mwi() Enable Memory-Write-Invalidate transactions. -pci_clear_mwi() Disable Memory-Write-Invalidate transactions. - - - -7. Miscellaneous hints -~~~~~~~~~~~~~~~~~~~~~~ - -When displaying PCI device names to the user (for example when a driver wants -to tell the user what card has it found), please use pci_name(pci_dev). - -Always refer to the PCI devices by a pointer to the pci_dev structure. -All PCI layer functions use this identification and it's the only -reasonable one. Don't use bus/slot/function numbers except for very -special purposes -- on systems with multiple primary buses their semantics -can be pretty complex. - -Don't try to turn on Fast Back to Back writes in your driver. All devices -on the bus need to be capable of doing it, so this is something which needs -to be handled by platform and generic code, not individual drivers. - - - -8. Vendor and device identifications -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Do not add new device or vendor IDs to include/linux/pci_ids.h unless they -are shared across multiple drivers. You can add private definitions in -your driver if they're helpful, or just use plain hex constants. - -The device IDs are arbitrary hex numbers (vendor controlled) and normally used -only in a single location, the pci_device_id table. - -Please DO submit new vendor/device IDs to http://pci-ids.ucw.cz/. -There are mirrors of the pci.ids file at http://pciids.sourceforge.net/ -and https://github.com/pciutils/pciids. - - - -9. Obsolete functions -~~~~~~~~~~~~~~~~~~~~~ - -There are several functions which you might come across when trying to -port an old driver to the new PCI interface. They are no longer present -in the kernel as they aren't compatible with hotplug or PCI domains or -having sane locking. - -pci_find_device() Superseded by pci_get_device() -pci_find_subsys() Superseded by pci_get_subsys() -pci_find_slot() Superseded by pci_get_domain_bus_and_slot() -pci_get_slot() Superseded by pci_get_domain_bus_and_slot() - - -The alternative is the traditional PCI device driver that walks PCI -device lists. This is still possible but discouraged. - - - -10. MMIO Space and "Write Posting" -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Converting a driver from using I/O Port space to using MMIO space -often requires some additional changes. Specifically, "write posting" -needs to be handled. Many drivers (e.g. tg3, acenic, sym53c8xx_2) -already do this. I/O Port space guarantees write transactions reach the PCI -device before the CPU can continue. Writes to MMIO space allow the CPU -to continue before the transaction reaches the PCI device. HW weenies -call this "Write Posting" because the write completion is "posted" to -the CPU before the transaction has reached its destination. - -Thus, timing sensitive code should add readl() where the CPU is -expected to wait before doing other work. The classic "bit banging" -sequence works fine for I/O Port space: - - for (i = 8; --i; val >>= 1) { - outb(val & 1, ioport_reg); /* write bit */ - udelay(10); - } - -The same sequence for MMIO space should be: - - for (i = 8; --i; val >>= 1) { - writeb(val & 1, mmio_reg); /* write bit */ - readb(safe_mmio_reg); /* flush posted write */ - udelay(10); - } - -It is important that "safe_mmio_reg" not have any side effects that -interferes with the correct operation of the device. - -Another case to watch out for is when resetting a PCI device. Use PCI -Configuration space reads to flush the writel(). This will gracefully -handle the PCI master abort on all platforms if the PCI device is -expected to not respond to a readl(). Most x86 platforms will allow -MMIO reads to master abort (a.k.a. "Soft Fail") and return garbage -(e.g. ~0). But many RISC platforms will crash (a.k.a."Hard Fail"). - diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 448621c32e4d..664c0fb1d53d 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -16,6 +16,25 @@ typedef unsigned long kernel_ulong_t; #define PCI_ANY_ID (~0) +/** + * struct pci_device_id - PCI device ID structure + * @vendor: Vendor ID to match (or PCI_ANY_ID) + * @device: Device ID to match (or PCI_ANY_ID) + * @subvendor: Subsystem vendor ID to match (or PCI_ANY_ID) + * @subdevice: Subsystem device ID to match (or PCI_ANY_ID) + * @class: Device class, subclass, and "interface" to match. + * See Appendix D of the PCI Local Bus Spec or + * include/linux/pci_ids.h for a full list of classes. + * Most drivers do not need to specify class/class_mask + * as vendor/device is normally sufficient. + * @class_mask: Limit which sub-fields of the class field are compared. + * See drivers/scsi/sym53c8xx_2/ for example of usage. + * @driver_data: Data private to the driver. + * Most drivers don't need to use driver_data field. + * Best practice is to use driver_data as an index + * into a static list of equivalent device types, + * instead of using it as a pointer. + */ struct pci_device_id { __u32 vendor, device; /* Vendor and device ID or PCI_ANY_ID*/ __u32 subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */ @@ -257,17 +276,17 @@ struct pcmcia_device_id { __u16 match_flags; __u16 manf_id; - __u16 card_id; + __u16 card_id; - __u8 func_id; + __u8 func_id; /* for real multi-function devices */ - __u8 function; + __u8 function; /* for pseudo multi-function devices */ - __u8 device_no; + __u8 device_no; - __u32 prod_id_hash[4]; + __u32 prod_id_hash[4]; /* not matched against in kernelspace */ const char * prod_id[4]; diff --git a/include/linux/pci.h b/include/linux/pci.h index 4a5a84d7bdd4..b74b2a4e6df2 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -151,6 +151,8 @@ static inline const char *pci_power_name(pci_power_t state) #define PCI_PM_BUS_WAIT 50 /** + * typedef pci_channel_state_t + * * The pci_channel state describes connectivity between the CPU and * the PCI device. If some PCI bus between here and the PCI device * has crashed or locked up, this info is reflected here. @@ -775,6 +777,50 @@ struct pci_error_handlers { struct module; + +/** + * struct pci_driver - PCI driver structure + * @node: List of driver structures. + * @name: Driver name. + * @id_table: Pointer to table of device IDs the driver is + * interested in. Most drivers should export this + * table using MODULE_DEVICE_TABLE(pci,...). + * @probe: This probing function gets called (during execution + * of pci_register_driver() for already existing + * devices or later if a new device gets inserted) for + * all PCI devices which match the ID table and are not + * "owned" by the other drivers yet. This function gets + * passed a "struct pci_dev \*" for each device whose + * entry in the ID table matches the device. The probe + * function returns zero when the driver chooses to + * take "ownership" of the device or an error code + * (negative number) otherwise. + * The probe function always gets called from process + * context, so it can sleep. + * @remove: The remove() function gets called whenever a device + * being handled by this driver is removed (either during + * deregistration of the driver or when it's manually + * pulled out of a hot-pluggable slot). + * The remove function always gets called from process + * context, so it can sleep. + * @suspend: Put device into low power state. + * @suspend_late: Put device into low power state. + * @resume_early: Wake device from low power state. + * @resume: Wake device from low power state. + * (Please see Documentation/power/pci.txt for descriptions + * of PCI Power Management and the related functions.) + * @shutdown: Hook into reboot_notifier_list (kernel/sys.c). + * Intended to stop any idling DMA operations. + * Useful for enabling wake-on-lan (NIC) or changing + * the power state of a device before reboot. + * e.g. drivers/net/e100.c. + * @sriov_configure: Optional driver callback to allow configuration of + * number of VFs to enable via sysfs "sriov_numvfs" file. + * @err_handler: See Documentation/PCI/pci-error-recovery.rst + * @groups: Sysfs attribute groups. + * @driver: Driver model structure. + * @dynids: List of dynamically added device IDs. + */ struct pci_driver { struct list_head node; const char *name; @@ -2206,7 +2252,7 @@ static inline u8 pci_vpd_srdt_tag(const u8 *srdt) /** * pci_vpd_info_field_size - Extracts the information field length - * @lrdt: Pointer to the beginning of an information field header + * @info_field: Pointer to the beginning of an information field header * * Returns the extracted information field length. */ -- cgit v1.2.3 From 9c3c0c2048149d946d7f3ebdcbe70e2946750bfb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Apr 2019 22:43:36 +0200 Subject: isdn: remove isdn4linux With all isdn4linux hardware drivers gone, this is only a wrapper around CAPI to support old user space. However, from looking at the mailing list, it seems that the last time anyone asked about it was in 2014, when the upgrade from a linux-2.4 installation failed, and mISDN was suggested as a replacement. The largest public ISDN network (Deutsche Telekom) was supposed to be shut down 2018, which must have drastically reduced the number of legacy installations. When we last discussed removing i4l in 2016, Karsten Keil suggested revisiting this in 2018. I guess this is overdue. Link: http://listserv.isdn4linux.de/pipermail/isdn4linux/2014-October/006165.html Link: https://patchwork.kernel.org/patch/8484861/#17900371 Link: https://listserv.isdn4linux.de/pipermail/isdn4linux/2019-April/thread.html Signed-off-by: Arnd Bergmann --- Documentation/isdn/INTERFACE | 759 ------- Documentation/isdn/INTERFACE.fax | 163 -- Documentation/isdn/README | 599 ------ Documentation/isdn/README.FAQ | 26 - Documentation/isdn/README.audio | 138 -- Documentation/isdn/README.concap | 259 --- Documentation/isdn/README.diversion | 127 -- Documentation/isdn/README.fax | 45 - Documentation/isdn/README.hfc-pci | 41 - Documentation/isdn/README.syncppp | 58 - Documentation/isdn/README.x25 | 184 -- Documentation/isdn/syncPPP.FAQ | 224 --- Documentation/process/changes.rst | 16 +- MAINTAINERS | 2 - drivers/isdn/Kconfig | 26 - drivers/isdn/Makefile | 2 - drivers/isdn/capi/Kconfig | 9 - drivers/isdn/capi/capidrv.c | 2525 ----------------------- drivers/isdn/capi/capidrv.h | 140 -- drivers/isdn/divert/Makefile | 10 - drivers/isdn/divert/divert_init.c | 82 - drivers/isdn/divert/divert_procfs.c | 336 ---- drivers/isdn/divert/isdn_divert.c | 846 -------- drivers/isdn/divert/isdn_divert.h | 132 -- drivers/isdn/i4l/Kconfig | 127 -- drivers/isdn/i4l/Makefile | 14 - drivers/isdn/i4l/isdn_audio.c | 711 ------- drivers/isdn/i4l/isdn_audio.h | 44 - drivers/isdn/i4l/isdn_bsdcomp.c | 930 --------- drivers/isdn/i4l/isdn_common.c | 2368 ---------------------- drivers/isdn/i4l/isdn_common.h | 47 - drivers/isdn/i4l/isdn_concap.c | 99 - drivers/isdn/i4l/isdn_concap.h | 11 - drivers/isdn/i4l/isdn_net.c | 3198 ----------------------------- drivers/isdn/i4l/isdn_net.h | 151 -- drivers/isdn/i4l/isdn_ppp.c | 3046 ---------------------------- drivers/isdn/i4l/isdn_ppp.h | 41 - drivers/isdn/i4l/isdn_tty.c | 3756 ----------------------------------- drivers/isdn/i4l/isdn_tty.h | 120 -- drivers/isdn/i4l/isdn_ttyfax.c | 1123 ----------- drivers/isdn/i4l/isdn_ttyfax.h | 17 - drivers/isdn/i4l/isdn_v110.c | 625 ------ drivers/isdn/i4l/isdn_v110.h | 29 - drivers/isdn/i4l/isdn_x25iface.c | 332 ---- drivers/isdn/i4l/isdn_x25iface.h | 30 - drivers/isdn/isdnloop/Makefile | 6 - drivers/isdn/isdnloop/isdnloop.c | 1528 -------------- drivers/isdn/isdnloop/isdnloop.h | 112 -- include/linux/concap.h | 112 -- include/linux/isdn.h | 473 ----- include/linux/isdn_divertif.h | 35 - include/linux/isdn_ppp.h | 194 -- include/linux/isdnif.h | 505 ----- include/linux/wanrouter.h | 11 - include/uapi/linux/isdn.h | 144 -- include/uapi/linux/isdn_divertif.h | 31 - include/uapi/linux/isdn_ppp.h | 68 - include/uapi/linux/isdnif.h | 57 - include/uapi/linux/wanrouter.h | 18 - 59 files changed, 2 insertions(+), 26860 deletions(-) delete mode 100644 Documentation/isdn/INTERFACE delete mode 100644 Documentation/isdn/INTERFACE.fax delete mode 100644 Documentation/isdn/README delete mode 100644 Documentation/isdn/README.FAQ delete mode 100644 Documentation/isdn/README.audio delete mode 100644 Documentation/isdn/README.concap delete mode 100644 Documentation/isdn/README.diversion delete mode 100644 Documentation/isdn/README.fax delete mode 100644 Documentation/isdn/README.hfc-pci delete mode 100644 Documentation/isdn/README.syncppp delete mode 100644 Documentation/isdn/README.x25 delete mode 100644 Documentation/isdn/syncPPP.FAQ delete mode 100644 drivers/isdn/capi/capidrv.c delete mode 100644 drivers/isdn/capi/capidrv.h delete mode 100644 drivers/isdn/divert/Makefile delete mode 100644 drivers/isdn/divert/divert_init.c delete mode 100644 drivers/isdn/divert/divert_procfs.c delete mode 100644 drivers/isdn/divert/isdn_divert.c delete mode 100644 drivers/isdn/divert/isdn_divert.h delete mode 100644 drivers/isdn/i4l/Kconfig delete mode 100644 drivers/isdn/i4l/isdn_audio.c delete mode 100644 drivers/isdn/i4l/isdn_audio.h delete mode 100644 drivers/isdn/i4l/isdn_bsdcomp.c delete mode 100644 drivers/isdn/i4l/isdn_common.c delete mode 100644 drivers/isdn/i4l/isdn_common.h delete mode 100644 drivers/isdn/i4l/isdn_concap.c delete mode 100644 drivers/isdn/i4l/isdn_concap.h delete mode 100644 drivers/isdn/i4l/isdn_net.c delete mode 100644 drivers/isdn/i4l/isdn_net.h delete mode 100644 drivers/isdn/i4l/isdn_ppp.c delete mode 100644 drivers/isdn/i4l/isdn_ppp.h delete mode 100644 drivers/isdn/i4l/isdn_tty.c delete mode 100644 drivers/isdn/i4l/isdn_tty.h delete mode 100644 drivers/isdn/i4l/isdn_ttyfax.c delete mode 100644 drivers/isdn/i4l/isdn_ttyfax.h delete mode 100644 drivers/isdn/i4l/isdn_v110.c delete mode 100644 drivers/isdn/i4l/isdn_v110.h delete mode 100644 drivers/isdn/i4l/isdn_x25iface.c delete mode 100644 drivers/isdn/i4l/isdn_x25iface.h delete mode 100644 drivers/isdn/isdnloop/Makefile delete mode 100644 drivers/isdn/isdnloop/isdnloop.c delete mode 100644 drivers/isdn/isdnloop/isdnloop.h delete mode 100644 include/linux/concap.h delete mode 100644 include/linux/isdn.h delete mode 100644 include/linux/isdn_divertif.h delete mode 100644 include/linux/isdn_ppp.h delete mode 100644 include/linux/isdnif.h delete mode 100644 include/linux/wanrouter.h delete mode 100644 include/uapi/linux/isdn.h delete mode 100644 include/uapi/linux/isdn_divertif.h delete mode 100644 include/uapi/linux/isdn_ppp.h delete mode 100644 include/uapi/linux/isdnif.h delete mode 100644 include/uapi/linux/wanrouter.h (limited to 'include/linux') diff --git a/Documentation/isdn/INTERFACE b/Documentation/isdn/INTERFACE deleted file mode 100644 index 5df17e5b25c8..000000000000 --- a/Documentation/isdn/INTERFACE +++ /dev/null @@ -1,759 +0,0 @@ -$Id: INTERFACE,v 1.15.8.2 2001/03/13 16:17:07 kai Exp $ - -Description of the Interface between Linklevel and Hardwarelevel - of isdn4linux: - - - The Communication between Linklevel (LL) and Hardwarelevel (HL) - is based on the struct isdn_if (defined in isdnif.h). - - An HL-driver can register itself at LL by calling the function - register_isdn() with a pointer to that struct. Prior to that, it has - to preset some of the fields of isdn_if. The LL sets the rest of - the fields. All further communication is done via callbacks using - the function-pointers defined in isdn_if. - - Changes/Version numbering: - - During development of the ISDN subsystem, several changes have been - made to the interface. Before it went into kernel, the package - had a unique version number. The last version, distributed separately - was 0.7.4. When the subsystem went into kernel, every functional unit - got a separate version number. These numbers are shown at initialization, - separated by slashes: - - c.c/t.t/n.n/p.p/a.a/v.v - - where - - c.c is the revision of the common code. - t.t is the revision of the tty related code. - n.n is the revision of the network related code. - p.p is the revision of the ppp related code. - a.a is the revision of the audio related code. - v.v is the revision of the V.110 related code. - - Changes in this document are marked with '***CHANGEx' where x representing - the version number. If that number starts with 0, it refers to the old, - separately distributed package. If it starts with one of the letters - above, it refers to the revision of the corresponding module. - ***CHANGEIx refers to the revision number of the isdnif.h - -1. Description of the fields of isdn_if: - - int channels; - - This field has to be set by the HL-driver to the number of channels - supported prior to calling register_isdn(). Upon return of the call, - the LL puts an id there, which has to be used by the HL-driver when - invoking the other callbacks. - - int maxbufsize; - - ***CHANGE0.6: New since this version. - - Also to be preset by the HL-driver. With this value the HL-driver - tells the LL the maximum size of a data-packet it will accept. - - unsigned long features; - - To be preset by the HL-driver. Using this field, the HL-driver - announces the features supported. At the moment this is limited to - report the supported layer2 and layer3-protocols. For setting this - field the constants ISDN_FEATURE..., declared in isdnif.h have to be - used. - - ***CHANGE0.7.1: The line type (1TR6, EDSS1) has to be set. - - unsigned short hl_hdrlen; - - ***CHANGE0.7.4: New field. - - To be preset by the HL-driver, if it supports sk_buff's. The driver - should put here the amount of additional space needed in sk_buff's for - its internal purposes. Drivers not supporting sk_buff's should - initialize this field to 0. - - void (*rcvcallb_skb)(int, int, struct sk_buff *) - - ***CHANGE0.7.4: New field. - - This field will be set by LL. The HL-driver delivers received data- - packets by calling this function. Upon calling, the HL-driver must - already have its private data pulled off the head of the sk_buff. - - Parameter: - int driver-Id - int Channel-number locally to the driver. (starting with 0) - struct sk_buff * Pointer to sk_buff, containing received data. - - int (*statcallb)(isdn_ctrl*); - - This field will be set by LL. This function has to be called by the - HL-driver for signaling status-changes or other events to the LL. - - Parameter: - isdn_ctrl* - - The struct isdn_ctrl also defined in isdn_if. The exact meanings of its - fields are described together with the descriptions of the possible - events. Here is only a short description of the fields: - - driver = driver Id. - command = event-type. (one of the constants ISDN_STAT_...) - arg = depends on event-type. - num = depends on event-type. - - Returnvalue: - 0 on success, else -1 - - int (*command)(isdn_ctrl*); - - This field has to be preset by the HL-driver. It points to a function, - to be called by LL to perform functions like dialing, B-channel - setup, etc. The exact meaning of the parameters is described with the - descriptions of the possible commands. - - Parameter: - isdn_ctrl* - driver = driver-Id - command = command to perform. (one of the constants ISDN_CMD_...) - arg = depends on command. - num = depends on command. - - Returnvalue: - >=0 on success, else error-code (-ENODEV etc.) - - int (*writebuf_skb)(int, int, int, struct sk_buff *) - - ***CHANGE0.7.4: New field. - ***CHANGEI.1.21: New field. - - This field has to be preset by the HL-driver. The given function will - be called by the LL for delivering data to be send via B-Channel. - - - Parameter: - int driver-Id ***CHANGE0.7.4: New parameter. - int channel-number locally to the HL-driver. (starts with 0) - int ack ***ChangeI1.21: New parameter - If this is !0, the driver has to signal the delivery - by sending an ISDN_STAT_BSENT. If this is 0, the driver - MUST NOT send an ISDN_STAT_BSENT. - struct sk_buff * Pointer to sk_buff containing data to be send via - B-channel. - - Returnvalue: - Length of data accepted on success, else error-code (-EINVAL on - oversized packets etc.) - - int (*writecmd)(u_char*, int, int, int, int); - - This field has to be preset by the HL-driver. The given function will be - called to perform write-requests on /dev/isdnctrl (i.e. sending commands - to the card) The data-format is hardware-specific. This function is - intended for debugging only. It is not necessary for normal operation - and never will be called by the tty-emulation- or network-code. If - this function is not supported, the driver has to set NULL here. - - Parameter: - u_char* pointer to data. - int length of data. - int flag: 0 = call from within kernel-space. (HL-driver must use - memcpy, may NOT use schedule()) - 1 = call from user-space. (HL-driver must use - memcpy_fromfs, use of schedule() allowed) - int driver-Id. - int channel-number locally to the HL-driver. (starts with 0) - -***CHANGEI1.14: The driver-Id and channel-number are new since this revision. - - Returnvalue: - Length of data accepted on success, else error-code (-EINVAL etc.) - - int (*readstat)(u_char*, int, int, int, int); - - This field has to be preset by the HL-driver. The given function will be - called to perform read-requests on /dev/isdnctrl (i.e. reading replies - from the card) The data-format is hardware-specific. This function is - intended for debugging only. It is not necessary for normal operation - and never will be called by the tty-emulation- or network-code. If - this function is not supported, the driver has to set NULL here. - - Parameter: - u_char* pointer to data. - int length of data. - int flag: 0 = call from within kernel-space. (HL-driver must use - memcpy, may NOT use schedule()) - 1 = call from user-space. (HL-driver must use - memcpy_fromfs, use of schedule() allowed) - int driver-Id. - int channel-number locally to the HL-driver. (starts with 0) - -***CHANGEI1.14: The driver-Id and channel-number are new since this revision. - - Returnvalue: - Length of data on success, else error-code (-EINVAL etc.) - - char id[20]; - ***CHANGE0.7: New since this version. - - This string has to be preset by the HL-driver. Its purpose is for - identification of the driver by the user. Eg.: it is shown in the - status-info of /dev/isdninfo. Furthermore it is used as Id for binding - net-interfaces to a specific channel. If a string of length zero is - given, upon return, isdn4linux will replace it by a generic name. (line0, - line1 etc.) It is recommended to make this string configurable during - module-load-time. (copy a global variable to this string.) For doing that, - modules 1.2.8 or newer are necessary. - -2. Description of the commands, a HL-driver has to support: - - All commands will be performed by calling the function command() described - above from within the LL. The field command of the struct-parameter will - contain the desired command, the field driver is always set to the - appropriate driver-Id. - - Until now, the following commands are defined: - -***CHANGEI1.34: The parameter "num" has been replaced by a union "parm" containing - the old "num" and a new setup_type struct used for ISDN_CMD_DIAL - and ISDN_STAT_ICALL callback. - - ISDN_CMD_IOCTL: - - This command is intended for performing ioctl-calls for configuring - hardware or similar purposes (setting port-addresses, loading firmware - etc.) For this purpose, in the LL all ioctl-calls with an argument - >= IIOCDRVCTL (0x100) will be handed transparently to this - function after subtracting 0x100 and placing the result in arg. - Example: - If a userlevel-program calls ioctl(0x101,...) the function gets - called with the field command set to 1. - - Parameter: - driver = driver-Id. - command = ISDN_CMD_IOCTL - arg = Original ioctl-cmd - IIOCDRVCTL - parm.num = first bytes filled with (unsigned long)arg - - Returnvalue: - Depending on driver. - - - ISDN_CMD_DIAL: - - This command is used to tell the HL-driver it should dial a given - number. - - Parameter: - driver = driver-Id. - command = ISDN_CMD_DIAL - arg = channel-number locally to the driver. (starting with 0) - - parm.setup.phone = An ASCII-String containing the number to dial. - parm.setup.eazmsn = An ASCII-Sting containing the own EAZ or MSN. - parm.setup.si1 = The Service-Indicator. - parm.setup.si2 = Additional Service-Indicator. - - If the Line has been designed as SPV (a special german - feature, meaning semi-leased-line) the phone has to - start with an "S". - ***CHANGE0.6: In previous versions the EAZ has been given in the - highbyte of arg. - ***CHANGE0.7.1: New since this version: ServiceIndicator and AddInfo. - - ISDN_CMD_ACCEPTD: - - With this command, the HL-driver is told to accept a D-Channel-setup. - (Response to an incoming call) - - Parameter: - driver = driver-Id. - command = ISDN_CMD_ACCEPTD - arg = channel-number locally to the driver. (starting with 0) - parm = unused. - - ISDN_CMD_ACCEPTB: - - With this command, the HL-driver is told to perform a B-Channel-setup. - (after establishing D-Channel-Connection) - - Parameter: - driver = driver-Id. - command = ISDN_CMD_ACCEPTB - arg = channel-number locally to the driver. (starting with 0) - parm = unused. - - ISDN_CMD_HANGUP: - - With this command, the HL-driver is told to hangup (B-Channel if - established first, then D-Channel). This command is also used for - actively rejecting an incoming call. - - Parameter: - driver = driver-Id. - command = ISDN_CMD_HANGUP - arg = channel-number locally to the driver. (starting with 0) - parm = unused. - - ISDN_CMD_CLREAZ: - - With this command, the HL-driver is told not to signal incoming - calls to the LL. - - Parameter: - driver = driver-Id. - command = ISDN_CMD_CLREAZ - arg = channel-number locally to the driver. (starting with 0) - parm = unused. - - ISDN_CMD_SETEAZ: - - With this command, the HL-driver is told to signal incoming calls for - the given EAZs/MSNs to the LL. - - Parameter: - driver = driver-Id. - command = ISDN_CMD_SETEAZ - arg = channel-number locally to the driver. (starting with 0) - parm.num = ASCII-String, containing the desired EAZ's/MSN's - (comma-separated). If an empty String is given, the - HL-driver should respond to ALL incoming calls, - regardless of the destination-address. - ***CHANGE0.6: New since this version the "empty-string"-feature. - - ISDN_CMD_GETEAZ: (currently unused) - - With this command, the HL-driver is told to report the current setting - given with ISDN_CMD_SETEAZ. - - Parameter: - driver = driver-Id. - command = ISDN_CMD_GETEAZ - arg = channel-number locally to the driver. (starting with 0) - parm.num = ASCII-String, containing the current EAZ's/MSN's - - ISDN_CMD_SETSIL: (currently unused) - - With this command, the HL-driver is told to signal only incoming - calls with the given Service-Indicators. - - Parameter: - driver = driver-Id. - command = ISDN_CMD_SETSIL - arg = channel-number locally to the driver. (starting with 0) - parm.num = ASCII-String, containing the desired Service-Indicators. - - ISDN_CMD_GETSIL: (currently unused) - - With this command, the HL-driver is told to return the current - Service-Indicators it will respond to. - - Parameter: - driver = driver-Id. - command = ISDN_CMD_SETSIL - arg = channel-number locally to the driver. (starting with 0) - parm.num = ASCII-String, containing the current Service-Indicators. - - ISDN_CMD_SETL2: - - With this command, the HL-driver is told to select the given Layer-2- - protocol. This command is issued by the LL prior to ISDN_CMD_DIAL or - ISDN_CMD_ACCEPTD. - - - Parameter: - driver = driver-Id. - command = ISDN_CMD_SETL2 - arg = channel-number locally to the driver. (starting with 0) - logical or'ed with (protocol-Id << 8) - protocol-Id is one of the constants ISDN_PROTO_L2... - parm = unused. - - ISDN_CMD_GETL2: (currently unused) - - With this command, the HL-driver is told to return the current - setting of the Layer-2-protocol. - - Parameter: - driver = driver-Id. - command = ISDN_CMD_GETL2 - arg = channel-number locally to the driver. (starting with 0) - parm = unused. - Returnvalue: - current protocol-Id (one of the constants ISDN_L2_PROTO) - - ISDN_CMD_SETL3: - - With this command, the HL-driver is told to select the given Layer-3- - protocol. This command is issued by the LL prior to ISDN_CMD_DIAL or - ISDN_CMD_ACCEPTD. - - - Parameter: - driver = driver-Id. - command = ISDN_CMD_SETL3 - arg = channel-number locally to the driver. (starting with 0) - logical or'ed with (protocol-Id << 8) - protocol-Id is one of the constants ISDN_PROTO_L3... - parm.fax = Pointer to T30_s fax struct. (fax usage only) - - ISDN_CMD_GETL2: (currently unused) - - With this command, the HL-driver is told to return the current - setting of the Layer-3-protocol. - - Parameter: - driver = driver-Id. - command = ISDN_CMD_GETL3 - arg = channel-number locally to the driver. (starting with 0) - parm = unused. - Returnvalue: - current protocol-Id (one of the constants ISDN_L3_PROTO) - - ISDN_CMD_PROCEED: - - With this command, the HL-driver is told to proceed with a incoming call. - - Parameter: - driver = driver-Id. - command = ISDN_CMD_PROCEED - arg = channel-number locally to the driver. (starting with 0) - setup.eazmsn= empty string or string send as uus1 in DSS1 with - PROCEED message - - ISDN_CMD_ALERT: - - With this command, the HL-driver is told to alert a proceeding call. - - Parameter: - driver = driver-Id. - command = ISDN_CMD_ALERT - arg = channel-number locally to the driver. (starting with 0) - setup.eazmsn= empty string or string send as uus1 in DSS1 with - ALERT message - - ISDN_CMD_REDIR: - - With this command, the HL-driver is told to redirect a call in proceeding - or alerting state. - - Parameter: - driver = driver-Id. - command = ISDN_CMD_REDIR - arg = channel-number locally to the driver. (starting with 0) - setup.eazmsn= empty string or string send as uus1 in DSS1 protocol - setup.screen= screening indicator - setup.phone = redirected to party number - - ISDN_CMD_PROT_IO: - - With this call, the LL-driver invokes protocol specific features through - the LL. - The call is not implicitely bound to a connection. - - Parameter: - driver = driver-Id - command = ISDN_CMD_PROT_IO - arg = The lower 8 Bits define the addressed protocol as defined - in ISDN_PTYPE..., the upper bits are used to differentiate - the protocol specific CMD. - - para = protocol and function specific. See isdnif.h for detail. - - - ISDN_CMD_FAXCMD: - - With this command the HL-driver receives a fax sub-command. - For details refer to INTERFACE.fax - - Parameter: - driver = driver-Id. - command = ISDN_CMD_FAXCMD - arg = channel-number locally to the driver. (starting with 0) - parm = unused. - - -3. Description of the events to be signaled by the HL-driver to the LL. - - All status-changes are signaled via calling the previously described - function statcallb(). The field command of the struct isdn_cmd has - to be set by the HL-driver with the appropriate Status-Id (event-number). - The field arg has to be set to the channel-number (locally to the driver, - starting with 0) to which this event applies. (Exception: STAVAIL-event) - - Until now, the following Status-Ids are defined: - - ISDN_STAT_AVAIL: - - With this call, the HL-driver signals the availability of new data - for readstat(). Used only for debugging-purposes, see description - of readstat(). - - Parameter: - driver = driver-Id - command = ISDN_STAT_STAVAIL - arg = length of available data. - parm = unused. - - ISDN_STAT_ICALL: - ISDN_STAT_ICALLW: - - With this call, the HL-driver signals an incoming call to the LL. - If ICALLW is signalled the incoming call is a waiting call without - a available B-chan. - - Parameter: - driver = driver-Id - command = ISDN_STAT_ICALL - arg = channel-number, locally to the driver. (starting with 0) - para.setup.phone = Callernumber. - para.setup.eazmsn = CalledNumber. - para.setup.si1 = Service Indicator. - para.setup.si2 = Additional Service Indicator. - para.setup.plan = octet 3 from Calling party number Information Element. - para.setup.screen = octet 3a from Calling party number Information Element. - - Return: - 0 = No device matching this call. - 1 = At least one device matching this call (RING on ttyI). - HL-driver may send ALERTING on the D-channel in this case. - 2 = Call will be rejected. - 3 = Incoming called party number is currently incomplete. - Additional digits are required. - Used for signalling with PtP connections. - 4 = Call will be held in a proceeding state - (HL driver sends PROCEEDING) - Used when a user space prog needs time to interpret a call - para.setup.eazmsn may be filled with an uus1 message of - 30 octets maximum. Empty string if no uus. - 5 = Call will be actively deflected to another party - Only available in DSS1/EURO protocol - para.setup.phone must be set to destination party number - para.setup.eazmsn may be filled with an uus1 message of - 30 octets maximum. Empty string if no uus. - -1 = An error happened. (Invalid parameters for example.) - The keypad support now is included in the dial command. - - - ISDN_STAT_RUN: - - With this call, the HL-driver signals availability of the ISDN-card. - (after initializing, loading firmware) - - Parameter: - driver = driver-Id - command = ISDN_STAT_RUN - arg = unused. - parm = unused. - - ISDN_STAT_STOP: - - With this call, the HL-driver signals unavailability of the ISDN-card. - (before unloading, while resetting/reconfiguring the card) - - Parameter: - driver = driver-Id - command = ISDN_STAT_STOP - arg = unused. - parm = unused. - - ISDN_STAT_DCONN: - - With this call, the HL-driver signals the successful establishment of - a D-Channel-connection. (Response to ISDN_CMD_ACCEPTD or ISDN_CMD_DIAL) - - Parameter: - driver = driver-Id - command = ISDN_STAT_DCONN - arg = channel-number, locally to the driver. (starting with 0) - parm = unused. - - ISDN_STAT_BCONN: - - With this call, the HL-driver signals the successful establishment of - a B-Channel-connection. (Response to ISDN_CMD_ACCEPTB or because the - remote-station has initiated establishment) - - The HL driver should call this when the logical l2/l3 protocol - connection on top of the physical B-channel is established. - - Parameter: - driver = driver-Id - command = ISDN_STAT_BCONN - arg = channel-number, locally to the driver. (starting with 0) - parm.num = ASCII-String, containing type of connection (for analog - modem only). This will be appended to the CONNECT message - e.g. 14400/V.32bis - - ISDN_STAT_DHUP: - - With this call, the HL-driver signals the shutdown of a - D-Channel-connection. This could be a response to a prior ISDN_CMD_HANGUP, - or caused by a remote-hangup or if the remote-station has actively - rejected a call. - - Parameter: - driver = driver-Id - command = ISDN_STAT_DHUP - arg = channel-number, locally to the driver. (starting with 0) - parm = unused. - - ISDN_STAT_BHUP: - - With this call, the HL-driver signals the shutdown of a - B-Channel-connection. This could be a response to a prior ISDN_CMD_HANGUP, - or caused by a remote-hangup. - - The HL driver should call this as soon as the logical l2/l3 protocol - connection on top of the physical B-channel is released. - - Parameter: - driver = driver-Id - command = ISDN_STAT_BHUP - arg = channel-number, locally to the driver. (starting with 0) - parm = unused. - - ISDN_STAT_CINF: - - With this call, the HL-driver delivers charge-unit information to the - LL. - - Parameter: - driver = driver-Id - command = ISDN_STAT_CINF - arg = channel-number, locally to the driver. (starting with 0) - parm.num = ASCII string containing charge-units (digits only). - - ISDN_STAT_LOAD: (currently unused) - - ISDN_STAT_UNLOAD: - - With this call, the HL-driver signals that it will be unloaded now. This - tells the LL to release all corresponding data-structures. - - Parameter: - driver = driver-Id - command = ISDN_STAT_UNLOAD - arg = unused. - parm = unused. - - ISDN_STAT_BSENT: - - With this call the HL-driver signals the delivery of a data-packet. - This callback is used by the network-interfaces only, tty-Emulation - does not need this call. - - Parameter: - driver = driver-Id - command = ISDN_STAT_BSENT - arg = channel-number, locally to the driver. (starting with 0) - parm.length = ***CHANGEI.1.21: New field. - the driver has to set this to the original length - of the skb at the time of receiving it from the linklevel. - - ISDN_STAT_NODCH: - - With this call, the driver has to respond to a prior ISDN_CMD_DIAL, if - no D-Channel is available. - - Parameter: - driver = driver-Id - command = ISDN_STAT_NODCH - arg = channel-number, locally to the driver. (starting with 0) - parm = unused. - - ISDN_STAT_ADDCH: - - This call is for HL-drivers, which are unable to check card-type - or numbers of supported channels before they have loaded any firmware - using ioctl. Those HL-driver simply set the channel-parameter to a - minimum channel-number when registering, and later if they know - the real amount, perform this call, allocating additional channels. - - Parameter: - driver = driver-Id - command = ISDN_STAT_ADDCH - arg = number of channels to be added. - parm = unused. - - ISDN_STAT_CAUSE: - - With this call, the HL-driver delivers CAUSE-messages to the LL. - Currently the LL does not use this messages. Their contents is simply - logged via kernel-messages. Therefore, currently the format of the - messages is completely free. However they should be printable. - - Parameter: - driver = driver-Id - command = ISDN_STAT_NODCH - arg = channel-number, locally to the driver. (starting with 0) - parm.num = ASCII string containing CAUSE-message. - - ISDN_STAT_DISPLAY: - - With this call, the HL-driver delivers DISPLAY-messages to the LL. - Currently the LL does not use this messages. - - Parameter: - driver = driver-Id - command = ISDN_STAT_DISPLAY - arg = channel-number, locally to the driver. (starting with 0) - para.display= string containing DISPLAY-message. - - ISDN_STAT_PROT: - - With this call, the HL-driver delivers protocol specific infos to the LL. - The call is not implicitely bound to a connection. - - Parameter: - driver = driver-Id - command = ISDN_STAT_PROT - arg = The lower 8 Bits define the addressed protocol as defined - in ISDN_PTYPE..., the upper bits are used to differentiate - the protocol specific STAT. - - para = protocol and function specific. See isdnif.h for detail. - - ISDN_STAT_DISCH: - - With this call, the HL-driver signals the LL to disable or enable the - use of supplied channel and driver. - The call may be used to reduce the available number of B-channels after - loading the driver. The LL has to ignore a disabled channel when searching - for free channels. The HL driver itself never delivers STAT callbacks for - disabled channels. - The LL returns a nonzero code if the operation was not successful or the - selected channel is actually regarded as busy. - - Parameter: - driver = driver-Id - command = ISDN_STAT_DISCH - arg = channel-number, locally to the driver. (starting with 0) - parm.num[0] = 0 if channel shall be disabled, else enabled. - - ISDN_STAT_L1ERR: - - ***CHANGEI1.21 new status message. - A signal can be sent to the linklevel if an Layer1-error results in - packet-loss on receive or send. The field errcode of the cmd.parm - union describes the error more precisely. - - Parameter: - driver = driver-Id - command = ISDN_STAT_L1ERR - arg = channel-number, locally to the driver. (starting with 0) - parm.errcode= ISDN_STAT_L1ERR_SEND: Packet lost while sending. - ISDN_STAT_L1ERR_RECV: Packet lost while receiving. - ISDN_STAT_FAXIND: - - With this call the HL-driver signals a fax sub-command to the LL. - For details refer to INTERFACE.fax - - Parameter: - driver = driver-Id. - command = ISDN_STAT_FAXIND - arg = channel-number, locally to the driver. (starting with 0) - parm = unused. - diff --git a/Documentation/isdn/INTERFACE.fax b/Documentation/isdn/INTERFACE.fax deleted file mode 100644 index 9c8c6d914ec7..000000000000 --- a/Documentation/isdn/INTERFACE.fax +++ /dev/null @@ -1,163 +0,0 @@ -$Id: INTERFACE.fax,v 1.2 2000/08/06 09:22:50 armin Exp $ - - -Description of the fax-subinterface between linklevel and hardwarelevel of - isdn4linux. - - The communication between linklevel (LL) and hardwarelevel (HL) for fax - is based on the struct T30_s (defined in isdnif.h). - This struct is allocated in the LL. - In order to use fax, the LL provides the pointer to this struct with the - command ISDN_CMD_SETL3 (parm.fax). This pointer expires in case of hangup - and when a new channel to a new connection is assigned. - - -Data handling: - In send-mode the HL-driver has to handle the codes and the bit-order - conversion by itself. - In receive-mode the LL-driver takes care of the bit-order conversion - (specified by +FBOR) - -Structure T30_s description: - - This structure stores the values (set by AT-commands), the remote- - capability-values and the command-codes between LL and HL. - - If the HL-driver receives ISDN_CMD_FAXCMD, all needed information - is in this struct set by the LL. - To signal information to the LL, the HL-driver has to set the - parameters and use ISDN_STAT_FAXIND. - (Please refer to INTERFACE) - -Structure T30_s: - - All members are 8-bit unsigned (__u8) - - - resolution - - rate - - width - - length - - compression - - ecm - - binary - - scantime - - id[] - Local faxmachine's parameters, set by +FDIS, +FDCS, +FLID, ... - - - r_resolution - - r_rate - - r_width - - r_length - - r_compression - - r_ecm - - r_binary - - r_scantime - - r_id[] - Remote faxmachine's parameters. To be set by HL-driver. - - - phase - Defines the actual state of fax connection. Set by HL or LL - depending on progress and type of connection. - If the phase changes because of an AT command, the LL driver - changes this value. Otherwise the HL-driver takes care of it, but - only necessary on call establishment (from IDLE to PHASE_A). - (one of the constants ISDN_FAX_PHASE_[IDLE,A,B,C,D,E]) - - - direction - Defines outgoing/send or incoming/receive connection. - (ISDN_TTY_FAX_CONN_[IN,OUT]) - - - code - Commands from LL to HL; possible constants : - ISDN_TTY_FAX_DR signals +FDR command to HL - - ISDN_TTY_FAX_DT signals +FDT command to HL - - ISDN_TTY_FAX_ET signals +FET command to HL - - - Other than that the "code" is set with the hangup-code value at - the end of connection for the +FHNG message. - - - r_code - Commands from HL to LL; possible constants : - ISDN_TTY_FAX_CFR output of +FCFR message. - - ISDN_TTY_FAX_RID output of remote ID set in r_id[] - (+FCSI/+FTSI on send/receive) - - ISDN_TTY_FAX_DCS output of +FDCS and CONNECT message, - switching to phase C. - - ISDN_TTY_FAX_ET signals end of data, - switching to phase D. - - ISDN_TTY_FAX_FCON signals the established, outgoing connection, - switching to phase B. - - ISDN_TTY_FAX_FCON_I signals the established, incoming connection, - switching to phase B. - - ISDN_TTY_FAX_DIS output of +FDIS message and values. - - ISDN_TTY_FAX_SENT signals that all data has been sent - and is acknowledged, - OK message will be sent. - - ISDN_TTY_FAX_PTS signals a msg-confirmation (page sent successful), - depending on fet value: - 0: output OK message (more pages follow) - 1: switching to phase B (next document) - - ISDN_TTY_FAX_TRAIN_OK output of +FDCS and OK message (for receive mode). - - ISDN_TTY_FAX_EOP signals end of data in receive mode, - switching to phase D. - - ISDN_TTY_FAX_HNG output of the +FHNG and value set by code and - OK message, switching to phase E. - - - - badlin - Value of +FBADLIN - - - badmul - Value of +FBADMUL - - - bor - Value of +FBOR - - - fet - Value of +FET command in send-mode. - Set by HL in receive-mode for +FET message. - - - pollid[] - ID-string, set by +FCIG - - - cq - Value of +FCQ - - - cr - Value of +FCR - - - ctcrty - Value of +FCTCRTY - - - minsp - Value of +FMINSP - - - phcto - Value of +FPHCTO - - - rel - Value of +FREL - - - nbc - Value of +FNBC (0,1) - (+FNBC is not a known class 2 fax command, I added this to change the - automatic "best capabilities" connection in the eicon HL-driver) - - -Armin -mac@melware.de - diff --git a/Documentation/isdn/README b/Documentation/isdn/README deleted file mode 100644 index 74bd2bdb455b..000000000000 --- a/Documentation/isdn/README +++ /dev/null @@ -1,599 +0,0 @@ -README for the ISDN-subsystem - -1. Preface - - 1.1 Introduction - - This README describes how to set up and how to use the different parts - of the ISDN-subsystem. - - For using the ISDN-subsystem, some additional userlevel programs are - necessary. Those programs and some contributed utilities are available - at - - ftp.isdn4linux.de - - /pub/isdn4linux/isdn4k-utils-.tar.gz - - - We also have set up a mailing-list: - - The isdn4linux-project originates in Germany, and therefore by historical - reasons, the mailing-list's primary language is german. However mails - written in english have been welcome all the time. - - to subscribe: write a email to majordomo@listserv.isdn4linux.de, - Subject irrelevant, in the message body: - subscribe isdn4linux - - To write to the mailing-list, write to isdn4linux@listserv.isdn4linux.de - - This mailinglist is bidirectionally gated to the newsgroup - - de.alt.comm.isdn4linux - - There is also a well maintained FAQ in English available at - https://www.mhessler.de/i4lfaq/ - It can be viewed online, or downloaded in sgml/text/html format. - The FAQ can also be viewed online at - https://www.isdn4linux.de/faq/i4lfaq.html - or downloaded from - ftp://ftp.isdn4linux.de/pub/isdn4linux/FAQ/ - - 1.1 Technical details - - In the following Text, the terms MSN and EAZ are used. - - MSN is the abbreviation for (M)ultiple(S)ubscriber(N)umber, and applies - to Euro(EDSS1)-type lines. Usually it is simply the phone number. - - EAZ is the abbreviation of (E)ndgeraete(A)uswahl(Z)iffer and - applies to German 1TR6-type lines. This is a one-digit string, - simply appended to the base phone number - - The internal handling is nearly identical, so replace the appropriate - term to that one, which applies to your local ISDN-environment. - - When the link-level-module isdn.o is loaded, it supports up to 16 - low-level-modules with up to 64 channels. (The number 64 is arbitrarily - chosen and can be configured at compile-time --ISDN_MAX in isdn.h). - A low-level-driver can register itself through an interface (which is - defined in isdnif.h) and gets assigned a slot. - The following char-devices are made available for each channel: - - A raw-control-device with the following functions: - write: raw D-channel-messages (format: depends on driver). - read: raw D-channel-messages (format: depends on driver). - ioctl: depends on driver, i.e. for the ICN-driver, the base-address of - the ports and the shared memory on the card can be set and read - also the boot-code and the protocol software can be loaded into - the card. - - O N L Y !!! for debugging (no locking against other devices): - One raw-data-device with the following functions: - write: data to B-channel. - read: data from B-channel. - - In addition the following devices are made available: - - 128 tty-devices (64 cuix and 64 ttyIx) with integrated modem-emulator: - The functionality is almost the same as that of a serial device - (the line-discs are handled by the kernel), which lets you run - SLIP, CSLIP and asynchronous PPP through the devices. We have tested - Seyon, minicom, CSLIP (uri-dip) PPP, mgetty, XCept and Hylafax. - - The modem-emulation supports the following: - 1.3.1 Commands: - - ATA Answer incoming call. - ATD Dial, the number may contain: - [0-9] and [,#.*WPT-S] - the latter are ignored until 'S'. - The 'S' must precede the number, if - the line is a SPV (German 1TR6). - ATE0 Echo off. - ATE1 Echo on (default). - ATH Hang-up. - ATH1 Off hook (ignored). - ATH0 Hang-up. - ATI Return "ISDN for Linux...". - ATI0 " - ATI1 " - ATI2 Report of last connection. - ATO On line (data mode). - ATQ0 Enable result codes (default). - ATQ1 Disable result codes (default). - ATSx=y Set register x to y. - ATSx? Show contents of register x. - ATV0 Numeric responses. - ATV1 English responses (default). - ATZ Load registers and EAZ/MSN from Profile. - AT&Bx Set Send-Packet-size to x (max. 4000) - The real packet-size may be limited by the - low-level-driver used. e.g. the HiSax-Module- - limit is 2000. You will get NO Error-Message, - if you set it to higher values, because at the - time of giving this command the corresponding - driver may not be selected (see "Automatic - Assignment") however the size of outgoing packets - will be limited correctly. - AT&D0 Ignore DTR - AT&D2 DTR-low-edge: Hang up and return to - command mode (default). - AT&D3 Same as AT&D2 but also resets all registers. - AT&Ex Set the EAZ/MSN for this channel to x. - AT&F Reset all registers and profile to "factory-defaults" - AT&Lx Set list of phone numbers to listen on. x is a - list of wildcard patterns separated by semicolon. - If this is set, it has precedence over the MSN set - by AT&E. - AT&Rx Select V.110 bitrate adaption. - This command enables V.110 protocol with 9600 baud - (x=9600), 19200 baud (x=19200) or 38400 baud - (x=38400). A value of x=0 disables V.110 switching - back to default X.75. This command sets the following - Registers: - Reg 14 (Layer-2 protocol): - x = 0: 0 - x = 9600: 7 - x = 19200: 8 - x = 38400: 9 - Reg 18.2 = 1 - Reg 19 (Additional Service Indicator): - x = 0: 0 - x = 9600: 197 - x = 19200: 199 - x = 38400: 198 - Note on value in Reg 19: - There is _NO_ common convention for 38400 baud. - The value 198 is chosen arbitrarily. Users - _MUST_ negotiate this value before establishing - a connection. - AT&Sx Set window-size (x = 1..8) (not yet implemented) - AT&V Show all settings. - AT&W0 Write registers and EAZ/MSN to profile. See also - iprofd (5.c in this README). - AT&X0 BTX-mode and T.70-mode off (default) - AT&X1 BTX-mode on. (S13.1=1, S13.5=0 S14=0, S16=7, S18=7, S19=0) - AT&X2 T.70-mode on. (S13.1=1, S13.5=1, S14=0, S16=7, S18=7, S19=0) - AT+Rx Resume a suspended call with CallID x (x = 1,2,3...) - AT+Sx Suspend a call with CallID x (x = 1,2,3...) - - For voice-mode commands refer to README.audio - - 1.3.2 Escape sequence: - During a connection, the emulation reacts just like - a normal modem to the escape sequence +++. - (The escape character - default '+' - can be set in the - register 2). - The DELAY must at least be 1.5 seconds long and delay - between the escape characters must not exceed 0.5 seconds. - - 1.3.3 Registers: - - Nr. Default Description - 0 0 Answer on ring number. - (no auto-answer if S0=0). - 1 0 Count of rings. - 2 43 Escape character. - (a value >= 128 disables the escape sequence). - 3 13 Carriage return character (ASCII). - 4 10 Line feed character (ASCII). - 5 8 Backspace character (ASCII). - 6 3 Delay in seconds before dialing. - 7 60 Wait for carrier. - 8 2 Pause time for comma (ignored) - 9 6 Carrier detect time (ignored) - 10 7 Carrier loss to disconnect time (ignored). - 11 70 Touch tone timing (ignored). - 12 69 Bit coded register: - Bit 0: 0 = Suppress response messages. - 1 = Show response messages. - Bit 1: 0 = English response messages. - 1 = Numeric response messages. - Bit 2: 0 = Echo off. - 1 = Echo on. - Bit 3 0 = DCD always on. - 1 = DCD follows carrier. - Bit 4 0 = CTS follows RTS - 1 = Ignore RTS, CTS always on. - Bit 5 0 = return to command mode on DTR low. - 1 = Same as 0 but also resets all - registers. - See also register 13, bit 2 - Bit 6 0 = DSR always on. - 1 = DSR only on if channel is available. - Bit 7 0 = Cisco-PPP-flag-hack off (default). - 1 = Cisco-PPP-flag-hack on. - 13 0 Bit coded register: - Bit 0: 0 = Use delayed tty-send-algorithm - 1 = Direct tty-send. - Bit 1: 0 = T.70 protocol (Only for BTX!) off - 1 = T.70 protocol (Only for BTX!) on - Bit 2: 0 = Don't hangup on DTR low. - 1 = Hangup on DTR low. - Bit 3: 0 = Standard response messages - 1 = Extended response messages - Bit 4: 0 = CALLER NUMBER before every RING. - 1 = CALLER NUMBER after first RING. - Bit 5: 0 = T.70 extended protocol off - 1 = T.70 extended protocol on - Bit 6: 0 = Special RUNG Message off - 1 = Special RUNG Message on - "RUNG" is delivered on a ttyI, if - an incoming call happened (RING) and - the remote party hung up before any - local ATA was given. - Bit 7: 0 = Don't show display messages from net - 1 = Show display messages from net - (S12 Bit 1 must be 0 too) - 14 0 Layer-2 protocol: - 0 = X75/LAPB with I-frames - 1 = X75/LAPB with UI-frames - 2 = X75/LAPB with BUI-frames - 3 = HDLC - 4 = Transparent (audio) - 7 = V.110, 9600 baud - 8 = V.110, 19200 baud - 9 = V.110, 38400 baud - 10 = Analog Modem (only if hardware supports this) - 11 = Fax G3 (only if hardware supports this) - 15 0 Layer-3 protocol: - 0 = transparent - 1 = transparent with audio features (e.g. DSP) - 2 = Fax G3 Class 2 commands (S14 has to be set to 11) - 3 = Fax G3 Class 1 commands (S14 has to be set to 11) - 16 250 Send-Packet-size/16 - 17 8 Window-size (not yet implemented) - 18 4 Bit coded register, Service-Octet-1 to accept, - or to be used on dialout: - Bit 0: Service 1 (audio) when set. - Bit 1: Service 5 (BTX) when set. - Bit 2: Service 7 (data) when set. - Note: It is possible to set more than one - bit. In this case, on incoming calls - the selected services are accepted, - and if the service is "audio", the - Layer-2-protocol is automatically - changed to 4 regardless of the setting - of register 14. On outgoing calls, - the most significant 1-bit is chosen to - select the outgoing service octet. - 19 0 Service-Octet-2 - 20 0 Bit coded register (readonly) - Service-Octet-1 of last call. - Bit mapping is the same as register 18 - 21 0 Bit coded register (readonly) - Set on incoming call (during RING) to - octet 3 of calling party number IE (Numbering plan) - See section 4.5.10 of ITU Q.931 - 22 0 Bit coded register (readonly) - Set on incoming call (during RING) to - octet 3a of calling party number IE (Screening info) - See section 4.5.10 of ITU Q.931 - 23 0 Bit coded register: - Bit 0: 0 = Add CPN to RING message off - 1 = Add CPN to RING message on - Bit 1: 0 = Add CPN to FCON message off - 1 = Add CPN to FCON message on - Bit 2: 0 = Add CDN to RING/FCON message off - 1 = Add CDN to RING/FCON message on - - Last but not least a (at the moment fairly primitive) device to request - the line-status (/dev/isdninfo) is made available. - - Automatic assignment of devices to lines: - - All inactive physical lines are listening to all EAZs for incoming - calls and are NOT assigned to a specific tty or network interface. - When an incoming call is detected, the driver looks first for a network - interface and then for an opened tty which: - - 1. is configured for the same EAZ. - 2. has the same protocol settings for the B-channel. - 3. (only for network interfaces if the security flag is set) - contains the caller number in its access list. - 4. Either the channel is not bound exclusively to another Net-interface, or - it is bound AND the other checks apply to exactly this interface. - (For usage of the bind-features, refer to the isdnctrl-man-page) - - Only when a matching interface or tty is found is the call accepted - and the "connection" between the low-level-layer and the link-level-layer - is established and kept until the end of the connection. - In all other cases no connection is established. Isdn4linux can be - configured to either do NOTHING in this case (which is useful, if - other, external devices with the same EAZ/MSN are connected to the bus) - or to reject the call actively. (isdnctrl busreject ...) - - For an outgoing call, the inactive physical lines are searched. - The call is placed on the first physical line, which supports the - requested protocols for the B-channel. If a net-interface, however - is pre-bound to a channel, this channel is used directly. - - This makes it possible to configure several network interfaces and ttys - for one EAZ, if the network interfaces are set to secure operation. - If an incoming call matches one network interface, it gets connected to it. - If another incoming call for the same EAZ arrives, which does not match - a network interface, the first tty gets a "RING" and so on. - -2 System prerequisites: - - ATTENTION! - - Always use the latest module utilities. The current version is - named in Documentation/Changes. Some old versions of insmod - are not capable of setting the driver-Ids correctly. - -3. Lowlevel-driver configuration. - - Configuration depends on how the drivers are built. See the - README. for information on driver-specific setup. - -4. Device-inodes - - The major and minor numbers and their names are described in - Documentation/admin-guide/devices.rst. The major numbers are: - - 43 for the ISDN-tty's. - 44 for the ISDN-callout-tty's. - 45 for control/info/debug devices. - -5. Application - - a) For some card-types, firmware has to be loaded into the cards, before - proceeding with device-independent setup. See README. - for how to do that. - - b) If you only intend to use ttys, you are nearly ready now. - - c) If you want to have really permanent "Modem"-settings on disk, you - can start the daemon iprofd. Give it a path to a file at the command- - line. It will store the profile-settings in this file every time - an AT&W0 is performed on any ISDN-tty. If the file already exists, - all profiles are initialized from this file. If you want to unload - any of the modules, kill iprofd first. - - d) For networking, continue: Create an interface: - isdnctrl addif isdn0 - - e) Set the EAZ (or MSN for Euro-ISDN): - isdnctrl eaz isdn0 2 - - (For 1TR6 a single digit is allowed, for Euro-ISDN the number is your - real MSN e.g.: Phone-Number) - - f) Set the number for outgoing calls on the interface: - isdnctrl addphone isdn0 out 1234567 - ... (this can be executed more than once, all assigned numbers are - tried in order) - and the number(s) for incoming calls: - isdnctrl addphone isdn0 in 1234567 - - g) Set the timeout for hang-up: - isdnctrl huptimeout isdn0 - - h) additionally you may activate charge-hang-up (= Hang up before - next charge-info, this only works, if your isdn-provider transmits - the charge-info during and after the connection): - isdnctrl chargehup isdn0 on - - i) Set the dial mode of the interface: - isdnctrl dialmode isdn0 auto - "off" means that you (or the system) cannot make any connection - (neither incoming or outgoing connections are possible). Use - this if you want to be sure that no connections will be made. - "auto" means that the interface is in auto-dial mode, and will - attempt to make a connection whenever a network data packet needs - the interface's link. Note that this can cause unexpected dialouts, - and lead to a high phone bill! Some daemons or other pc's that use - this interface can cause this. - Incoming connections are also possible. - "manual" is a dial mode created to prevent the unexpected dialouts. - In this mode, the interface will never make any connections on its - own. You must explicitly initiate a connection with "isdnctrl dial - isdn0". However, after an idle time of no traffic as configured for - the huptimeout value with isdnctrl, the connection _will_ be ended. - If you don't want any automatic hangup, set the huptimeout value to 0. - "manual" is the default. - - j) Setup the interface with ifconfig as usual, and set a route to it. - - k) (optional) If you run X11 and have Tcl/Tk-wish version 4.0, you can use - the script tools/tcltk/isdnmon. You can add actions for line-status - changes. See the comments at the beginning of the script for how to - do that. There are other tty-based tools in the tools-subdirectory - contributed by Michael Knigge (imon), Volker Götz (imontty) and - Andreas Kool (isdnmon). - - l) For initial testing, you can set the verbose-level to 2 (default: 0). - Then all incoming calls are logged, even if they are not addressed - to one of the configured net-interfaces: - isdnctrl verbose 2 - - Now you are ready! A ping to the set address should now result in an - automatic dial-out (look at syslog kernel-messages). - The phone numbers and EAZs can be assigned at any time with isdnctrl. - You can add as many interfaces as you like with addif following the - directions above. Of course, there may be some limitations. But we have - tested as many as 20 interfaces without any problem. However, if you - don't give an interface name to addif, the kernel will assign a name - which starts with "eth". The number of "eth"-interfaces is limited by - the kernel. - -5. Additional options for isdnctrl: - - "isdnctrl secure on" - Only incoming calls, for which the caller-id is listed in the access - list of the interface are accepted. You can add caller-id's With the - command "isdnctrl addphone in " - Euro-ISDN does not transmit the leading '0' of the caller-id for an - incoming call, therefore you should configure it accordingly. - If the real number for the dialout e.g. is "09311234567" the number - to configure here is "9311234567". The pattern-match function - works similar to the shell mechanism. - - ? one arbitrary digit - * zero or arbitrary many digits - [123] one of the digits in the list - [1-5] one digit between '1' and '5' - a '^' as the first character in a list inverts the list - - - "isdnctrl secure off" - Switch off secure operation (default). - - "isdnctrl ihup [on|off]" - Switch the hang-up-timer for incoming calls on or off. - - "isdnctrl eaz " - Returns the EAZ of an interface. - - "isdnctrl delphone in|out " - Deletes a number from one of the access-lists of the interface. - - "isdnctrl delif " - Removes the interface (and possible slaves) from the kernel. - (You have to unregister it with "ifconfig down" before). - - "isdnctrl callback [on|off]" - Switches an interface to callback-mode. In this mode, an incoming call - will be rejected and after this the remote-station will be called. If - you test this feature by using ping, some routers will re-dial very - quickly, so that the callback from isdn4linux may not be recognized. - In this case use ping with the option -i to increase the interval - between echo-packets. - - "isdnctrl cbdelay [seconds]" - Sets the delay (default 5 sec) between an incoming call and start of - dialing when callback is enabled. - - "isdnctrl cbhup [on|off]" - This enables (default) or disables an active hangup (reject) when getting an - incoming call for an interface which is configured for callback. - - "isdnctrl encap " - Selects the type of packet-encapsulation. The encapsulation can be changed - only while an interface is down. - - At the moment the following values are supported: - - rawip (Default) Selects raw-IP-encapsulation. This means, MAC-headers - are stripped off. - ip IP with type-field. Same as IP but the type-field of the MAC-header - is preserved. - x25iface X.25 interface encapsulation (first byte semantics as defined in - ../networking/x25-iface.txt). Use this for running the linux - X.25 network protocol stack (AF_X25 sockets) on top of isdn. - cisco-h A special-mode for communicating with a Cisco, which is configured - to do "hdlc" - ethernet No stripping. Packets are sent with full MAC-header. - The Ethernet-address of the interface is faked, from its - IP-address: fc:fc:i1:i2:i3:i4, where i1-4 are the IP-addr.-values. - syncppp Synchronous PPP - - uihdlc HDLC with UI-frame-header (for use with DOS ISPA, option -h1) - - - NOTE: x25iface encapsulation is currently experimental. Please - read README.x25 for further details - - - Watching packets, using standard-tcpdump will fail for all encapsulations - except ethernet because tcpdump does not know how to handle packets - without MAC-header. A patch for tcpdump is included in the utility-package - mentioned above. - - "isdnctrl l2_prot " - Selects a layer-2-protocol. - (With the ICN-driver and the HiSax-driver, "x75i" and "hdlc" is available. - With other drivers, "x75ui", "x75bui", "x25dte", "x25dce" may be - possible too. See README.x25 for x25 related l2 protocols.) - - isdnctrl l3_prot - The same for layer-3. (At the moment only "trans" is allowed) - - "isdnctrl list " - Shows all parameters of an interface and the charge-info. - Try "all" as the interface name. - - "isdnctrl hangup " - Forces hangup of an interface. - - "isdnctrl bind , [exclusive]" - If you are using more than one ISDN card, it is sometimes necessary to - dial out using a specific card or even preserve a specific channel for - dialout of a specific net-interface. This can be done with the above - command. Replace by whatever you assigned while loading the - module. The is counted from zero. The upper limit - depends on the card used. At the moment no card supports more than - 2 channels, so the upper limit is one. - - "isdnctrl unbind " - unbinds a previously bound interface. - - "isdnctrl busreject on|off" - If switched on, isdn4linux replies a REJECT to incoming calls, it - cannot match to any configured interface. - If switched off, nothing happens in this case. - You normally should NOT enable this feature, if the ISDN adapter is not - the only device connected to the S0-bus. Otherwise it could happen that - isdn4linux rejects an incoming call, which belongs to another device on - the bus. - - "isdnctrl addslave - Creates a slave interface for channel-bundling. Slave interfaces are - not seen by the kernel, but their ISDN-part can be configured with - isdnctrl as usual. (Phone numbers, EAZ/MSN, timeouts etc.) If more - than two channels are to be bundled, feel free to create as many as you - want. InterfaceName must be a real interface, NOT a slave. Slave interfaces - start dialing, if the master interface resp. the previous slave interface - has a load of more than 7000 cps. They hangup if the load goes under 7000 - cps, according to their "huptimeout"-parameter. - - "isdnctrl sdelay secs." - This sets the minimum time an Interface has to be fully loaded, until - it sends a dial-request to its slave. - - "isdnctrl dial " - Forces an interface to start dialing even if no packets are to be - transferred. - - "isdnctrl mapping MSN0,MSN1,MSN2,...MSN9" - This installs a mapping table for EAZ<->MSN-mapping for a single line. - Missing MSN's have to be given as "-" or can be omitted, if at the end - of the commandline. - With this command, it's now possible to have an interface listening to - mixed 1TR6- and Euro-Type lines. In this case, the interface has to be - configured to a 1TR6-type EAZ (one digit). The mapping is also valid - for tty-emulation. Seen from the interface/tty-level the mapping - CAN be used, however it's possible to use single tty's/interfaces with - real MSN's (more digits) also, in which case the mapping will be ignored. - Here is an example: - - You have a 1TR6-type line with base-nr. 1234567 and a Euro-line with - MSN's 987654, 987655 and 987656. The DriverId for the Euro-line is "EURO". - - isdnctrl mapping EURO -,987654,987655,987656,-,987655 - ... - isdnctrl eaz isdn0 1 # listen on 12345671(1tr6) and 987654(euro) - ... - isdnctrl eaz isdn1 4 # listen on 12345674(1tr6) only. - ... - isdnctrl eaz isdn2 987654 # listen on 987654(euro) only. - - Same scheme is used with AT&E... at the tty's. - -6. If you want to write a new low-level-driver, you are welcome. - The interface to the link-level-module is described in the file INTERFACE. - If the interface should be expanded for any reason, don't do it - on your own, send me a mail containing the proposed changes and - some reasoning about them. - If other drivers will not be affected, I will include the changes - in the next release. - For developers only, there is a second mailing-list. Write to me - (fritz@isdn4linux.de), if you want to join that list. - -Have fun! - - -Fritz - diff --git a/Documentation/isdn/README.FAQ b/Documentation/isdn/README.FAQ deleted file mode 100644 index e5dd1addacdd..000000000000 --- a/Documentation/isdn/README.FAQ +++ /dev/null @@ -1,26 +0,0 @@ - -The FAQ for isdn4linux -====================== - -Please note that there is a big FAQ available in the isdn4k-utils. -You find it in: - isdn4k-utils/FAQ/i4lfaq.sgml - -In case you just want to see the FAQ online, or download the newest version, -you can have a look at my website: -https://www.mhessler.de/i4lfaq/ (view + download) -or: -https://www.isdn4linux.de/faq/4lfaq.html (view) - -As the extension tells, the FAQ is in SGML format, and you can convert it -into text/html/... format by using the sgml2txt/sgml2html/... tools. -Alternatively, you can also do a 'configure; make all' in the FAQ directory. - - -Please have a look at the FAQ before posting anything in the Mailinglist, -or the newsgroup! - - -Matthias Hessler -hessler@isdn4linux.de - diff --git a/Documentation/isdn/README.audio b/Documentation/isdn/README.audio deleted file mode 100644 index 8ebca19290d9..000000000000 --- a/Documentation/isdn/README.audio +++ /dev/null @@ -1,138 +0,0 @@ -$Id: README.audio,v 1.8 1999/07/11 17:17:29 armin Exp $ - -ISDN subsystem for Linux. - Description of audio mode. - -When enabled during kernel configuration, the tty emulator of the ISDN -subsystem is capable of a reduced set of commands to support audio. -This document describes the commands supported and the format of -audio data. - -Commands for enabling/disabling audio mode: - - AT+FCLASS=8 Enable audio mode. - This affects the following registers: - S18: Bits 0 and 2 are set. - S16: Set to 48 and any further change to - larger values is blocked. - AT+FCLASS=0 Disable audio mode. - Register 18 is set to 4. - AT+FCLASS=? Show possible modes. - AT+FCLASS? Report current mode (0 or 8). - -Commands supported in audio mode: - -All audio mode commands have one of the following forms: - - AT+Vxx? Show current setting. - AT+Vxx=? Show possible settings. - AT+Vxx=v Set simple parameter. - AT+Vxx=v,v ... Set complex parameter. - -where xx is a two-character code and v are alphanumerical parameters. -The following commands are supported: - - AT+VNH=x Auto hangup setting. NO EFFECT, supported - for compatibility only. - AT+VNH? Always reporting "1" - AT+VNH=? Always reporting "1" - - AT+VIP Reset all audio parameters. - - AT+VLS=x Line select. x is one of the following: - 0 = No device. - 2 = Phone line. - AT+VLS=? Always reporting "0,2" - AT+VLS? Show current line. - - AT+VRX Start recording. Emulator responds with - CONNECT and starts sending audio data to - the application. See below for data format - - AT+VSD=x,y Set silence-detection parameters. - Possible parameters: - x = 0 ... 31 sensitivity threshold level. - (default 0 , deactivated) - y = 0 ... 255 range of interval in units - of 0.1 second. (default 70) - AT+VSD=? Report possible parameters. - AT+VSD? Show current parameters. - - AT+VDD=x,y Set DTMF-detection parameters. - Only possible if online and during this connection. - Possible parameters: - x = 0 ... 15 sensitivity threshold level. - (default 0 , I4L soft-decode) - (1-15 soft-decode off, hardware on) - y = 0 ... 255 tone duration in units of 5ms. - Not for I4L soft decode (default 8, 40ms) - AT+VDD=? Report possible parameters. - AT+VDD? Show current parameters. - - AT+VSM=x Select audio data format. - Possible parameters: - 2 = ADPCM-2 - 3 = ADPCM-3 - 4 = ADPCM-4 - 5 = aLAW - 6 = uLAW - AT+VSM=? Show possible audio formats. - - AT+VTX Start audio playback. Emulator responds - with CONNECT and starts sending audio data - received from the application via phone line. -General behavior and description of data formats/protocol. - when a connection is made: - - On incoming calls, if the application responds to a RING - with ATA, depending on the calling service, the emulator - responds with either CONNECT (data call) or VCON (voice call). - - On outgoing voice calls, the emulator responds with VCON - upon connection setup. - - Audio recording. - - When receiving audio data, a kind of bisync protocol is used. - Upon AT+VRX command, the emulator responds with CONNECT, and - starts sending audio data to the application. There are several - escape sequences defined, all using DLE (0x10) as Escape char: - - End of audio data. (i.e. caused by a - hangup of the remote side) Emulator stops - recording, responding with VCON. - Abort recording, (send by appl.) Emulator - stops recording, sends DLE,ETX. - Escape sequence for DLE in data stream. - 0 Touchtone "0" received. - ... - 9 Touchtone "9" received. - # Touchtone "#" received. - * Touchtone "*" received. - A Touchtone "A" received. - B Touchtone "B" received. - C Touchtone "C" received. - D Touchtone "D" received. - - q quiet. Silence detected after non-silence. - s silence. Silence detected from the - start of recording. - - Currently unsupported DLE sequences: - - c FAX calling tone received. - b busy tone received. - - Audio playback. - - When sending audio data, upon AT+VTX command, emulator responds with - CONNECT, and starts transferring data from application to the phone line. - The same DLE sequences apply to this mode. - - Full-Duplex-Audio: - - When _both_ commands for recording and playback are given in _one_ - AT-command-line (i.e.: "AT+VTX+VRX"), full-duplex-mode is selected. - In this mode, the only way to stop recording is sending - and the only way to stop playback is to send . - diff --git a/Documentation/isdn/README.concap b/Documentation/isdn/README.concap deleted file mode 100644 index a76d74845a4c..000000000000 --- a/Documentation/isdn/README.concap +++ /dev/null @@ -1,259 +0,0 @@ -Description of the "concap" encapsulation protocol interface -============================================================ - -The "concap" interface is intended to be used by network device -drivers that need to process an encapsulation protocol. -It is assumed that the protocol interacts with a linux network device by -- data transmission -- connection control (establish, release) -Thus, the mnemonic: "CONnection CONtrolling eNCAPsulation Protocol". - -This is currently only used inside the isdn subsystem. But it might -also be useful to other kinds of network devices. Thus, if you want -to suggest changes that improve usability or performance of the -interface, please let me know. I'm willing to include them in future -releases (even if I needed to adapt the current isdn code to the -changed interface). - - -Why is this useful? -=================== - -The encapsulation protocol used on top of WAN connections or permanent -point-to-point links are frequently chosen upon bilateral agreement. -Thus, a device driver for a certain type of hardware must support -several different encapsulation protocols at once. - -The isdn device driver did already support several different -encapsulation protocols. The encapsulation protocol is configured by a -user space utility (isdnctrl). The isdn network interface code then -uses several case statements which select appropriate actions -depending on the currently configured encapsulation protocol. - -In contrast, LAN network interfaces always used a single encapsulation -protocol which is unique to the hardware type of the interface. The LAN -encapsulation is usually done by just sticking a header on the data. Thus, -traditional linux network device drivers used to process the -encapsulation protocol directly (usually by just providing a hard_header() -method in the device structure) using some hardware type specific support -functions. This is simple, direct and efficient. But it doesn't fit all -the requirements for complex WAN encapsulations. - - - The configurability of the encapsulation protocol to be used - makes isdn network interfaces more flexible, but also much more - complex than traditional lan network interfaces. - - -Many Encapsulation protocols used on top of WAN connections will not just -stick a header on the data. They also might need to set up or release -the WAN connection. They also might want to send other data for their -private purpose over the wire, e.g. ppp does a lot of link level -negotiation before the first piece of user data can be transmitted. -Such encapsulation protocols for WAN devices are typically more complex -than encapsulation protocols for lan devices. Thus, network interface -code for typical WAN devices also tends to be more complex. - - -In order to support Linux' x25 PLP implementation on top of -isdn network interfaces I could have introduced yet another branch to -the various case statements inside drivers/isdn/isdn_net.c. -This eventually made isdn_net.c even more complex. In addition, it made -isdn_net.c harder to maintain. Thus, by identifying an abstract -interface between the network interface code and the encapsulation -protocol, complexity could be reduced and maintainability could be -increased. - - -Likewise, a similar encapsulation protocol will frequently be needed by -several different interfaces of even different hardware type, e.g. the -synchronous ppp implementation used by the isdn driver and the -asynchronous ppp implementation used by the ppp driver have a lot of -similar code in them. By cleanly separating the encapsulation protocol -from the hardware specific interface stuff such code could be shared -better in future. - - -When operating over dial-up-connections (e.g. telephone lines via modem, -non-permanent virtual circuits of wide area networks, ISDN) many -encapsulation protocols will need to control the connection. Therefore, -some basic connection control primitives are supported. The type and -semantics of the connection (i.e the ISO layer where connection service -is provided) is outside our scope and might be different depending on -the encapsulation protocol used, e.g. for a ppp module using our service -on top of a modem connection a connect_request will result in dialing -a (somewhere else configured) remote phone number. For an X25-interface -module (LAPB semantics, as defined in Documentation/networking/x25-iface.txt) -a connect_request will ask for establishing a reliable lapb -datalink connection. - - -The encapsulation protocol currently provides the following -service primitives to the network device. - -- create a new encapsulation protocol instance -- delete encapsulation protocol instance and free all its resources -- initialize (open) the encapsulation protocol instance for use. -- deactivate (close) an encapsulation protocol instance. -- process (xmit) data handed down by upper protocol layer -- receive data from lower (hardware) layer -- process connect indication from lower (hardware) layer -- process disconnect indication from lower (hardware) layer - - -The network interface driver accesses those primitives via callbacks -provided by the encapsulation protocol instance within a -struct concap_proto_ops. - -struct concap_proto_ops{ - - /* create a new encapsulation protocol instance of same type */ - struct concap_proto * (*proto_new) (void); - - /* delete encapsulation protocol instance and free all its resources. - cprot may no longer be referenced after calling this */ - void (*proto_del)(struct concap_proto *cprot); - - /* initialize the protocol's data. To be called at interface startup - or when the device driver resets the interface. All services of the - encapsulation protocol may be used after this*/ - int (*restart)(struct concap_proto *cprot, - struct net_device *ndev, - struct concap_device_ops *dops); - - /* deactivate an encapsulation protocol instance. The encapsulation - protocol may not call any *dops methods after this. */ - int (*close)(struct concap_proto *cprot); - - /* process a frame handed down to us by upper layer */ - int (*encap_and_xmit)(struct concap_proto *cprot, struct sk_buff *skb); - - /* to be called for each data entity received from lower layer*/ - int (*data_ind)(struct concap_proto *cprot, struct sk_buff *skb); - - /* to be called when a connection was set up/down. - Protocols that don't process these primitives might fill in - dummy methods here */ - int (*connect_ind)(struct concap_proto *cprot); - int (*disconn_ind)(struct concap_proto *cprot); -}; - - -The data structures are defined in the header file include/linux/concap.h. - - -A Network interface using encapsulation protocols must also provide -some service primitives to the encapsulation protocol: - -- request data being submitted by lower layer (device hardware) -- request a connection being set up by lower layer -- request a connection being released by lower layer - -The encapsulation protocol accesses those primitives via callbacks -provided by the network interface within a struct concap_device_ops. - -struct concap_device_ops{ - - /* to request data be submitted by device */ - int (*data_req)(struct concap_proto *, struct sk_buff *); - - /* Control methods must be set to NULL by devices which do not - support connection control. */ - /* to request a connection be set up */ - int (*connect_req)(struct concap_proto *); - - /* to request a connection be released */ - int (*disconn_req)(struct concap_proto *); -}; - -The network interface does not explicitly provide a receive service -because the encapsulation protocol directly calls netif_rx(). - - - - -An encapsulation protocol itself is actually the -struct concap_proto{ - struct net_device *net_dev; /* net device using our service */ - struct concap_device_ops *dops; /* callbacks provided by device */ - struct concap_proto_ops *pops; /* callbacks provided by us */ - int flags; - void *proto_data; /* protocol specific private data, to - be accessed via *pops methods only*/ - /* - : - whatever - : - */ -}; - -Most of this is filled in when the device requests the protocol to -be reset (opend). The network interface must provide the net_dev and -dops pointers. Other concap_proto members should be considered private -data that are only accessed by the pops callback functions. Likewise, -a concap proto should access the network device's private data -only by means of the callbacks referred to by the dops pointer. - - -A possible extended device structure which uses the connection controlling -encapsulation services could look like this: - -struct concap_device{ - struct net_device net_dev; - struct my_priv /* device->local stuff */ - /* the my_priv struct might contain a - struct concap_device_ops *dops; - to provide the device specific callbacks - */ - struct concap_proto *cprot; /* callbacks provided by protocol */ -}; - - - -Misc Thoughts -============= - -The concept of the concap proto might help to reuse protocol code and -reduce the complexity of certain network interface implementations. -The trade off is that it introduces yet another procedure call layer -when processing the protocol. This has of course some impact on -performance. However, typically the concap interface will be used by -devices attached to slow lines (like telephone, isdn, leased synchronous -lines). For such slow lines, the overhead is probably negligible. -This might no longer hold for certain high speed WAN links (like -ATM). - - -If general linux network interfaces explicitly supported concap -protocols (e.g. by a member struct concap_proto* in struct net_device) -then the interface of the service function could be changed -by passing a pointer of type (struct net_device*) instead of -type (struct concap_proto*). Doing so would make many of the service -functions compatible to network device support functions. - -e.g. instead of the concap protocol's service function - - int (*encap_and_xmit)(struct concap_proto *cprot, struct sk_buff *skb); - -we could have - - int (*encap_and_xmit)(struct net_device *ndev, struct sk_buff *skb); - -As this is compatible to the dev->hard_start_xmit() method, the device -driver could directly register the concap protocol's encap_and_xmit() -function as its hard_start_xmit() method. This would eliminate one -procedure call layer. - - -The device's data request function could also be defined as - - int (*data_req)(struct net_device *ndev, struct sk_buff *skb); - -This might even allow for some protocol stacking. And the network -interface might even register the same data_req() function directly -as its hard_start_xmit() method when a zero layer encapsulation -protocol is configured. Thus, eliminating the performance penalty -of the concap interface when a trivial concap protocol is used. -Nevertheless, the device remains able to support encapsulation -protocol configuration. - diff --git a/Documentation/isdn/README.diversion b/Documentation/isdn/README.diversion deleted file mode 100644 index bddcd5fb86ff..000000000000 --- a/Documentation/isdn/README.diversion +++ /dev/null @@ -1,127 +0,0 @@ -The isdn diversion services are a supporting module working together with -the isdn4linux and the HiSax module for passive cards. -Active cards, TAs and cards using a own or other driver than the HiSax -module need to be adapted to the HL<->LL interface described in a separate -document. The diversion services may be used with all cards supported by -the HiSax driver. -The diversion kernel interface and controlling tool divertctrl were written -by Werner Cornelius (werner@isdn4linux.de or werner@titro.de) under the -GNU General Public License. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - -Table of contents -================= - -1. Features of the i4l diversion services - (Or what can the i4l diversion services do for me) - -2. Required hard- and software - -3. Compiling, installing and loading/unloading the module - Tracing calling and diversion information - -4. Tracing calling and diversion information - -5. Format of the divert device ASCII output - - -1. Features of the i4l diversion services - (Or what can the i4l diversion services do for me) - - The i4l diversion services offers call forwarding and logging normally - only supported by isdn phones. Incoming calls may be diverted - unconditionally (CFU), when not reachable (CFNR) or on busy condition - (CFB). - The diversions may be invoked statically in the providers exchange - as normally done by isdn phones. In this case all incoming calls - with a special (or all) service identifiers are forwarded if the - forwarding reason is met. Activated static services may also be - interrogated (queried). - The i4l diversion services additionally offers a dynamic version of - call forwarding which is not preprogrammed inside the providers exchange - but dynamically activated by i4l. - In this case all incoming calls are checked by rules that may be - compared to the mechanism of ipfwadm or ipchains. If a given rule matches - the checking process is finished and the rule matching will be applied - to the call. - The rules include primary and secondary service identifiers, called - number and subaddress, callers number and subaddress and whether the rule - matches to all filtered calls or only those when all B-channel resources - are exhausted. - Actions that may be invoked by a rule are ignore, proceed, reject, - direct divert or delayed divert of a call. - All incoming calls matching a rule except the ignore rule a reported and - logged as ASCII via the proc filesystem (/proc/net/isdn/divert). If proceed - is selected the call will be held in a proceeding state (without ringing) - for a certain amount of time to let an external program or client decide - how to handle the call. - - -2. Required hard- and software - - For using the i4l diversion services the isdn line must be of a EURO/DSS1 - type. Additionally the i4l services only work together with the HiSax - driver for passive isdn cards. All HiSax supported cards may be used for - the diversion purposes. - The static diversion services require the provider having static services - CFU, CFNR, CFB activated on an MSN-line. The static services may not be - used on a point-to-point connection. Further the static services are only - available in some countries (for example germany). Countries requiring the - keypad protocol for activating static diversions (like the netherlands) are - not supported but may use the tty devices for this purpose. - The dynamic diversion services may be used in all countries if the provider - enables the feature CF (call forwarding). This should work on both MSN- and - point-to-point lines. - To add and delete rules the additional divertctrl program is needed. This - program is part of the isdn4kutils package. - -3. Compiling, installing and loading/unloading the module - Tracing calling and diversion information - - - To compile the i4l code with diversion support you need to say yes to the - DSS1 diversion services when selecting the i4l options in the kernel - config (menuconfig or config). - After having properly activated a make modules and make modules_install all - required modules will be correctly installed in the needed modules dirs. - As the diversion services are currently not included in the scripts of most - standard distributions you will have to add a "insmod dss1_divert" after - having loaded the global isdn module. - The module can be loaded without any command line parameters. - If the module is actually loaded and active may be checked with a - "cat /proc/modules" or "ls /proc/net/isdn/divert". The divert file is - dynamically created by the diversion module and removed when the module is - unloaded. - - -4. Tracing calling and diversion information - - You also may put a "cat /proc/net/isdn/divert" in the background with the - output redirected to a file. Then all actions of the module are logged. - The divert file in the proc system may be opened more than once, so in - conjunction with inetd and a small remote client on other machines inside - your network incoming calls and reactions by the module may be shown on - every listening machine. - If a call is reported as proceeding an external program or client may - specify during a certain amount of time (normally 4 to 10 seconds) what - to do with that call. - To unload the module all open files to the device in the proc system must - be closed. Otherwise the module (and isdn.o) may not be unloaded. - -5. Format of the divert device ASCII output - - To be done later - diff --git a/Documentation/isdn/README.fax b/Documentation/isdn/README.fax deleted file mode 100644 index 5314958a8a6e..000000000000 --- a/Documentation/isdn/README.fax +++ /dev/null @@ -1,45 +0,0 @@ - -Fax with isdn4linux -=================== - -When enabled during kernel configuration, the tty emulator -of the ISDN subsystem is capable of the Fax Class 2 commands. - -This only makes sense under the following conditions : - -- You need the commands as dummy, because you are using - hylafax (with patch) for AVM capi. -- You want to use the fax capabilities of your isdn-card. - (supported cards are listed below) - - -NOTE: This implementation does *not* support fax with passive - ISDN-cards (known as softfax). The low-level driver of - the ISDN-card and/or the card itself must support this. - - -Supported ISDN-Cards --------------------- - -Eicon DIVA Server BRI/PCI - - full support with both B-channels. - -Eicon DIVA Server 4BRI/PCI - - full support with all B-channels. - -Eicon DIVA Server PRI/PCI - - full support on amount of B-channels - depending on DSPs on board. - - - -The command set is known as Class 2 (not Class 2.0) and -can be activated by AT+FCLASS=2 - - -The interface between the link-level-module and the hardware-level driver -is described in the files INTERFACE.fax and INTERFACE. - -Armin -mac@melware.de - diff --git a/Documentation/isdn/README.hfc-pci b/Documentation/isdn/README.hfc-pci deleted file mode 100644 index e8a4ef0226e8..000000000000 --- a/Documentation/isdn/README.hfc-pci +++ /dev/null @@ -1,41 +0,0 @@ -The driver for the HFC-PCI and HFC-PCI-A chips from CCD may be used -for many OEM cards using this chips. -Additionally the driver has a special feature which makes it possible -to read the echo-channel of the isdn bus. So all frames in both directions -may be logged. -When the echo logging feature is used the number of available B-channels -for a HFC-PCI card is reduced to 1. Of course this is only relevant to -the card, not to the isdn line. -To activate the echo mode the following ioctls must be entered: - -hisaxctrl 10 1 - -This reduces the available channels to 1. There must not be open connections -through this card when entering the command. -And then: - -hisaxctrl 12 1 - -This enables the echo mode. If Hex logging is activated the isdnctrlx -devices show a output with a line beginning of HEX: for the providers -exchange and ECHO: for isdn devices sending to the provider. - -If more than one HFC-PCI cards are installed, a specific card may be selected -at the hisax module load command line. Supply the load command with the desired -IO-address of the desired card. -Example: -There tree cards installed in your machine at IO-base addresses 0xd000, 0xd400 -and 0xdc00 -If you want to use the card at 0xd400 standalone you should supply the insmod -or depmod with type=35 io=0xd400. -If you want to use all three cards, but the order needs to be at 0xdc00,0xd400, -0xd000 you may give the parameters type=35,35,35 io=0xdc00,0xd400,0xd00 -Then the desired card will be the initialised in the desired order. -If the io parameter is used the io addresses of all used cards should be -supplied else the parameter is assumed 0 and a auto search for a free card is -invoked which may not give the wanted result. - -Comments and reports to werner@isdn4linux.de or werner@isdn-development.de - - - diff --git a/Documentation/isdn/README.syncppp b/Documentation/isdn/README.syncppp deleted file mode 100644 index 27d260095cce..000000000000 --- a/Documentation/isdn/README.syncppp +++ /dev/null @@ -1,58 +0,0 @@ -Some additional information for setting up a syncPPP -connection using network interfaces. ---------------------------------------------------------------- - -You need one thing beside the isdn4linux package: - - a patched pppd .. (I called it ipppd to show the difference) - -Compiling isdn4linux with sync PPP: ------------------------------------ -To compile isdn4linux with the sync PPP part, you have -to answer the appropriate question when doing a "make config" -Don't forget to load the slhc.o -module before the isdn.o module, if VJ-compression support -is not compiled into your kernel. (e.g if you have no PPP or -CSLIP in the kernel) - -Using isdn4linux with sync PPP: -------------------------------- -Sync PPP is just another encapsulation for isdn4linux. The -name to enable sync PPP encapsulation is 'syncppp' .. e.g: - - /sbin/isdnctrl encap ippp0 syncppp - -The name of the interface is here 'ippp0'. You need -one interface with the name 'ippp0' to saturate the -ipppd, which checks the ppp version via this interface. -Currently, all devices must have the name ipppX where -'X' is a decimal value. - -To set up a PPP connection you need the ipppd .. You must start -the ipppd once after installing the modules. The ipppd -communicates with the isdn4linux link-level driver using the -/dev/ippp0 to /dev/ippp15 devices. One ipppd can handle -all devices at once. If you want to use two PPP connections -at the same time, you have to connect the ipppd to two -devices .. and so on. -I've implemented one additional option for the ipppd: - 'useifip' will get (if set to not 0.0.0.0) the IP address - for the negotiation from the attached network-interface. -(also: ipppd will try to negotiate pointopoint IP as remote IP) -You must disable BSD-compression, this implementation can't -handle compressed packets. - -Check the etc/rc.isdn.syncppp in the isdn4kernel-util package -for an example setup script. - -To use the MPPP stuff, you must configure a slave device -with isdn4linux. Now call the ipppd with the '+mp' option. -To increase the number of links, you must use the -'addlink' option of the isdnctrl tool. (rc.isdn.syncppp.MPPP is -an example script) - -enjoy it, - michael - - - diff --git a/Documentation/isdn/README.x25 b/Documentation/isdn/README.x25 deleted file mode 100644 index e561a77c4e22..000000000000 --- a/Documentation/isdn/README.x25 +++ /dev/null @@ -1,184 +0,0 @@ - -X.25 support within isdn4linux -============================== - -This is alpha/beta test code. Use it completely at your own risk. -As new versions appear, the stuff described here might suddenly change -or become invalid without notice. - -Keep in mind: - -You are using several new parts of the 2.2.x kernel series which -have not been tested in a large scale. Therefore, you might encounter -more bugs as usual. - -- If you connect to an X.25 neighbour not operated by yourself, ASK the - other side first. Be prepared that bugs in the protocol implementation - might result in problems. - -- This implementation has never wiped out my whole hard disk yet. But as - this is experimental code, don't blame me if that happened to you. - Backing up important data will never harm. - -- Monitor your isdn connections while using this software. This should - prevent you from undesired phone bills in case of driver problems. - - - - -How to configure the kernel -=========================== - -The ITU-T (former CCITT) X.25 network protocol layer has been implemented -in the Linux source tree since version 2.1.16. The isdn subsystem might be -useful to run X.25 on top of ISDN. If you want to try it, select - - "CCITT X.25 Packet Layer" - -from the networking options as well as - - "ISDN Support" and "X.25 PLP on Top of ISDN" - -from the ISDN subsystem options when you configure your kernel for -compilation. You currently also need to enable -"Prompt for development and/or incomplete code/drivers" from the -"Code maturity level options" menu. For the x25trace utility to work -you also need to enable "Packet socket". - -For local testing it is also recommended to enable the isdnloop driver -from the isdn subsystem's configuration menu. - -For testing, it is recommended that all isdn drivers and the X.25 PLP -protocol are compiled as loadable modules. Like this, you can recover -from certain errors by simply unloading and reloading the modules. - - - -What's it for? How to use it? -============================= - -X.25 on top of isdn might be useful with two different scenarios: - -- You might want to access a public X.25 data network from your Linux box. - You can use i4l if you were physically connected to the X.25 switch - by an ISDN B-channel (leased line as well as dial up connection should - work). - - This corresponds to ITU-T recommendation X.31 Case A (circuit-mode - access to PSPDN [packet switched public data network]). - - NOTE: X.31 also covers a Case B (access to PSPDN via virtual - circuit / packet mode service). The latter mode (which in theory - also allows using the D-channel) is not supported by isdn4linux. - It should however be possible to establish such packet mode connections - with certain active isdn cards provided that the firmware supports X.31 - and the driver exports this functionality to the user. Currently, - the AVM B1 driver is the only driver which does so. (It should be - possible to access D-channel X.31 with active AVM cards using the - CAPI interface of the AVM-B1 driver). - -- Or you might want to operate certain ISDN teleservices on your linux - box. A lot of those teleservices run on top of the ISO-8208 - (DTE-DTE mode) network layer protocol. ISO-8208 is essentially the - same as ITU-T X.25. - - Popular candidates of such teleservices are EUROfile transfer or any - teleservice applying ITU-T recommendation T.90. - -To use the X.25 protocol on top of isdn, just create an isdn network -interface as usual, configure your own and/or peer's ISDN numbers, -and choose x25iface encapsulation by - - isdnctrl encap x25iface. - -Once encap is set like this, the device can be used by the X.25 packet layer. - -All the stuff needed for X.25 is implemented inside the isdn link -level (mainly isdn_net.c and some new source files). Thus, it should -work with every existing HL driver. I was able to successfully open X.25 -connections on top of the isdnloop driver and the hisax driver. -"x25iface"-encapsulation bypasses demand dialing. Dialing will be -initiated when the upper (X.25 packet) layer requests the lapb datalink to -be established. But hangup timeout is still active. Whenever a hangup -occurs, all existing X.25 connections on that link will be cleared -It is recommended to use sufficiently large hangup-timeouts for the -isdn interfaces. - - -In order to set up a conforming protocol stack you also need to -specify the proper l2_prot parameter: - -To operate in ISO-8208 X.25 DTE-DTE mode, use - - isdnctrl l2_prot x75i - -To access an X.25 network switch via isdn (your linux box is the DTE), use - - isdnctrl l2_prot x25dte - -To mimic an X.25 network switch (DCE side of the connection), use - - isdnctrl l2_prot x25dce - -However, x25dte or x25dce is currently not supported by any real HL -level driver. The main difference between x75i and x25dte/dce is that -x25d[tc]e uses fixed lap_b addresses. With x75i, the side which -initiates the isdn connection uses the DTE's lap_b address while the -called side used the DCE's lap_b address. Thus, l2_prot x75i might -probably work if you access a public X.25 network as long as the -corresponding isdn connection is set up by you. At least one test -was successful to connect via isdn4linux to an X.25 switch using this -trick. At the switch side, a terminal adapter X.21 was used to connect -it to the isdn. - - -How to set up a test installation? -================================== - -To test X.25 on top of isdn, you need to get - -- a recent version of the "isdnctrl" program that supports setting the new - X.25 specific parameters. - -- the x25-utils-2.X package from - ftp://ftp.hes.iki.fi/pub/ham/linux/ax25/x25utils-* - (don't confuse the x25-utils with the ax25-utils) - -- an application program that uses linux PF_X25 sockets (some are - contained in the x25-util package). - -Before compiling the user level utilities make sure that the compiler/ -preprocessor will fetch the proper kernel header files of this kernel -source tree. Either make /usr/include/linux a symbolic link pointing to -this kernel's include/linux directory or set the appropriate compiler flags. - -When all drivers and interfaces are loaded and configured you need to -ifconfig the network interfaces up and add X.25-routes to them. Use -the usual ifconfig tool. - -ifconfig up - -But a special x25route tool (distributed with the x25-util package) -is needed to set up X.25 routes. I.e. - -x25route add 01 - -will cause all x.25 connections to the destination X.25-address -"01" to be routed to your created isdn network interface. - -There are currently no real X.25 applications available. However, for -tests, the x25-utils package contains a modified version of telnet -and telnetd that uses X.25 sockets instead of tcp/ip sockets. You can -use those for your first tests. Furthermore, you might check -ftp://ftp.hamburg.pop.de/pub/LOCAL/linux/i4l-eft/ which contains some -alpha-test implementation ("eftp4linux") of the EUROfile transfer -protocol. - -The scripts distributed with the eftp4linux test releases might also -provide useful examples for setting up X.25 on top of isdn. - -The x25-utility package also contains an x25trace tool that can be -used to monitor X.25 packets received by the network interfaces. -The /proc/net/x25* files also contain useful information. - -- Henner diff --git a/Documentation/isdn/syncPPP.FAQ b/Documentation/isdn/syncPPP.FAQ deleted file mode 100644 index 3257a4bc0786..000000000000 --- a/Documentation/isdn/syncPPP.FAQ +++ /dev/null @@ -1,224 +0,0 @@ -simple isdn4linux PPP FAQ .. to be continued .. not 'debugged' -------------------------------------------------------------------- - -Q01: what's pppd, ipppd, syncPPP, asyncPPP ?? -Q02: error message "this system lacks PPP support" -Q03: strange information using 'ifconfig' -Q04: MPPP?? What's that and how can I use it ... -Q05: I tried MPPP but it doesn't work -Q06: can I use asynchronous PPP encapsulation with network devices -Q07: A SunISDN machine can't connect to my i4l system -Q08: I wanna talk to several machines, which need different configs -Q09: Starting the ipppd, I get only error messages from i4l -Q10: I wanna use dynamic IP address assignment -Q11: I can't connect. How can I check where the problem is. -Q12: How can I reduce login delay? - -------------------------------------------------------------------- - -Q01: pppd, ipppd, syncPPP, asyncPPP .. what is that ? - what should I use? -A: The pppd is for asynchronous PPP .. asynchronous means - here, the framing is character based. (e.g when - using ttyI* or tty* devices) - - The ipppd handles PPP packets coming in HDLC - frames (bit based protocol) ... The PPP driver - in isdn4linux pushes all IP packets direct - to the network layer and all PPP protocol - frames to the /dev/ippp* device. - So, the ipppd is a simple external network - protocol handler. - - If you login into a remote machine using the - /dev/ttyI* devices and then enable PPP on the - remote terminal server -> use the 'old' pppd - - If your remote side immediately starts to send - frames ... you probably connect to a - syncPPP machine .. use the network device part - of isdn4linux with the 'syncppp' encapsulation - and make sure, that the ipppd is running and - connected to at least one /dev/ippp*. Check the - isdn4linux manual on how to configure a network device. - --- - -Q02: when I start the ipppd .. I only get the - error message "this system lacks PPP support" -A: check that at least the device 'ippp0' exists. - (you can check this e.g with the program 'ifconfig') - The ipppd NEEDS this device under THIS name .. - If this device doesn't exists, use: - isdnctrl addif ippp0 - isdnctrl encap ippp0 syncppp - ... (see isdn4linux doc for more) ... -A: Maybe you have compiled the ipppd with another - kernel source tree than the kernel you currently - run ... - --- - -Q03: when I list the netdevices with ifconfig I see, that - my ISDN interface has a HWaddr and IRQ=0 and Base - address = 0 -A: The device is a fake ethernet device .. ignore IRQ and baseaddr - You need the HWaddr only for ethernet encapsulation. - --- - -Q04: MPPP?? What's that and how can I use it ... - -A: MPPP or MP or MPP (Warning: MP is also an - acronym for 'Multi Processor') stands for - Multi Point to Point and means bundling - of several channels to one logical stream. - To enable MPPP negotiation you must call the - ipppd with the '+mp' option. - You must also configure a slave device for - every additional channel. (see the i4l manual - for more) - To use channel bundling you must first activate - the 'master' or initial call. Now you can add - the slave channels with the command: - isdnctrl addlink - e.g: - isdnctrl addlink ippp0 - This is different from other encapsulations of - isdn4linux! With syncPPP, there is no automatic - activation of slave devices. - --- - -Q05: I tried MPPP but it doesn't work .. the ipppd - writes in the debug log something like: - .. rcvd [0][proto=0x3d] c0 00 00 00 80 fd 01 01 00 0a ... - .. sent [0][LCP ProtRej id=0x2 00 3d c0 00 00 00 80 fd 01 ... - -A: you forgot to compile MPPP/RFC1717 support into the - ISDN Subsystem. Recompile with this option enabled. - --- - -Q06: can I use asynchronous PPP encapsulation - over the network interface of isdn4linux .. - -A: No .. that's not possible .. Use the standard - PPP package over the /dev/ttyI* devices. You - must not use the ipppd for this. - --- - -Q07: A SunISDN machine tries to connect my i4l system, - which doesn't work. - Checking the debug log I just saw garbage like: -!![ ... fill in the line ... ]!! - -A: The Sun tries to talk asynchronous PPP ... i4l - can't understand this ... try to use the ttyI* - devices with the standard PPP/pppd package - -A: (from Alexanter Strauss: ) -!![ ... fill in mail ]!! - --- - -Q08: I wanna talk to remote machines, which need - a different configuration. The only way - I found to do this is to kill the ipppd and - start a new one with another config to connect - to the second machine. - -A: you must bind a network interface explicitly to - an ippp device, where you can connect a (for this - interface) individually configured ipppd. - --- - -Q09: When I start the ipppd I only get error messages - from the i4l driver .. - -A: When starting, the ipppd calls functions which may - trigger a network packet. (e.g gethostbyname()). - Without the ipppd (at this moment, it is not - fully started) we can't handle this network request. - Try to configure hostnames necessary for the ipppd - in your local /etc/hosts file or in a way, that - your system can resolve it without using an - isdn/ippp network-interface. - --- - -Q10: I wanna use dynamic IP address assignment ... How - must I configure the network device. - -A: At least you must have a route which forwards - a packet to the ippp network-interface to trigger - the dial-on-demand. - A default route to the ippp-interface will work. - Now you must choose a dummy IP address for your - interface. - If for some reason you can't set the default - route to the ippp interface, you may take any - address of the subnet from which you expect your - dynamic IP number and set a 'network route' for - this subnet to the ippp interface. - To allow overriding of the dummy address you - must call the ipppd with the 'ipcp-accept-local' option. - -A: You must know, how the ipppd gets the addresses it wanna - configure. If you don't give any option, the ipppd - tries to negotiate the local host address! - With the option 'noipdefault' it requests an address - from the remote machine. With 'useifip' it gets the - addresses from the net interface. Or you set the address - on the option line with the option. - Note: the IP address of the remote machine must be configured - locally or the remote machine must send it in an IPCP request. - If your side doesn't know the IP address after negotiation, it - closes the connection! - You must allow overriding of address with the 'ipcp-accept-*' - options, if you have set your own or the remote address - explicitly. - -A: Maybe you try these options .. e.g: - - /sbin/ipppd :$REMOTE noipdefault /dev/ippp0 - - where REMOTE must be the address of the remote machine (the - machine, which gives you your address) - --- - -Q11: I can't connect. How can I check where the problem is. - -A: A good help log is the debug output from the ipppd... - Check whether you can find there: - - only a few LCP-conf-req SENT messages (less then 10) - and then a Term-REQ: - -> check whether your ISDN card is well configured - it seems, that your machine doesn't dial - (IRQ,IO,Proto, etc problems) - Configure your ISDN card to print debug messages and - check the /dev/isdnctrl output next time. There - you can see, whether there is activity on the card/line. - - there are at least a few RECV messages in the log: - -> fine: your card is dialing and your remote machine - tries to talk with you. Maybe only a missing - authentication. Check your ipppd configuration again. - - the ipppd exits for some reason: - -> not good ... check /var/adm/syslog and /var/adm/daemon. - Could be a bug in the ipppd. - --- - -Q12: How can I reduce login delay? - -A: Log a login session ('debug' log) and check which options - your remote side rejects. Next time configure your ipppd - to not negotiate these options. Another 'side effect' is, that - this increases redundancy. (e.g your remote side is buggy and - rejects options in a wrong way). - - - diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index 18735dc460a0..111636ad1bad 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -23,8 +23,8 @@ running, the suggested command should tell you. Again, keep in mind that this list assumes you are already functionally running a Linux kernel. Also, not all tools are necessary on all -systems; obviously, if you don't have any ISDN hardware, for example, -you probably needn't concern yourself with isdn4k-utils. +systems; obviously, if you don't have any PC Card hardware, for example, +you probably needn't concern yourself with pcmciautils. ====================== =============== ======================================== Program Minimal version Command to check the version @@ -45,7 +45,6 @@ btrfs-progs 0.18 btrfsck pcmciautils 004 pccardctl -V quota-tools 3.09 quota -V PPP 2.4.0 pppd --version -isdn4k-utils 3.1pre1 isdnctrl 2>&1|grep version nfs-utils 1.0.5 showmount --version procps 3.2.0 ps --version oprofile 0.9 oprofiled --version @@ -279,12 +278,6 @@ which can be made by:: as root. -Isdn4k-utils ------------- - -Due to changes in the length of the phone number field, isdn4k-utils -needs to be recompiled or (preferably) upgraded. - NFS-utils --------- @@ -448,11 +441,6 @@ PPP - -Isdn4k-utils ------------- - -- - NFS-utils --------- diff --git a/MAINTAINERS b/MAINTAINERS index 0c55b0fedbe2..3a761e680296 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8371,9 +8371,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kkeil/isdn-2.6.git S: Maintained F: Documentation/isdn/ F: drivers/isdn/ -F: include/linux/isdn.h F: include/linux/isdn/ -F: include/uapi/linux/isdn.h F: include/uapi/linux/isdn/ IT87 HARDWARE MONITORING DRIVER diff --git a/drivers/isdn/Kconfig b/drivers/isdn/Kconfig index 1ca4d70d198a..6e3bf833c67e 100644 --- a/drivers/isdn/Kconfig +++ b/drivers/isdn/Kconfig @@ -21,27 +21,6 @@ menuconfig ISDN if ISDN -menuconfig ISDN_I4L - tristate "Old ISDN4Linux (deprecated)" - depends on TTY - ---help--- - This driver allows you to use an ISDN adapter for networking - connections and as dialin/out device. The isdn-tty's have a built - in AT-compatible modem emulator. Network devices support autodial, - channel-bundling, callback and caller-authentication without having - a daemon running. A reduced T.70 protocol is supported with tty's - suitable for German BTX. On D-Channel, the protocols EDSS1 - (Euro-ISDN) and 1TR6 (German style) are supported. See - for more information. - - ISDN support in the linux kernel is moving towards a new API, - called CAPI (Common ISDN Application Programming Interface). - Therefore the old ISDN4Linux layer will eventually become obsolete. - It is still available, though, for use with adapters that are not - supported by the new CAPI subsystem yet. - -source "drivers/isdn/i4l/Kconfig" - menuconfig ISDN_CAPI tristate "CAPI 2.0 subsystem" help @@ -71,9 +50,4 @@ source "drivers/isdn/hysdn/Kconfig" source "drivers/isdn/mISDN/Kconfig" -config ISDN_HDLC - tristate - select CRC_CCITT - select BITREVERSE - endif # ISDN diff --git a/drivers/isdn/Makefile b/drivers/isdn/Makefile index 7487f0bbe855..379b4a03c321 100644 --- a/drivers/isdn/Makefile +++ b/drivers/isdn/Makefile @@ -7,7 +7,5 @@ obj-$(CONFIG_ISDN_I4L) += i4l/ obj-$(CONFIG_ISDN_CAPI) += capi/ obj-$(CONFIG_MISDN) += mISDN/ obj-$(CONFIG_ISDN) += hardware/ -obj-$(CONFIG_ISDN_DIVERSION) += divert/ -obj-$(CONFIG_ISDN_DRV_LOOP) += isdnloop/ obj-$(CONFIG_HYSDN) += hysdn/ obj-$(CONFIG_ISDN_DRV_GIGASET) += gigaset/ diff --git a/drivers/isdn/capi/Kconfig b/drivers/isdn/capi/Kconfig index abaadce376c5..089dbee18f36 100644 --- a/drivers/isdn/capi/Kconfig +++ b/drivers/isdn/capi/Kconfig @@ -27,15 +27,6 @@ config ISDN_CAPI_MIDDLEWARE device. If you want to use pppd with pppdcapiplugin to dial up to your ISP, say Y here. -config ISDN_CAPI_CAPIDRV - tristate "CAPI2.0 capidrv interface support" - depends on ISDN_I4L - help - This option provides the glue code to hook up CAPI driven cards to - the legacy isdn4linux link layer. If you have a card which is - supported by a CAPI driver, but still want to use old features like - ippp interfaces or ttyI emulation, say Y/M here. - config ISDN_CAPI_CAPIDRV_VERBOSE bool "Verbose reason code reporting" depends on ISDN_CAPI_CAPIDRV diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c deleted file mode 100644 index e8949f3dcae1..000000000000 --- a/drivers/isdn/capi/capidrv.c +++ /dev/null @@ -1,2525 +0,0 @@ -/* $Id: capidrv.c,v 1.1.2.2 2004/01/12 23:17:24 keil Exp $ - * - * ISDN4Linux Driver, using capi20 interface (kernelcapi) - * - * Copyright 1997 by Carsten Paeth - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include "capidrv.h" - -static int debugmode = 0; - -MODULE_DESCRIPTION("CAPI4Linux: Interface to ISDN4Linux"); -MODULE_AUTHOR("Carsten Paeth"); -MODULE_LICENSE("GPL"); -module_param(debugmode, uint, S_IRUGO | S_IWUSR); - -/* -------- type definitions ----------------------------------------- */ - - -struct capidrv_contr { - - struct capidrv_contr *next; - struct module *owner; - u32 contrnr; - char name[20]; - - /* - * for isdn4linux - */ - isdn_if interface; - int myid; - - /* - * LISTEN state - */ - int state; - u32 cipmask; - u32 cipmask2; - struct timer_list listentimer; - - /* - * ID of capi message sent - */ - u16 msgid; - - /* - * B-Channels - */ - int nbchan; - struct capidrv_bchan { - struct capidrv_contr *contr; - u8 msn[ISDN_MSNLEN]; - int l2; - int l3; - u8 num[ISDN_MSNLEN]; - u8 mynum[ISDN_MSNLEN]; - int si1; - int si2; - int incoming; - int disconnecting; - struct capidrv_plci { - struct capidrv_plci *next; - u32 plci; - u32 ncci; /* ncci for CONNECT_ACTIVE_IND */ - u16 msgid; /* to identfy CONNECT_CONF */ - int chan; - int state; - int leasedline; - struct capidrv_ncci { - struct capidrv_ncci *next; - struct capidrv_plci *plcip; - u32 ncci; - u16 msgid; /* to identfy CONNECT_B3_CONF */ - int chan; - int state; - int oldstate; - /* */ - u16 datahandle; - struct ncci_datahandle_queue { - struct ncci_datahandle_queue *next; - u16 datahandle; - int len; - } *ackqueue; - } *ncci_list; - } *plcip; - struct capidrv_ncci *nccip; - } *bchans; - - struct capidrv_plci *plci_list; - - /* for q931 data */ - u8 q931_buf[4096]; - u8 *q931_read; - u8 *q931_write; - u8 *q931_end; -}; - - -struct capidrv_data { - struct capi20_appl ap; - int ncontr; - struct capidrv_contr *contr_list; -}; - -typedef struct capidrv_plci capidrv_plci; -typedef struct capidrv_ncci capidrv_ncci; -typedef struct capidrv_contr capidrv_contr; -typedef struct capidrv_data capidrv_data; -typedef struct capidrv_bchan capidrv_bchan; - -/* -------- data definitions ----------------------------------------- */ - -static capidrv_data global; -static DEFINE_SPINLOCK(global_lock); - -static void handle_dtrace_data(capidrv_contr *card, - int send, int level2, u8 *data, u16 len); - -/* -------- convert functions ---------------------------------------- */ - -static inline u32 b1prot(int l2, int l3) -{ - switch (l2) { - case ISDN_PROTO_L2_X75I: - case ISDN_PROTO_L2_X75UI: - case ISDN_PROTO_L2_X75BUI: - return 0; - case ISDN_PROTO_L2_HDLC: - default: - return 0; - case ISDN_PROTO_L2_TRANS: - return 1; - case ISDN_PROTO_L2_V11096: - case ISDN_PROTO_L2_V11019: - case ISDN_PROTO_L2_V11038: - return 2; - case ISDN_PROTO_L2_FAX: - return 4; - case ISDN_PROTO_L2_MODEM: - return 8; - } -} - -static inline u32 b2prot(int l2, int l3) -{ - switch (l2) { - case ISDN_PROTO_L2_X75I: - case ISDN_PROTO_L2_X75UI: - case ISDN_PROTO_L2_X75BUI: - default: - return 0; - case ISDN_PROTO_L2_HDLC: - case ISDN_PROTO_L2_TRANS: - case ISDN_PROTO_L2_V11096: - case ISDN_PROTO_L2_V11019: - case ISDN_PROTO_L2_V11038: - case ISDN_PROTO_L2_MODEM: - return 1; - case ISDN_PROTO_L2_FAX: - return 4; - } -} - -static inline u32 b3prot(int l2, int l3) -{ - switch (l2) { - case ISDN_PROTO_L2_X75I: - case ISDN_PROTO_L2_X75UI: - case ISDN_PROTO_L2_X75BUI: - case ISDN_PROTO_L2_HDLC: - case ISDN_PROTO_L2_TRANS: - case ISDN_PROTO_L2_V11096: - case ISDN_PROTO_L2_V11019: - case ISDN_PROTO_L2_V11038: - case ISDN_PROTO_L2_MODEM: - default: - return 0; - case ISDN_PROTO_L2_FAX: - return 4; - } -} - -static _cstruct b1config_async_v110(u16 rate) -{ - /* CAPI-Spec "B1 Configuration" */ - static unsigned char buf[9]; - buf[0] = 8; /* len */ - /* maximum bitrate */ - buf[1] = rate & 0xff; buf[2] = (rate >> 8) & 0xff; - buf[3] = 8; buf[4] = 0; /* 8 bits per character */ - buf[5] = 0; buf[6] = 0; /* parity none */ - buf[7] = 0; buf[8] = 0; /* 1 stop bit */ - return buf; -} - -static _cstruct b1config(int l2, int l3) -{ - switch (l2) { - case ISDN_PROTO_L2_X75I: - case ISDN_PROTO_L2_X75UI: - case ISDN_PROTO_L2_X75BUI: - case ISDN_PROTO_L2_HDLC: - case ISDN_PROTO_L2_TRANS: - default: - return NULL; - case ISDN_PROTO_L2_V11096: - return b1config_async_v110(9600); - case ISDN_PROTO_L2_V11019: - return b1config_async_v110(19200); - case ISDN_PROTO_L2_V11038: - return b1config_async_v110(38400); - } -} - -static inline u16 si2cip(u8 si1, u8 si2) -{ - static const u8 cip[17][5] = - { - /* 0 1 2 3 4 */ - {0, 0, 0, 0, 0}, /*0 */ - {16, 16, 4, 26, 16}, /*1 */ - {17, 17, 17, 4, 4}, /*2 */ - {2, 2, 2, 2, 2}, /*3 */ - {18, 18, 18, 18, 18}, /*4 */ - {2, 2, 2, 2, 2}, /*5 */ - {0, 0, 0, 0, 0}, /*6 */ - {2, 2, 2, 2, 2}, /*7 */ - {2, 2, 2, 2, 2}, /*8 */ - {21, 21, 21, 21, 21}, /*9 */ - {19, 19, 19, 19, 19}, /*10 */ - {0, 0, 0, 0, 0}, /*11 */ - {0, 0, 0, 0, 0}, /*12 */ - {0, 0, 0, 0, 0}, /*13 */ - {0, 0, 0, 0, 0}, /*14 */ - {22, 22, 22, 22, 22}, /*15 */ - {27, 27, 27, 28, 27} /*16 */ - }; - if (si1 > 16) - si1 = 0; - if (si2 > 4) - si2 = 0; - - return (u16) cip[si1][si2]; -} - -static inline u8 cip2si1(u16 cipval) -{ - static const u8 si[32] = - {7, 1, 7, 7, 1, 1, 7, 7, /*0-7 */ - 7, 1, 0, 0, 0, 0, 0, 0, /*8-15 */ - 1, 2, 4, 10, 9, 9, 15, 7, /*16-23 */ - 7, 7, 1, 16, 16, 0, 0, 0}; /*24-31 */ - - if (cipval > 31) - cipval = 0; /* .... */ - return si[cipval]; -} - -static inline u8 cip2si2(u16 cipval) -{ - static const u8 si[32] = - {0, 0, 0, 0, 2, 3, 0, 0, /*0-7 */ - 0, 3, 0, 0, 0, 0, 0, 0, /*8-15 */ - 1, 2, 0, 0, 9, 0, 0, 0, /*16-23 */ - 0, 0, 3, 2, 3, 0, 0, 0}; /*24-31 */ - - if (cipval > 31) - cipval = 0; /* .... */ - return si[cipval]; -} - - -/* -------- controller management ------------------------------------- */ - -static inline capidrv_contr *findcontrbydriverid(int driverid) -{ - unsigned long flags; - capidrv_contr *p; - - spin_lock_irqsave(&global_lock, flags); - for (p = global.contr_list; p; p = p->next) - if (p->myid == driverid) - break; - spin_unlock_irqrestore(&global_lock, flags); - return p; -} - -static capidrv_contr *findcontrbynumber(u32 contr) -{ - unsigned long flags; - capidrv_contr *p = global.contr_list; - - spin_lock_irqsave(&global_lock, flags); - for (p = global.contr_list; p; p = p->next) - if (p->contrnr == contr) - break; - spin_unlock_irqrestore(&global_lock, flags); - return p; -} - - -/* -------- plci management ------------------------------------------ */ - -static capidrv_plci *new_plci(capidrv_contr *card, int chan) -{ - capidrv_plci *plcip; - - plcip = kzalloc(sizeof(capidrv_plci), GFP_ATOMIC); - - if (plcip == NULL) - return NULL; - - plcip->state = ST_PLCI_NONE; - plcip->plci = 0; - plcip->msgid = 0; - plcip->chan = chan; - plcip->next = card->plci_list; - card->plci_list = plcip; - card->bchans[chan].plcip = plcip; - - return plcip; -} - -static capidrv_plci *find_plci_by_plci(capidrv_contr *card, u32 plci) -{ - capidrv_plci *p; - for (p = card->plci_list; p; p = p->next) - if (p->plci == plci) - return p; - return NULL; -} - -static capidrv_plci *find_plci_by_msgid(capidrv_contr *card, u16 msgid) -{ - capidrv_plci *p; - for (p = card->plci_list; p; p = p->next) - if (p->msgid == msgid) - return p; - return NULL; -} - -static capidrv_plci *find_plci_by_ncci(capidrv_contr *card, u32 ncci) -{ - capidrv_plci *p; - for (p = card->plci_list; p; p = p->next) - if (p->plci == (ncci & 0xffff)) - return p; - return NULL; -} - -static void free_plci(capidrv_contr *card, capidrv_plci *plcip) -{ - capidrv_plci **pp; - - for (pp = &card->plci_list; *pp; pp = &(*pp)->next) { - if (*pp == plcip) { - *pp = (*pp)->next; - card->bchans[plcip->chan].plcip = NULL; - card->bchans[plcip->chan].disconnecting = 0; - card->bchans[plcip->chan].incoming = 0; - kfree(plcip); - return; - } - } - printk(KERN_ERR "capidrv-%d: free_plci %p (0x%x) not found, Huh?\n", - card->contrnr, plcip, plcip->plci); -} - -/* -------- ncci management ------------------------------------------ */ - -static inline capidrv_ncci *new_ncci(capidrv_contr *card, - capidrv_plci *plcip, - u32 ncci) -{ - capidrv_ncci *nccip; - - nccip = kzalloc(sizeof(capidrv_ncci), GFP_ATOMIC); - - if (nccip == NULL) - return NULL; - - nccip->ncci = ncci; - nccip->state = ST_NCCI_NONE; - nccip->plcip = plcip; - nccip->chan = plcip->chan; - nccip->datahandle = 0; - - nccip->next = plcip->ncci_list; - plcip->ncci_list = nccip; - - card->bchans[plcip->chan].nccip = nccip; - - return nccip; -} - -static inline capidrv_ncci *find_ncci(capidrv_contr *card, u32 ncci) -{ - capidrv_plci *plcip; - capidrv_ncci *p; - - if ((plcip = find_plci_by_ncci(card, ncci)) == NULL) - return NULL; - - for (p = plcip->ncci_list; p; p = p->next) - if (p->ncci == ncci) - return p; - return NULL; -} - -static inline capidrv_ncci *find_ncci_by_msgid(capidrv_contr *card, - u32 ncci, u16 msgid) -{ - capidrv_plci *plcip; - capidrv_ncci *p; - - if ((plcip = find_plci_by_ncci(card, ncci)) == NULL) - return NULL; - - for (p = plcip->ncci_list; p; p = p->next) - if (p->msgid == msgid) - return p; - return NULL; -} - -static void free_ncci(capidrv_contr *card, struct capidrv_ncci *nccip) -{ - struct capidrv_ncci **pp; - - for (pp = &(nccip->plcip->ncci_list); *pp; pp = &(*pp)->next) { - if (*pp == nccip) { - *pp = (*pp)->next; - break; - } - } - card->bchans[nccip->chan].nccip = NULL; - kfree(nccip); -} - -static int capidrv_add_ack(struct capidrv_ncci *nccip, - u16 datahandle, int len) -{ - struct ncci_datahandle_queue *n, **pp; - - n = kmalloc(sizeof(struct ncci_datahandle_queue), GFP_ATOMIC); - if (!n) { - printk(KERN_ERR "capidrv: kmalloc ncci_datahandle failed\n"); - return -1; - } - n->next = NULL; - n->datahandle = datahandle; - n->len = len; - for (pp = &nccip->ackqueue; *pp; pp = &(*pp)->next); - *pp = n; - return 0; -} - -static int capidrv_del_ack(struct capidrv_ncci *nccip, u16 datahandle) -{ - struct ncci_datahandle_queue **pp, *p; - int len; - - for (pp = &nccip->ackqueue; *pp; pp = &(*pp)->next) { - if ((*pp)->datahandle == datahandle) { - p = *pp; - len = p->len; - *pp = (*pp)->next; - kfree(p); - return len; - } - } - return -1; -} - -/* -------- convert and send capi message ---------------------------- */ - -static void send_message(capidrv_contr *card, _cmsg *cmsg) -{ - struct sk_buff *skb; - size_t len; - - if (capi_cmsg2message(cmsg, cmsg->buf)) { - printk(KERN_ERR "capidrv::send_message: parser failure\n"); - return; - } - len = CAPIMSG_LEN(cmsg->buf); - skb = alloc_skb(len, GFP_ATOMIC); - if (!skb) { - printk(KERN_ERR "capidrv::send_message: can't allocate mem\n"); - return; - } - skb_put_data(skb, cmsg->buf, len); - if (capi20_put_message(&global.ap, skb) != CAPI_NOERROR) - kfree_skb(skb); -} - -/* -------- state machine -------------------------------------------- */ - -struct listenstatechange { - int actstate; - int nextstate; - int event; -}; - -static struct listenstatechange listentable[] = -{ - {ST_LISTEN_NONE, ST_LISTEN_WAIT_CONF, EV_LISTEN_REQ}, - {ST_LISTEN_ACTIVE, ST_LISTEN_ACTIVE_WAIT_CONF, EV_LISTEN_REQ}, - {ST_LISTEN_WAIT_CONF, ST_LISTEN_NONE, EV_LISTEN_CONF_ERROR}, - {ST_LISTEN_ACTIVE_WAIT_CONF, ST_LISTEN_ACTIVE, EV_LISTEN_CONF_ERROR}, - {ST_LISTEN_WAIT_CONF, ST_LISTEN_NONE, EV_LISTEN_CONF_EMPTY}, - {ST_LISTEN_ACTIVE_WAIT_CONF, ST_LISTEN_NONE, EV_LISTEN_CONF_EMPTY}, - {ST_LISTEN_WAIT_CONF, ST_LISTEN_ACTIVE, EV_LISTEN_CONF_OK}, - {ST_LISTEN_ACTIVE_WAIT_CONF, ST_LISTEN_ACTIVE, EV_LISTEN_CONF_OK}, - {}, -}; - -static void listen_change_state(capidrv_contr *card, int event) -{ - struct listenstatechange *p = listentable; - while (p->event) { - if (card->state == p->actstate && p->event == event) { - if (debugmode) - printk(KERN_DEBUG "capidrv-%d: listen_change_state %d -> %d\n", - card->contrnr, card->state, p->nextstate); - card->state = p->nextstate; - return; - } - p++; - } - printk(KERN_ERR "capidrv-%d: listen_change_state state=%d event=%d ????\n", - card->contrnr, card->state, event); - -} - -/* ------------------------------------------------------------------ */ - -static void p0(capidrv_contr *card, capidrv_plci *plci) -{ - isdn_ctrl cmd; - - card->bchans[plci->chan].contr = NULL; - cmd.command = ISDN_STAT_DHUP; - cmd.driver = card->myid; - cmd.arg = plci->chan; - card->interface.statcallb(&cmd); - free_plci(card, plci); -} - -/* ------------------------------------------------------------------ */ - -struct plcistatechange { - int actstate; - int nextstate; - int event; - void (*changefunc)(capidrv_contr *card, capidrv_plci *plci); -}; - -static struct plcistatechange plcitable[] = -{ - /* P-0 */ - {ST_PLCI_NONE, ST_PLCI_OUTGOING, EV_PLCI_CONNECT_REQ, NULL}, - {ST_PLCI_NONE, ST_PLCI_ALLOCATED, EV_PLCI_FACILITY_IND_UP, NULL}, - {ST_PLCI_NONE, ST_PLCI_INCOMING, EV_PLCI_CONNECT_IND, NULL}, - {ST_PLCI_NONE, ST_PLCI_RESUMEING, EV_PLCI_RESUME_REQ, NULL}, - /* P-0.1 */ - {ST_PLCI_OUTGOING, ST_PLCI_NONE, EV_PLCI_CONNECT_CONF_ERROR, p0}, - {ST_PLCI_OUTGOING, ST_PLCI_ALLOCATED, EV_PLCI_CONNECT_CONF_OK, NULL}, - /* P-1 */ - {ST_PLCI_ALLOCATED, ST_PLCI_ACTIVE, EV_PLCI_CONNECT_ACTIVE_IND, NULL}, - {ST_PLCI_ALLOCATED, ST_PLCI_DISCONNECTING, EV_PLCI_DISCONNECT_REQ, NULL}, - {ST_PLCI_ALLOCATED, ST_PLCI_DISCONNECTING, EV_PLCI_FACILITY_IND_DOWN, NULL}, - {ST_PLCI_ALLOCATED, ST_PLCI_DISCONNECTED, EV_PLCI_DISCONNECT_IND, NULL}, - /* P-ACT */ - {ST_PLCI_ACTIVE, ST_PLCI_DISCONNECTING, EV_PLCI_DISCONNECT_REQ, NULL}, - {ST_PLCI_ACTIVE, ST_PLCI_DISCONNECTING, EV_PLCI_FACILITY_IND_DOWN, NULL}, - {ST_PLCI_ACTIVE, ST_PLCI_DISCONNECTED, EV_PLCI_DISCONNECT_IND, NULL}, - {ST_PLCI_ACTIVE, ST_PLCI_HELD, EV_PLCI_HOLD_IND, NULL}, - {ST_PLCI_ACTIVE, ST_PLCI_DISCONNECTING, EV_PLCI_SUSPEND_IND, NULL}, - /* P-2 */ - {ST_PLCI_INCOMING, ST_PLCI_DISCONNECTING, EV_PLCI_CONNECT_REJECT, NULL}, - {ST_PLCI_INCOMING, ST_PLCI_FACILITY_IND, EV_PLCI_FACILITY_IND_UP, NULL}, - {ST_PLCI_INCOMING, ST_PLCI_ACCEPTING, EV_PLCI_CONNECT_RESP, NULL}, - {ST_PLCI_INCOMING, ST_PLCI_DISCONNECTING, EV_PLCI_DISCONNECT_REQ, NULL}, - {ST_PLCI_INCOMING, ST_PLCI_DISCONNECTING, EV_PLCI_FACILITY_IND_DOWN, NULL}, - {ST_PLCI_INCOMING, ST_PLCI_DISCONNECTED, EV_PLCI_DISCONNECT_IND, NULL}, - {ST_PLCI_INCOMING, ST_PLCI_DISCONNECTING, EV_PLCI_CD_IND, NULL}, - /* P-3 */ - {ST_PLCI_FACILITY_IND, ST_PLCI_DISCONNECTING, EV_PLCI_CONNECT_REJECT, NULL}, - {ST_PLCI_FACILITY_IND, ST_PLCI_ACCEPTING, EV_PLCI_CONNECT_ACTIVE_IND, NULL}, - {ST_PLCI_FACILITY_IND, ST_PLCI_DISCONNECTING, EV_PLCI_DISCONNECT_REQ, NULL}, - {ST_PLCI_FACILITY_IND, ST_PLCI_DISCONNECTING, EV_PLCI_FACILITY_IND_DOWN, NULL}, - {ST_PLCI_FACILITY_IND, ST_PLCI_DISCONNECTED, EV_PLCI_DISCONNECT_IND, NULL}, - /* P-4 */ - {ST_PLCI_ACCEPTING, ST_PLCI_ACTIVE, EV_PLCI_CONNECT_ACTIVE_IND, NULL}, - {ST_PLCI_ACCEPTING, ST_PLCI_DISCONNECTING, EV_PLCI_DISCONNECT_REQ, NULL}, - {ST_PLCI_ACCEPTING, ST_PLCI_DISCONNECTING, EV_PLCI_FACILITY_IND_DOWN, NULL}, - {ST_PLCI_ACCEPTING, ST_PLCI_DISCONNECTED, EV_PLCI_DISCONNECT_IND, NULL}, - /* P-5 */ - {ST_PLCI_DISCONNECTING, ST_PLCI_DISCONNECTED, EV_PLCI_DISCONNECT_IND, NULL}, - /* P-6 */ - {ST_PLCI_DISCONNECTED, ST_PLCI_NONE, EV_PLCI_DISCONNECT_RESP, p0}, - /* P-0.Res */ - {ST_PLCI_RESUMEING, ST_PLCI_NONE, EV_PLCI_RESUME_CONF_ERROR, p0}, - {ST_PLCI_RESUMEING, ST_PLCI_RESUME, EV_PLCI_RESUME_CONF_OK, NULL}, - /* P-RES */ - {ST_PLCI_RESUME, ST_PLCI_ACTIVE, EV_PLCI_RESUME_IND, NULL}, - /* P-HELD */ - {ST_PLCI_HELD, ST_PLCI_ACTIVE, EV_PLCI_RETRIEVE_IND, NULL}, - {}, -}; - -static void plci_change_state(capidrv_contr *card, capidrv_plci *plci, int event) -{ - struct plcistatechange *p = plcitable; - while (p->event) { - if (plci->state == p->actstate && p->event == event) { - if (debugmode) - printk(KERN_DEBUG "capidrv-%d: plci_change_state:0x%x %d -> %d\n", - card->contrnr, plci->plci, plci->state, p->nextstate); - plci->state = p->nextstate; - if (p->changefunc) - p->changefunc(card, plci); - return; - } - p++; - } - printk(KERN_ERR "capidrv-%d: plci_change_state:0x%x state=%d event=%d ????\n", - card->contrnr, plci->plci, plci->state, event); -} - -/* ------------------------------------------------------------------ */ - -static _cmsg cmsg; - -static void n0(capidrv_contr *card, capidrv_ncci *ncci) -{ - isdn_ctrl cmd; - - capi_fill_DISCONNECT_REQ(&cmsg, - global.ap.applid, - card->msgid++, - ncci->plcip->plci, - NULL, /* BChannelinformation */ - NULL, /* Keypadfacility */ - NULL, /* Useruserdata */ /* $$$$ */ - NULL /* Facilitydataarray */ - ); - plci_change_state(card, ncci->plcip, EV_PLCI_DISCONNECT_REQ); - send_message(card, &cmsg); - - cmd.command = ISDN_STAT_BHUP; - cmd.driver = card->myid; - cmd.arg = ncci->chan; - card->interface.statcallb(&cmd); - free_ncci(card, ncci); -} - -/* ------------------------------------------------------------------ */ - -struct nccistatechange { - int actstate; - int nextstate; - int event; - void (*changefunc)(capidrv_contr *card, capidrv_ncci *ncci); -}; - -static struct nccistatechange nccitable[] = -{ - /* N-0 */ - {ST_NCCI_NONE, ST_NCCI_OUTGOING, EV_NCCI_CONNECT_B3_REQ, NULL}, - {ST_NCCI_NONE, ST_NCCI_INCOMING, EV_NCCI_CONNECT_B3_IND, NULL}, - /* N-0.1 */ - {ST_NCCI_OUTGOING, ST_NCCI_ALLOCATED, EV_NCCI_CONNECT_B3_CONF_OK, NULL}, - {ST_NCCI_OUTGOING, ST_NCCI_NONE, EV_NCCI_CONNECT_B3_CONF_ERROR, n0}, - /* N-1 */ - {ST_NCCI_INCOMING, ST_NCCI_DISCONNECTING, EV_NCCI_CONNECT_B3_REJECT, NULL}, - {ST_NCCI_INCOMING, ST_NCCI_ALLOCATED, EV_NCCI_CONNECT_B3_RESP, NULL}, - {ST_NCCI_INCOMING, ST_NCCI_DISCONNECTED, EV_NCCI_DISCONNECT_B3_IND, NULL}, - {ST_NCCI_INCOMING, ST_NCCI_DISCONNECTING, EV_NCCI_DISCONNECT_B3_REQ, NULL}, - /* N-2 */ - {ST_NCCI_ALLOCATED, ST_NCCI_ACTIVE, EV_NCCI_CONNECT_B3_ACTIVE_IND, NULL}, - {ST_NCCI_ALLOCATED, ST_NCCI_DISCONNECTED, EV_NCCI_DISCONNECT_B3_IND, NULL}, - {ST_NCCI_ALLOCATED, ST_NCCI_DISCONNECTING, EV_NCCI_DISCONNECT_B3_REQ, NULL}, - /* N-ACT */ - {ST_NCCI_ACTIVE, ST_NCCI_ACTIVE, EV_NCCI_RESET_B3_IND, NULL}, - {ST_NCCI_ACTIVE, ST_NCCI_RESETING, EV_NCCI_RESET_B3_REQ, NULL}, - {ST_NCCI_ACTIVE, ST_NCCI_DISCONNECTED, EV_NCCI_DISCONNECT_B3_IND, NULL}, - {ST_NCCI_ACTIVE, ST_NCCI_DISCONNECTING, EV_NCCI_DISCONNECT_B3_REQ, NULL}, - /* N-3 */ - {ST_NCCI_RESETING, ST_NCCI_ACTIVE, EV_NCCI_RESET_B3_IND, NULL}, - {ST_NCCI_RESETING, ST_NCCI_DISCONNECTED, EV_NCCI_DISCONNECT_B3_IND, NULL}, - {ST_NCCI_RESETING, ST_NCCI_DISCONNECTING, EV_NCCI_DISCONNECT_B3_REQ, NULL}, - /* N-4 */ - {ST_NCCI_DISCONNECTING, ST_NCCI_DISCONNECTED, EV_NCCI_DISCONNECT_B3_IND, NULL}, - {ST_NCCI_DISCONNECTING, ST_NCCI_PREVIOUS, EV_NCCI_DISCONNECT_B3_CONF_ERROR, NULL}, - /* N-5 */ - {ST_NCCI_DISCONNECTED, ST_NCCI_NONE, EV_NCCI_DISCONNECT_B3_RESP, n0}, - {}, -}; - -static void ncci_change_state(capidrv_contr *card, capidrv_ncci *ncci, int event) -{ - struct nccistatechange *p = nccitable; - while (p->event) { - if (ncci->state == p->actstate && p->event == event) { - if (debugmode) - printk(KERN_DEBUG "capidrv-%d: ncci_change_state:0x%x %d -> %d\n", - card->contrnr, ncci->ncci, ncci->state, p->nextstate); - if (p->nextstate == ST_NCCI_PREVIOUS) { - ncci->state = ncci->oldstate; - ncci->oldstate = p->actstate; - } else { - ncci->oldstate = p->actstate; - ncci->state = p->nextstate; - } - if (p->changefunc) - p->changefunc(card, ncci); - return; - } - p++; - } - printk(KERN_ERR "capidrv-%d: ncci_change_state:0x%x state=%d event=%d ????\n", - card->contrnr, ncci->ncci, ncci->state, event); -} - -/* ------------------------------------------------------------------- */ - -static inline int new_bchan(capidrv_contr *card) -{ - int i; - for (i = 0; i < card->nbchan; i++) { - if (card->bchans[i].plcip == NULL) { - card->bchans[i].disconnecting = 0; - return i; - } - } - return -1; -} - -/* ------------------------------------------------------------------- */ -static char *capi_info2str(u16 reason) -{ -#ifndef CONFIG_ISDN_CAPI_CAPIDRV_VERBOSE - return ".."; -#else - switch (reason) { - -/*-- informative values (corresponding message was processed) -----*/ - case 0x0001: - return "NCPI not supported by current protocol, NCPI ignored"; - case 0x0002: - return "Flags not supported by current protocol, flags ignored"; - case 0x0003: - return "Alert already sent by another application"; - -/*-- error information concerning CAPI_REGISTER -----*/ - case 0x1001: - return "Too many applications"; - case 0x1002: - return "Logical block size too small, must be at least 128 Bytes"; - case 0x1003: - return "Buffer exceeds 64 kByte"; - case 0x1004: - return "Message buffer size too small, must be at least 1024 Bytes"; - case 0x1005: - return "Max. number of logical connections not supported"; - case 0x1006: - return "Reserved"; - case 0x1007: - return "The message could not be accepted because of an internal busy condition"; - case 0x1008: - return "OS resource error (no memory ?)"; - case 0x1009: - return "CAPI not installed"; - case 0x100A: - return "Controller does not support external equipment"; - case 0x100B: - return "Controller does only support external equipment"; - -/*-- error information concerning message exchange functions -----*/ - case 0x1101: - return "Illegal application number"; - case 0x1102: - return "Illegal command or subcommand or message length less than 12 bytes"; - case 0x1103: - return "The message could not be accepted because of a queue full condition !! The error code does not imply that CAPI cannot receive messages directed to another controller, PLCI or NCCI"; - case 0x1104: - return "Queue is empty"; - case 0x1105: - return "Queue overflow, a message was lost !! This indicates a configuration error. The only recovery from this error is to perform a CAPI_RELEASE"; - case 0x1106: - return "Unknown notification parameter"; - case 0x1107: - return "The Message could not be accepted because of an internal busy condition"; - case 0x1108: - return "OS Resource error (no memory ?)"; - case 0x1109: - return "CAPI not installed"; - case 0x110A: - return "Controller does not support external equipment"; - case 0x110B: - return "Controller does only support external equipment"; - -/*-- error information concerning resource / coding problems -----*/ - case 0x2001: - return "Message not supported in current state"; - case 0x2002: - return "Illegal Controller / PLCI / NCCI"; - case 0x2003: - return "Out of PLCI"; - case 0x2004: - return "Out of NCCI"; - case 0x2005: - return "Out of LISTEN"; - case 0x2006: - return "Out of FAX resources (protocol T.30)"; - case 0x2007: - return "Illegal message parameter coding"; - -/*-- error information concerning requested services -----*/ - case 0x3001: - return "B1 protocol not supported"; - case 0x3002: - return "B2 protocol not supported"; - case 0x3003: - return "B3 protocol not supported"; - case 0x3004: - return "B1 protocol parameter not supported"; - case 0x3005: - return "B2 protocol parameter not supported"; - case 0x3006: - return "B3 protocol parameter not supported"; - case 0x3007: - return "B protocol combination not supported"; - case 0x3008: - return "NCPI not supported"; - case 0x3009: - return "CIP Value unknown"; - case 0x300A: - return "Flags not supported (reserved bits)"; - case 0x300B: - return "Facility not supported"; - case 0x300C: - return "Data length not supported by current protocol"; - case 0x300D: - return "Reset procedure not supported by current protocol"; - -/*-- informations about the clearing of a physical connection -----*/ - case 0x3301: - return "Protocol error layer 1 (broken line or B-channel removed by signalling protocol)"; - case 0x3302: - return "Protocol error layer 2"; - case 0x3303: - return "Protocol error layer 3"; - case 0x3304: - return "Another application got that call"; -/*-- T.30 specific reasons -----*/ - case 0x3311: - return "Connecting not successful (remote station is no FAX G3 machine)"; - case 0x3312: - return "Connecting not successful (training error)"; - case 0x3313: - return "Disconnected before transfer (remote station does not support transfer mode, e.g. resolution)"; - case 0x3314: - return "Disconnected during transfer (remote abort)"; - case 0x3315: - return "Disconnected during transfer (remote procedure error, e.g. unsuccessful repetition of T.30 commands)"; - case 0x3316: - return "Disconnected during transfer (local tx data underrun)"; - case 0x3317: - return "Disconnected during transfer (local rx data overflow)"; - case 0x3318: - return "Disconnected during transfer (local abort)"; - case 0x3319: - return "Illegal parameter coding (e.g. SFF coding error)"; - -/*-- disconnect causes from the network according to ETS 300 102-1/Q.931 -----*/ - case 0x3481: return "Unallocated (unassigned) number"; - case 0x3482: return "No route to specified transit network"; - case 0x3483: return "No route to destination"; - case 0x3486: return "Channel unacceptable"; - case 0x3487: - return "Call awarded and being delivered in an established channel"; - case 0x3490: return "Normal call clearing"; - case 0x3491: return "User busy"; - case 0x3492: return "No user responding"; - case 0x3493: return "No answer from user (user alerted)"; - case 0x3495: return "Call rejected"; - case 0x3496: return "Number changed"; - case 0x349A: return "Non-selected user clearing"; - case 0x349B: return "Destination out of order"; - case 0x349C: return "Invalid number format"; - case 0x349D: return "Facility rejected"; - case 0x349E: return "Response to STATUS ENQUIRY"; - case 0x349F: return "Normal, unspecified"; - case 0x34A2: return "No circuit / channel available"; - case 0x34A6: return "Network out of order"; - case 0x34A9: return "Temporary failure"; - case 0x34AA: return "Switching equipment congestion"; - case 0x34AB: return "Access information discarded"; - case 0x34AC: return "Requested circuit / channel not available"; - case 0x34AF: return "Resources unavailable, unspecified"; - case 0x34B1: return "Quality of service unavailable"; - case 0x34B2: return "Requested facility not subscribed"; - case 0x34B9: return "Bearer capability not authorized"; - case 0x34BA: return "Bearer capability not presently available"; - case 0x34BF: return "Service or option not available, unspecified"; - case 0x34C1: return "Bearer capability not implemented"; - case 0x34C2: return "Channel type not implemented"; - case 0x34C5: return "Requested facility not implemented"; - case 0x34C6: return "Only restricted digital information bearer capability is available"; - case 0x34CF: return "Service or option not implemented, unspecified"; - case 0x34D1: return "Invalid call reference value"; - case 0x34D2: return "Identified channel does not exist"; - case 0x34D3: return "A suspended call exists, but this call identity does not"; - case 0x34D4: return "Call identity in use"; - case 0x34D5: return "No call suspended"; - case 0x34D6: return "Call having the requested call identity has been cleared"; - case 0x34D8: return "Incompatible destination"; - case 0x34DB: return "Invalid transit network selection"; - case 0x34DF: return "Invalid message, unspecified"; - case 0x34E0: return "Mandatory information element is missing"; - case 0x34E1: return "Message type non-existent or not implemented"; - case 0x34E2: return "Message not compatible with call state or message type non-existent or not implemented"; - case 0x34E3: return "Information element non-existent or not implemented"; - case 0x34E4: return "Invalid information element contents"; - case 0x34E5: return "Message not compatible with call state"; - case 0x34E6: return "Recovery on timer expiry"; - case 0x34EF: return "Protocol error, unspecified"; - case 0x34FF: return "Interworking, unspecified"; - - default: return "No additional information"; - } -#endif -} - -static void handle_controller(_cmsg *cmsg) -{ - capidrv_contr *card = findcontrbynumber(cmsg->adr.adrController & 0x7f); - - if (!card) { - printk(KERN_ERR "capidrv: %s from unknown controller 0x%x\n", - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->adr.adrController & 0x7f); - return; - } - switch (CAPICMD(cmsg->Command, cmsg->Subcommand)) { - - case CAPI_LISTEN_CONF: /* Controller */ - if (debugmode) - printk(KERN_DEBUG "capidrv-%d: listenconf Info=0x%4x (%s) cipmask=0x%x\n", - card->contrnr, cmsg->Info, capi_info2str(cmsg->Info), card->cipmask); - if (cmsg->Info) { - listen_change_state(card, EV_LISTEN_CONF_ERROR); - } else if (card->cipmask == 0) { - listen_change_state(card, EV_LISTEN_CONF_EMPTY); - } else { - listen_change_state(card, EV_LISTEN_CONF_OK); - } - break; - - case CAPI_MANUFACTURER_IND: /* Controller */ - if (cmsg->ManuID == 0x214D5641 - && cmsg->Class == 0 - && cmsg->Function == 1) { - u8 *data = cmsg->ManuData + 3; - u16 len = cmsg->ManuData[0]; - u16 layer; - int direction; - if (len == 255) { - len = (cmsg->ManuData[1] | (cmsg->ManuData[2] << 8)); - data += 2; - } - len -= 2; - layer = ((*(data - 1)) << 8) | *(data - 2); - if (layer & 0x300) - direction = (layer & 0x200) ? 0 : 1; - else direction = (layer & 0x800) ? 0 : 1; - if (layer & 0x0C00) { - if ((layer & 0xff) == 0x80) { - handle_dtrace_data(card, direction, 1, data, len); - break; - } - } else if ((layer & 0xff) < 0x80) { - handle_dtrace_data(card, direction, 0, data, len); - break; - } - printk(KERN_INFO "capidrv-%d: %s from controller 0x%x layer 0x%x, ignored\n", - card->contrnr, - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->adr.adrController, layer); - break; - } - goto ignored; - case CAPI_MANUFACTURER_CONF: /* Controller */ - if (cmsg->ManuID == 0x214D5641) { - char *s = NULL; - switch (cmsg->Class) { - case 0: break; - case 1: s = "unknown class"; break; - case 2: s = "unknown function"; break; - default: s = "unknown error"; break; - } - if (s) - printk(KERN_INFO "capidrv-%d: %s from controller 0x%x function %d: %s\n", - card->contrnr, - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->adr.adrController, - cmsg->Function, s); - break; - } - goto ignored; - case CAPI_FACILITY_IND: /* Controller/plci/ncci */ - goto ignored; - case CAPI_FACILITY_CONF: /* Controller/plci/ncci */ - goto ignored; - case CAPI_INFO_IND: /* Controller/plci */ - goto ignored; - case CAPI_INFO_CONF: /* Controller/plci */ - goto ignored; - - default: - printk(KERN_ERR "capidrv-%d: got %s from controller 0x%x ???", - card->contrnr, - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->adr.adrController); - } - return; - -ignored: - printk(KERN_INFO "capidrv-%d: %s from controller 0x%x ignored\n", - card->contrnr, - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->adr.adrController); -} - -static void handle_incoming_call(capidrv_contr *card, _cmsg *cmsg) -{ - capidrv_plci *plcip; - capidrv_bchan *bchan; - isdn_ctrl cmd; - int chan; - - if ((chan = new_bchan(card)) == -1) { - printk(KERN_ERR "capidrv-%d: incoming call on not existing bchan ?\n", card->contrnr); - return; - } - bchan = &card->bchans[chan]; - if ((plcip = new_plci(card, chan)) == NULL) { - printk(KERN_ERR "capidrv-%d: incoming call: no memory, sorry.\n", card->contrnr); - return; - } - bchan->incoming = 1; - plcip->plci = cmsg->adr.adrPLCI; - plci_change_state(card, plcip, EV_PLCI_CONNECT_IND); - - cmd.command = ISDN_STAT_ICALL; - cmd.driver = card->myid; - cmd.arg = chan; - memset(&cmd.parm.setup, 0, sizeof(cmd.parm.setup)); - strncpy(cmd.parm.setup.phone, - cmsg->CallingPartyNumber + 3, - cmsg->CallingPartyNumber[0] - 2); - strncpy(cmd.parm.setup.eazmsn, - cmsg->CalledPartyNumber + 2, - cmsg->CalledPartyNumber[0] - 1); - cmd.parm.setup.si1 = cip2si1(cmsg->CIPValue); - cmd.parm.setup.si2 = cip2si2(cmsg->CIPValue); - cmd.parm.setup.plan = cmsg->CallingPartyNumber[1]; - cmd.parm.setup.screen = cmsg->CallingPartyNumber[2]; - - printk(KERN_INFO "capidrv-%d: incoming call %s,%d,%d,%s\n", - card->contrnr, - cmd.parm.setup.phone, - cmd.parm.setup.si1, - cmd.parm.setup.si2, - cmd.parm.setup.eazmsn); - - if (cmd.parm.setup.si1 == 1 && cmd.parm.setup.si2 != 0) { - printk(KERN_INFO "capidrv-%d: patching si2=%d to 0 for VBOX\n", - card->contrnr, - cmd.parm.setup.si2); - cmd.parm.setup.si2 = 0; - } - - switch (card->interface.statcallb(&cmd)) { - case 0: - case 3: - /* No device matching this call. - * and isdn_common.c has send a HANGUP command - * which is ignored in state ST_PLCI_INCOMING, - * so we send RESP to ignore the call - */ - capi_cmsg_answer(cmsg); - cmsg->Reject = 1; /* ignore */ - plci_change_state(card, plcip, EV_PLCI_CONNECT_REJECT); - send_message(card, cmsg); - printk(KERN_INFO "capidrv-%d: incoming call %s,%d,%d,%s ignored\n", - card->contrnr, - cmd.parm.setup.phone, - cmd.parm.setup.si1, - cmd.parm.setup.si2, - cmd.parm.setup.eazmsn); - break; - case 1: - /* At least one device matching this call (RING on ttyI) - * HL-driver may send ALERTING on the D-channel in this - * case. - * really means: RING on ttyI or a net interface - * accepted this call already. - * - * If the call was accepted, state has already changed, - * and CONNECT_RESP already sent. - */ - if (plcip->state == ST_PLCI_INCOMING) { - printk(KERN_INFO "capidrv-%d: incoming call %s,%d,%d,%s tty alerting\n", - card->contrnr, - cmd.parm.setup.phone, - cmd.parm.setup.si1, - cmd.parm.setup.si2, - cmd.parm.setup.eazmsn); - capi_fill_ALERT_REQ(cmsg, - global.ap.applid, - card->msgid++, - plcip->plci, /* adr */ - NULL,/* BChannelinformation */ - NULL,/* Keypadfacility */ - NULL,/* Useruserdata */ - NULL /* Facilitydataarray */ - ); - plcip->msgid = cmsg->Messagenumber; - send_message(card, cmsg); - } else { - printk(KERN_INFO "capidrv-%d: incoming call %s,%d,%d,%s on netdev\n", - card->contrnr, - cmd.parm.setup.phone, - cmd.parm.setup.si1, - cmd.parm.setup.si2, - cmd.parm.setup.eazmsn); - } - break; - - case 2: /* Call will be rejected. */ - capi_cmsg_answer(cmsg); - cmsg->Reject = 2; /* reject call, normal call clearing */ - plci_change_state(card, plcip, EV_PLCI_CONNECT_REJECT); - send_message(card, cmsg); - break; - - default: - /* An error happened. (Invalid parameters for example.) */ - capi_cmsg_answer(cmsg); - cmsg->Reject = 8; /* reject call, - destination out of order */ - plci_change_state(card, plcip, EV_PLCI_CONNECT_REJECT); - send_message(card, cmsg); - break; - } - return; -} - -static void handle_plci(_cmsg *cmsg) -{ - capidrv_contr *card = findcontrbynumber(cmsg->adr.adrController & 0x7f); - capidrv_plci *plcip; - isdn_ctrl cmd; - _cdebbuf *cdb; - - if (!card) { - printk(KERN_ERR "capidrv: %s from unknown controller 0x%x\n", - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->adr.adrController & 0x7f); - return; - } - switch (CAPICMD(cmsg->Command, cmsg->Subcommand)) { - - case CAPI_DISCONNECT_IND: /* plci */ - if (cmsg->Reason) { - printk(KERN_INFO "capidrv-%d: %s reason 0x%x (%s) for plci 0x%x\n", - card->contrnr, - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->Reason, capi_info2str(cmsg->Reason), cmsg->adr.adrPLCI); - } - if (!(plcip = find_plci_by_plci(card, cmsg->adr.adrPLCI))) { - capi_cmsg_answer(cmsg); - send_message(card, cmsg); - goto notfound; - } - card->bchans[plcip->chan].disconnecting = 1; - plci_change_state(card, plcip, EV_PLCI_DISCONNECT_IND); - capi_cmsg_answer(cmsg); - plci_change_state(card, plcip, EV_PLCI_DISCONNECT_RESP); - send_message(card, cmsg); - break; - - case CAPI_DISCONNECT_CONF: /* plci */ - if (cmsg->Info) { - printk(KERN_INFO "capidrv-%d: %s info 0x%x (%s) for plci 0x%x\n", - card->contrnr, - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->Info, capi_info2str(cmsg->Info), - cmsg->adr.adrPLCI); - } - if (!(plcip = find_plci_by_plci(card, cmsg->adr.adrPLCI))) - goto notfound; - - card->bchans[plcip->chan].disconnecting = 1; - break; - - case CAPI_ALERT_CONF: /* plci */ - if (cmsg->Info) { - printk(KERN_INFO "capidrv-%d: %s info 0x%x (%s) for plci 0x%x\n", - card->contrnr, - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->Info, capi_info2str(cmsg->Info), - cmsg->adr.adrPLCI); - } - break; - - case CAPI_CONNECT_IND: /* plci */ - handle_incoming_call(card, cmsg); - break; - - case CAPI_CONNECT_CONF: /* plci */ - if (cmsg->Info) { - printk(KERN_INFO "capidrv-%d: %s info 0x%x (%s) for plci 0x%x\n", - card->contrnr, - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->Info, capi_info2str(cmsg->Info), - cmsg->adr.adrPLCI); - } - if (!(plcip = find_plci_by_msgid(card, cmsg->Messagenumber))) - goto notfound; - - plcip->plci = cmsg->adr.adrPLCI; - if (cmsg->Info) { - plci_change_state(card, plcip, EV_PLCI_CONNECT_CONF_ERROR); - } else { - plci_change_state(card, plcip, EV_PLCI_CONNECT_CONF_OK); - } - break; - - case CAPI_CONNECT_ACTIVE_IND: /* plci */ - - if (!(plcip = find_plci_by_plci(card, cmsg->adr.adrPLCI))) - goto notfound; - - if (card->bchans[plcip->chan].incoming) { - capi_cmsg_answer(cmsg); - plci_change_state(card, plcip, EV_PLCI_CONNECT_ACTIVE_IND); - send_message(card, cmsg); - } else { - capidrv_ncci *nccip; - capi_cmsg_answer(cmsg); - send_message(card, cmsg); - - nccip = new_ncci(card, plcip, cmsg->adr.adrPLCI); - - if (!nccip) { - printk(KERN_ERR "capidrv-%d: no mem for ncci, sorry\n", card->contrnr); - break; /* $$$$ */ - } - capi_fill_CONNECT_B3_REQ(cmsg, - global.ap.applid, - card->msgid++, - plcip->plci, /* adr */ - NULL /* NCPI */ - ); - nccip->msgid = cmsg->Messagenumber; - plci_change_state(card, plcip, - EV_PLCI_CONNECT_ACTIVE_IND); - ncci_change_state(card, nccip, EV_NCCI_CONNECT_B3_REQ); - send_message(card, cmsg); - cmd.command = ISDN_STAT_DCONN; - cmd.driver = card->myid; - cmd.arg = plcip->chan; - card->interface.statcallb(&cmd); - } - break; - - case CAPI_INFO_IND: /* Controller/plci */ - - if (!(plcip = find_plci_by_plci(card, cmsg->adr.adrPLCI))) - goto notfound; - - if (cmsg->InfoNumber == 0x4000) { - if (cmsg->InfoElement[0] == 4) { - cmd.command = ISDN_STAT_CINF; - cmd.driver = card->myid; - cmd.arg = plcip->chan; - sprintf(cmd.parm.num, "%lu", - (unsigned long) - ((u32) cmsg->InfoElement[1] - | ((u32) (cmsg->InfoElement[2]) << 8) - | ((u32) (cmsg->InfoElement[3]) << 16) - | ((u32) (cmsg->InfoElement[4]) << 24))); - card->interface.statcallb(&cmd); - break; - } - } - cdb = capi_cmsg2str(cmsg); - if (cdb) { - printk(KERN_WARNING "capidrv-%d: %s\n", - card->contrnr, cdb->buf); - cdebbuf_free(cdb); - } else - printk(KERN_WARNING "capidrv-%d: CAPI_INFO_IND InfoNumber %x not handled\n", - card->contrnr, cmsg->InfoNumber); - - break; - - case CAPI_CONNECT_ACTIVE_CONF: /* plci */ - goto ignored; - case CAPI_SELECT_B_PROTOCOL_CONF: /* plci */ - goto ignored; - case CAPI_FACILITY_IND: /* Controller/plci/ncci */ - goto ignored; - case CAPI_FACILITY_CONF: /* Controller/plci/ncci */ - goto ignored; - - case CAPI_INFO_CONF: /* Controller/plci */ - goto ignored; - - default: - printk(KERN_ERR "capidrv-%d: got %s for plci 0x%x ???", - card->contrnr, - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->adr.adrPLCI); - } - return; -ignored: - printk(KERN_INFO "capidrv-%d: %s for plci 0x%x ignored\n", - card->contrnr, - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->adr.adrPLCI); - return; -notfound: - printk(KERN_ERR "capidrv-%d: %s: plci 0x%x not found\n", - card->contrnr, - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->adr.adrPLCI); - return; -} - -static void handle_ncci(_cmsg *cmsg) -{ - capidrv_contr *card = findcontrbynumber(cmsg->adr.adrController & 0x7f); - capidrv_plci *plcip; - capidrv_ncci *nccip; - isdn_ctrl cmd; - int len; - - if (!card) { - printk(KERN_ERR "capidrv: %s from unknown controller 0x%x\n", - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->adr.adrController & 0x7f); - return; - } - switch (CAPICMD(cmsg->Command, cmsg->Subcommand)) { - - case CAPI_CONNECT_B3_ACTIVE_IND: /* ncci */ - if (!(nccip = find_ncci(card, cmsg->adr.adrNCCI))) - goto notfound; - - capi_cmsg_answer(cmsg); - ncci_change_state(card, nccip, EV_NCCI_CONNECT_B3_ACTIVE_IND); - send_message(card, cmsg); - - cmd.command = ISDN_STAT_BCONN; - cmd.driver = card->myid; - cmd.arg = nccip->chan; - card->interface.statcallb(&cmd); - - printk(KERN_INFO "capidrv-%d: chan %d up with ncci 0x%x\n", - card->contrnr, nccip->chan, nccip->ncci); - break; - - case CAPI_CONNECT_B3_ACTIVE_CONF: /* ncci */ - goto ignored; - - case CAPI_CONNECT_B3_IND: /* ncci */ - - plcip = find_plci_by_ncci(card, cmsg->adr.adrNCCI); - if (plcip) { - nccip = new_ncci(card, plcip, cmsg->adr.adrNCCI); - if (nccip) { - ncci_change_state(card, nccip, EV_NCCI_CONNECT_B3_IND); - capi_fill_CONNECT_B3_RESP(cmsg, - global.ap.applid, - card->msgid++, - nccip->ncci, /* adr */ - 0, /* Reject */ - NULL /* NCPI */ - ); - ncci_change_state(card, nccip, EV_NCCI_CONNECT_B3_RESP); - send_message(card, cmsg); - break; - } - printk(KERN_ERR "capidrv-%d: no mem for ncci, sorry\n", card->contrnr); - } else { - printk(KERN_ERR "capidrv-%d: %s: plci for ncci 0x%x not found\n", - card->contrnr, - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->adr.adrNCCI); - } - capi_fill_CONNECT_B3_RESP(cmsg, - global.ap.applid, - card->msgid++, - cmsg->adr.adrNCCI, - 2, /* Reject */ - NULL /* NCPI */ - ); - send_message(card, cmsg); - break; - - case CAPI_CONNECT_B3_CONF: /* ncci */ - - if (!(nccip = find_ncci_by_msgid(card, - cmsg->adr.adrNCCI, - cmsg->Messagenumber))) - goto notfound; - - nccip->ncci = cmsg->adr.adrNCCI; - if (cmsg->Info) { - printk(KERN_INFO "capidrv-%d: %s info 0x%x (%s) for ncci 0x%x\n", - card->contrnr, - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->Info, capi_info2str(cmsg->Info), - cmsg->adr.adrNCCI); - } - - if (cmsg->Info) - ncci_change_state(card, nccip, EV_NCCI_CONNECT_B3_CONF_ERROR); - else - ncci_change_state(card, nccip, EV_NCCI_CONNECT_B3_CONF_OK); - break; - - case CAPI_CONNECT_B3_T90_ACTIVE_IND: /* ncci */ - capi_cmsg_answer(cmsg); - send_message(card, cmsg); - break; - - case CAPI_DATA_B3_IND: /* ncci */ - /* handled in handle_data() */ - goto ignored; - - case CAPI_DATA_B3_CONF: /* ncci */ - if (cmsg->Info) { - printk(KERN_WARNING "CAPI_DATA_B3_CONF: Info %x - %s\n", - cmsg->Info, capi_info2str(cmsg->Info)); - } - if (!(nccip = find_ncci(card, cmsg->adr.adrNCCI))) - goto notfound; - - len = capidrv_del_ack(nccip, cmsg->DataHandle); - if (len < 0) - break; - cmd.command = ISDN_STAT_BSENT; - cmd.driver = card->myid; - cmd.arg = nccip->chan; - cmd.parm.length = len; - card->interface.statcallb(&cmd); - break; - - case CAPI_DISCONNECT_B3_IND: /* ncci */ - if (!(nccip = find_ncci(card, cmsg->adr.adrNCCI))) - goto notfound; - - card->bchans[nccip->chan].disconnecting = 1; - ncci_change_state(card, nccip, EV_NCCI_DISCONNECT_B3_IND); - capi_cmsg_answer(cmsg); - ncci_change_state(card, nccip, EV_NCCI_DISCONNECT_B3_RESP); - send_message(card, cmsg); - break; - - case CAPI_DISCONNECT_B3_CONF: /* ncci */ - if (!(nccip = find_ncci(card, cmsg->adr.adrNCCI))) - goto notfound; - if (cmsg->Info) { - printk(KERN_INFO "capidrv-%d: %s info 0x%x (%s) for ncci 0x%x\n", - card->contrnr, - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->Info, capi_info2str(cmsg->Info), - cmsg->adr.adrNCCI); - ncci_change_state(card, nccip, EV_NCCI_DISCONNECT_B3_CONF_ERROR); - } - break; - - case CAPI_RESET_B3_IND: /* ncci */ - if (!(nccip = find_ncci(card, cmsg->adr.adrNCCI))) - goto notfound; - ncci_change_state(card, nccip, EV_NCCI_RESET_B3_IND); - capi_cmsg_answer(cmsg); - send_message(card, cmsg); - break; - - case CAPI_RESET_B3_CONF: /* ncci */ - goto ignored; /* $$$$ */ - - case CAPI_FACILITY_IND: /* Controller/plci/ncci */ - goto ignored; - case CAPI_FACILITY_CONF: /* Controller/plci/ncci */ - goto ignored; - - default: - printk(KERN_ERR "capidrv-%d: got %s for ncci 0x%x ???", - card->contrnr, - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->adr.adrNCCI); - } - return; -ignored: - printk(KERN_INFO "capidrv-%d: %s for ncci 0x%x ignored\n", - card->contrnr, - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->adr.adrNCCI); - return; -notfound: - printk(KERN_ERR "capidrv-%d: %s: ncci 0x%x not found\n", - card->contrnr, - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->adr.adrNCCI); -} - - -static void handle_data(_cmsg *cmsg, struct sk_buff *skb) -{ - capidrv_contr *card = findcontrbynumber(cmsg->adr.adrController & 0x7f); - capidrv_ncci *nccip; - - if (!card) { - printk(KERN_ERR "capidrv: %s from unknown controller 0x%x\n", - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->adr.adrController & 0x7f); - kfree_skb(skb); - return; - } - if (!(nccip = find_ncci(card, cmsg->adr.adrNCCI))) { - printk(KERN_ERR "capidrv-%d: %s: ncci 0x%x not found\n", - card->contrnr, - capi_cmd2str(cmsg->Command, cmsg->Subcommand), - cmsg->adr.adrNCCI); - kfree_skb(skb); - return; - } - (void) skb_pull(skb, CAPIMSG_LEN(skb->data)); - card->interface.rcvcallb_skb(card->myid, nccip->chan, skb); - capi_cmsg_answer(cmsg); - send_message(card, cmsg); -} - -static _cmsg s_cmsg; - -static void capidrv_recv_message(struct capi20_appl *ap, struct sk_buff *skb) -{ - if (capi_message2cmsg(&s_cmsg, skb->data)) { - printk(KERN_ERR "capidrv: applid=%d: received invalid message\n", - ap->applid); - kfree_skb(skb); - return; - } - if (debugmode > 3) { - _cdebbuf *cdb = capi_cmsg2str(&s_cmsg); - - if (cdb) { - printk(KERN_DEBUG "%s: applid=%d %s\n", __func__, - ap->applid, cdb->buf); - cdebbuf_free(cdb); - } else - printk(KERN_DEBUG "%s: applid=%d %s not traced\n", - __func__, ap->applid, - capi_cmd2str(s_cmsg.Command, s_cmsg.Subcommand)); - } - if (s_cmsg.Command == CAPI_DATA_B3 - && s_cmsg.Subcommand == CAPI_IND) { - handle_data(&s_cmsg, skb); - return; - } - if ((s_cmsg.adr.adrController & 0xffffff00) == 0) - handle_controller(&s_cmsg); - else if ((s_cmsg.adr.adrPLCI & 0xffff0000) == 0) - handle_plci(&s_cmsg); - else - handle_ncci(&s_cmsg); - /* - * data of skb used in s_cmsg, - * free data when s_cmsg is not used again - * thanks to Lars Heete - */ - kfree_skb(skb); -} - -/* ------------------------------------------------------------------- */ - -#define PUTBYTE_TO_STATUS(card, byte) \ - do { \ - *(card)->q931_write++ = (byte); \ - if ((card)->q931_write > (card)->q931_end) \ - (card)->q931_write = (card)->q931_buf; \ - } while (0) - -static void handle_dtrace_data(capidrv_contr *card, - int send, int level2, u8 *data, u16 len) -{ - u8 *p, *end; - isdn_ctrl cmd; - - if (!len) { - printk(KERN_DEBUG "capidrv-%d: avmb1_q931_data: len == %d\n", - card->contrnr, len); - return; - } - - if (level2) { - PUTBYTE_TO_STATUS(card, 'D'); - PUTBYTE_TO_STATUS(card, '2'); - PUTBYTE_TO_STATUS(card, send ? '>' : '<'); - PUTBYTE_TO_STATUS(card, ':'); - } else { - PUTBYTE_TO_STATUS(card, 'D'); - PUTBYTE_TO_STATUS(card, '3'); - PUTBYTE_TO_STATUS(card, send ? '>' : '<'); - PUTBYTE_TO_STATUS(card, ':'); - } - - for (p = data, end = data + len; p < end; p++) { - PUTBYTE_TO_STATUS(card, ' '); - PUTBYTE_TO_STATUS(card, hex_asc_hi(*p)); - PUTBYTE_TO_STATUS(card, hex_asc_lo(*p)); - } - PUTBYTE_TO_STATUS(card, '\n'); - - cmd.command = ISDN_STAT_STAVAIL; - cmd.driver = card->myid; - cmd.arg = len * 3 + 5; - card->interface.statcallb(&cmd); -} - -/* ------------------------------------------------------------------- */ - -static _cmsg cmdcmsg; - -static int capidrv_ioctl(isdn_ctrl *c, capidrv_contr *card) -{ - switch (c->arg) { - case 1: - debugmode = (int)(*((unsigned int *)c->parm.num)); - printk(KERN_DEBUG "capidrv-%d: debugmode=%d\n", - card->contrnr, debugmode); - return 0; - default: - printk(KERN_DEBUG "capidrv-%d: capidrv_ioctl(%ld) called ??\n", - card->contrnr, c->arg); - return -EINVAL; - } - return -EINVAL; -} - -/* - * Handle leased lines (CAPI-Bundling) - */ - -struct internal_bchannelinfo { - unsigned short channelalloc; - unsigned short operation; - unsigned char cmask[31]; -}; - -static int decodeFVteln(char *teln, unsigned long *bmaskp, int *activep) -{ - unsigned long bmask = 0; - int active = !0; - char *s; - int i; - - if (strncmp(teln, "FV:", 3) != 0) - return 1; - s = teln + 3; - while (*s && *s == ' ') s++; - if (!*s) return -2; - if (*s == 'p' || *s == 'P') { - active = 0; - s++; - } - if (*s == 'a' || *s == 'A') { - active = !0; - s++; - } - while (*s) { - int digit1 = 0; - int digit2 = 0; - char *endp; - - digit1 = simple_strtoul(s, &endp, 10); - if (s == endp) - return -3; - s = endp; - - if (digit1 <= 0 || digit1 > 30) return -4; - if (*s == 0 || *s == ',' || *s == ' ') { - bmask |= (1 << digit1); - digit1 = 0; - if (*s) s++; - continue; - } - if (*s != '-') return -5; - s++; - - digit2 = simple_strtoul(s, &endp, 10); - if (s == endp) - return -3; - s = endp; - - if (digit2 <= 0 || digit2 > 30) return -4; - if (*s == 0 || *s == ',' || *s == ' ') { - if (digit1 > digit2) - for (i = digit2; i <= digit1; i++) - bmask |= (1 << i); - else - for (i = digit1; i <= digit2; i++) - bmask |= (1 << i); - digit1 = digit2 = 0; - if (*s) s++; - continue; - } - return -6; - } - if (activep) *activep = active; - if (bmaskp) *bmaskp = bmask; - return 0; -} - -static int FVteln2capi20(char *teln, u8 AdditionalInfo[1 + 2 + 2 + 31]) -{ - unsigned long bmask; - int active; - int rc, i; - - rc = decodeFVteln(teln, &bmask, &active); - if (rc) return rc; - /* Length */ - AdditionalInfo[0] = 2 + 2 + 31; - /* Channel: 3 => use channel allocation */ - AdditionalInfo[1] = 3; AdditionalInfo[2] = 0; - /* Operation: 0 => DTE mode, 1 => DCE mode */ - if (active) { - AdditionalInfo[3] = 0; AdditionalInfo[4] = 0; - } else { - AdditionalInfo[3] = 1; AdditionalInfo[4] = 0; - } - /* Channel mask array */ - AdditionalInfo[5] = 0; /* no D-Channel */ - for (i = 1; i <= 30; i++) - AdditionalInfo[5 + i] = (bmask & (1 << i)) ? 0xff : 0; - return 0; -} - -static int capidrv_command(isdn_ctrl *c, capidrv_contr *card) -{ - isdn_ctrl cmd; - struct capidrv_bchan *bchan; - struct capidrv_plci *plcip; - u8 AdditionalInfo[1 + 2 + 2 + 31]; - int rc, isleasedline = 0; - - if (c->command == ISDN_CMD_IOCTL) - return capidrv_ioctl(c, card); - - switch (c->command) { - case ISDN_CMD_DIAL: { - u8 calling[ISDN_MSNLEN + 3]; - u8 called[ISDN_MSNLEN + 2]; - - if (debugmode) - printk(KERN_DEBUG "capidrv-%d: ISDN_CMD_DIAL(ch=%ld,\"%s,%d,%d,%s\")\n", - card->contrnr, - c->arg, - c->parm.setup.phone, - c->parm.setup.si1, - c->parm.setup.si2, - c->parm.setup.eazmsn); - - bchan = &card->bchans[c->arg % card->nbchan]; - - if (bchan->plcip) { - printk(KERN_ERR "capidrv-%d: dail ch=%ld,\"%s,%d,%d,%s\" in use (plci=0x%x)\n", - card->contrnr, - c->arg, - c->parm.setup.phone, - c->parm.setup.si1, - c->parm.setup.si2, - c->parm.setup.eazmsn, - bchan->plcip->plci); - return 0; - } - bchan->si1 = c->parm.setup.si1; - bchan->si2 = c->parm.setup.si2; - - strncpy(bchan->num, c->parm.setup.phone, sizeof(bchan->num)); - strncpy(bchan->mynum, c->parm.setup.eazmsn, sizeof(bchan->mynum)); - rc = FVteln2capi20(bchan->num, AdditionalInfo); - isleasedline = (rc == 0); - if (rc < 0) - printk(KERN_ERR "capidrv-%d: WARNING: invalid leased linedefinition \"%s\"\n", card->contrnr, bchan->num); - - if (isleasedline) { - calling[0] = 0; - called[0] = 0; - if (debugmode) - printk(KERN_DEBUG "capidrv-%d: connecting leased line\n", card->contrnr); - } else { - calling[0] = strlen(bchan->mynum) + 2; - calling[1] = 0; - calling[2] = 0x80; - strncpy(calling + 3, bchan->mynum, ISDN_MSNLEN); - called[0] = strlen(bchan->num) + 1; - called[1] = 0x80; - strncpy(called + 2, bchan->num, ISDN_MSNLEN); - } - - capi_fill_CONNECT_REQ(&cmdcmsg, - global.ap.applid, - card->msgid++, - card->contrnr, /* adr */ - si2cip(bchan->si1, bchan->si2), /* cipvalue */ - called, /* CalledPartyNumber */ - calling, /* CallingPartyNumber */ - NULL, /* CalledPartySubaddress */ - NULL, /* CallingPartySubaddress */ - b1prot(bchan->l2, bchan->l3), /* B1protocol */ - b2prot(bchan->l2, bchan->l3), /* B2protocol */ - b3prot(bchan->l2, bchan->l3), /* B3protocol */ - b1config(bchan->l2, bchan->l3), /* B1configuration */ - NULL, /* B2configuration */ - NULL, /* B3configuration */ - NULL, /* BC */ - NULL, /* LLC */ - NULL, /* HLC */ - /* BChannelinformation */ - isleasedline ? AdditionalInfo : NULL, - NULL, /* Keypadfacility */ - NULL, /* Useruserdata */ - NULL /* Facilitydataarray */ - ); - if ((plcip = new_plci(card, (c->arg % card->nbchan))) == NULL) { - cmd.command = ISDN_STAT_DHUP; - cmd.driver = card->myid; - cmd.arg = (c->arg % card->nbchan); - card->interface.statcallb(&cmd); - return -1; - } - plcip->msgid = cmdcmsg.Messagenumber; - plcip->leasedline = isleasedline; - plci_change_state(card, plcip, EV_PLCI_CONNECT_REQ); - send_message(card, &cmdcmsg); - return 0; - } - - case ISDN_CMD_ACCEPTD: - - bchan = &card->bchans[c->arg % card->nbchan]; - if (debugmode) - printk(KERN_DEBUG "capidrv-%d: ISDN_CMD_ACCEPTD(ch=%ld) l2=%d l3=%d\n", - card->contrnr, - c->arg, bchan->l2, bchan->l3); - - capi_fill_CONNECT_RESP(&cmdcmsg, - global.ap.applid, - card->msgid++, - bchan->plcip->plci, /* adr */ - 0, /* Reject */ - b1prot(bchan->l2, bchan->l3), /* B1protocol */ - b2prot(bchan->l2, bchan->l3), /* B2protocol */ - b3prot(bchan->l2, bchan->l3), /* B3protocol */ - b1config(bchan->l2, bchan->l3), /* B1configuration */ - NULL, /* B2configuration */ - NULL, /* B3configuration */ - NULL, /* ConnectedNumber */ - NULL, /* ConnectedSubaddress */ - NULL, /* LLC */ - NULL, /* BChannelinformation */ - NULL, /* Keypadfacility */ - NULL, /* Useruserdata */ - NULL /* Facilitydataarray */ - ); - if (capi_cmsg2message(&cmdcmsg, cmdcmsg.buf)) { - printk(KERN_ERR "capidrv-%d: capidrv_command: parser failure\n", - card->contrnr); - return -EINVAL; - } - plci_change_state(card, bchan->plcip, EV_PLCI_CONNECT_RESP); - send_message(card, &cmdcmsg); - return 0; - - case ISDN_CMD_ACCEPTB: - if (debugmode) - printk(KERN_DEBUG "capidrv-%d: ISDN_CMD_ACCEPTB(ch=%ld)\n", - card->contrnr, - c->arg); - return -ENOSYS; - - case ISDN_CMD_HANGUP: - if (debugmode) - printk(KERN_DEBUG "capidrv-%d: ISDN_CMD_HANGUP(ch=%ld)\n", - card->contrnr, - c->arg); - bchan = &card->bchans[c->arg % card->nbchan]; - - if (bchan->disconnecting) { - if (debugmode) - printk(KERN_DEBUG "capidrv-%d: chan %ld already disconnecting ...\n", - card->contrnr, - c->arg); - return 0; - } - if (bchan->nccip) { - bchan->disconnecting = 1; - capi_fill_DISCONNECT_B3_REQ(&cmdcmsg, - global.ap.applid, - card->msgid++, - bchan->nccip->ncci, - NULL /* NCPI */ - ); - ncci_change_state(card, bchan->nccip, EV_NCCI_DISCONNECT_B3_REQ); - send_message(card, &cmdcmsg); - return 0; - } else if (bchan->plcip) { - if (bchan->plcip->state == ST_PLCI_INCOMING) { - /* - * just ignore, we a called from - * isdn_status_callback(), - * which will return 0 or 2, this is handled - * by the CONNECT_IND handler - */ - bchan->disconnecting = 1; - return 0; - } else if (bchan->plcip->plci) { - bchan->disconnecting = 1; - capi_fill_DISCONNECT_REQ(&cmdcmsg, - global.ap.applid, - card->msgid++, - bchan->plcip->plci, - NULL, /* BChannelinformation */ - NULL, /* Keypadfacility */ - NULL, /* Useruserdata */ - NULL /* Facilitydataarray */ - ); - plci_change_state(card, bchan->plcip, EV_PLCI_DISCONNECT_REQ); - send_message(card, &cmdcmsg); - return 0; - } else { - printk(KERN_ERR "capidrv-%d: chan %ld disconnect request while waiting for CONNECT_CONF\n", - card->contrnr, - c->arg); - return -EINVAL; - } - } - printk(KERN_ERR "capidrv-%d: chan %ld disconnect request on free channel\n", - card->contrnr, - c->arg); - return -EINVAL; -/* ready */ - - case ISDN_CMD_SETL2: - if (debugmode) - printk(KERN_DEBUG "capidrv-%d: set L2 on chan %ld to %ld\n", - card->contrnr, - (c->arg & 0xff), (c->arg >> 8)); - bchan = &card->bchans[(c->arg & 0xff) % card->nbchan]; - bchan->l2 = (c->arg >> 8); - return 0; - - case ISDN_CMD_SETL3: - if (debugmode) - printk(KERN_DEBUG "capidrv-%d: set L3 on chan %ld to %ld\n", - card->contrnr, - (c->arg & 0xff), (c->arg >> 8)); - bchan = &card->bchans[(c->arg & 0xff) % card->nbchan]; - bchan->l3 = (c->arg >> 8); - return 0; - - case ISDN_CMD_SETEAZ: - if (debugmode) - printk(KERN_DEBUG "capidrv-%d: set EAZ \"%s\" on chan %ld\n", - card->contrnr, - c->parm.num, c->arg); - bchan = &card->bchans[c->arg % card->nbchan]; - strncpy(bchan->msn, c->parm.num, ISDN_MSNLEN); - return 0; - - case ISDN_CMD_CLREAZ: - if (debugmode) - printk(KERN_DEBUG "capidrv-%d: clearing EAZ on chan %ld\n", - card->contrnr, c->arg); - bchan = &card->bchans[c->arg % card->nbchan]; - bchan->msn[0] = 0; - return 0; - - default: - printk(KERN_ERR "capidrv-%d: ISDN_CMD_%d, Huh?\n", - card->contrnr, c->command); - return -EINVAL; - } - return 0; -} - -static int if_command(isdn_ctrl *c) -{ - capidrv_contr *card = findcontrbydriverid(c->driver); - - if (card) - return capidrv_command(c, card); - - printk(KERN_ERR - "capidrv: if_command %d called with invalid driverId %d!\n", - c->command, c->driver); - return -ENODEV; -} - -static _cmsg sendcmsg; - -static int if_sendbuf(int id, int channel, int doack, struct sk_buff *skb) -{ - capidrv_contr *card = findcontrbydriverid(id); - capidrv_bchan *bchan; - capidrv_ncci *nccip; - int len = skb->len; - int msglen; - u16 errcode; - u16 datahandle; - u32 data; - - if (!card) { - printk(KERN_ERR "capidrv: if_sendbuf called with invalid driverId %d!\n", - id); - return 0; - } - if (debugmode > 4) - printk(KERN_DEBUG "capidrv-%d: sendbuf len=%d skb=%p doack=%d\n", - card->contrnr, len, skb, doack); - bchan = &card->bchans[channel % card->nbchan]; - nccip = bchan->nccip; - if (!nccip || nccip->state != ST_NCCI_ACTIVE) { - printk(KERN_ERR "capidrv-%d: if_sendbuf: %s:%d: chan not up!\n", - card->contrnr, card->name, channel); - return 0; - } - datahandle = nccip->datahandle; - - /* - * Here we copy pointer skb->data into the 32-bit 'Data' field. - * The 'Data' field is not used in practice in linux kernel - * (neither in 32 or 64 bit), but should have some value, - * since a CAPI message trace will display it. - * - * The correct value in the 32 bit case is the address of the - * data, in 64 bit it makes no sense, we use 0 there. - */ - -#ifdef CONFIG_64BIT - data = 0; -#else - data = (unsigned long) skb->data; -#endif - - capi_fill_DATA_B3_REQ(&sendcmsg, global.ap.applid, card->msgid++, - nccip->ncci, /* adr */ - data, /* Data */ - skb->len, /* DataLength */ - datahandle, /* DataHandle */ - 0 /* Flags */ - ); - - if (capidrv_add_ack(nccip, datahandle, doack ? (int)skb->len : -1) < 0) - return 0; - - if (capi_cmsg2message(&sendcmsg, sendcmsg.buf)) { - printk(KERN_ERR "capidrv-%d: if_sendbuf: parser failure\n", - card->contrnr); - return -EINVAL; - } - msglen = CAPIMSG_LEN(sendcmsg.buf); - if (skb_headroom(skb) < msglen) { - struct sk_buff *nskb = skb_realloc_headroom(skb, msglen); - if (!nskb) { - printk(KERN_ERR "capidrv-%d: if_sendbuf: no memory\n", - card->contrnr); - (void)capidrv_del_ack(nccip, datahandle); - return 0; - } - printk(KERN_DEBUG "capidrv-%d: only %d bytes headroom, need %d\n", - card->contrnr, skb_headroom(skb), msglen); - memcpy(skb_push(nskb, msglen), sendcmsg.buf, msglen); - errcode = capi20_put_message(&global.ap, nskb); - if (errcode == CAPI_NOERROR) { - dev_kfree_skb(skb); - nccip->datahandle++; - return len; - } - if (debugmode > 3) - printk(KERN_DEBUG "capidrv-%d: sendbuf putmsg ret(%x) - %s\n", - card->contrnr, errcode, capi_info2str(errcode)); - (void)capidrv_del_ack(nccip, datahandle); - dev_kfree_skb(nskb); - return errcode == CAPI_SENDQUEUEFULL ? 0 : -1; - } else { - memcpy(skb_push(skb, msglen), sendcmsg.buf, msglen); - errcode = capi20_put_message(&global.ap, skb); - if (errcode == CAPI_NOERROR) { - nccip->datahandle++; - return len; - } - if (debugmode > 3) - printk(KERN_DEBUG "capidrv-%d: sendbuf putmsg ret(%x) - %s\n", - card->contrnr, errcode, capi_info2str(errcode)); - skb_pull(skb, msglen); - (void)capidrv_del_ack(nccip, datahandle); - return errcode == CAPI_SENDQUEUEFULL ? 0 : -1; - } -} - -static int if_readstat(u8 __user *buf, int len, int id, int channel) -{ - capidrv_contr *card = findcontrbydriverid(id); - int count; - u8 __user *p; - - if (!card) { - printk(KERN_ERR "capidrv: if_readstat called with invalid driverId %d!\n", - id); - return -ENODEV; - } - - for (p = buf, count = 0; count < len; p++, count++) { - if (put_user(*card->q931_read++, p)) - return -EFAULT; - if (card->q931_read > card->q931_end) - card->q931_read = card->q931_buf; - } - return count; - -} - -static void enable_dchannel_trace(capidrv_contr *card) -{ - u8 manufacturer[CAPI_MANUFACTURER_LEN]; - capi_version version; - u16 contr = card->contrnr; - u16 errcode; - u16 avmversion[3]; - - errcode = capi20_get_manufacturer(contr, manufacturer); - if (errcode != CAPI_NOERROR) { - printk(KERN_ERR "%s: can't get manufacturer (0x%x)\n", - card->name, errcode); - return; - } - if (strstr(manufacturer, "AVM") == NULL) { - printk(KERN_ERR "%s: not from AVM, no d-channel trace possible (%s)\n", - card->name, manufacturer); - return; - } - errcode = capi20_get_version(contr, &version); - if (errcode != CAPI_NOERROR) { - printk(KERN_ERR "%s: can't get version (0x%x)\n", - card->name, errcode); - return; - } - avmversion[0] = (version.majormanuversion >> 4) & 0x0f; - avmversion[1] = (version.majormanuversion << 4) & 0xf0; - avmversion[1] |= (version.minormanuversion >> 4) & 0x0f; - avmversion[2] |= version.minormanuversion & 0x0f; - - if (avmversion[0] > 3 || (avmversion[0] == 3 && avmversion[1] > 5)) { - printk(KERN_INFO "%s: D2 trace enabled\n", card->name); - capi_fill_MANUFACTURER_REQ(&cmdcmsg, global.ap.applid, - card->msgid++, - contr, - 0x214D5641, /* ManuID */ - 0, /* Class */ - 1, /* Function */ - (_cstruct)"\004\200\014\000\000"); - } else { - printk(KERN_INFO "%s: D3 trace enabled\n", card->name); - capi_fill_MANUFACTURER_REQ(&cmdcmsg, global.ap.applid, - card->msgid++, - contr, - 0x214D5641, /* ManuID */ - 0, /* Class */ - 1, /* Function */ - (_cstruct)"\004\002\003\000\000"); - } - send_message(card, &cmdcmsg); -} - - -static void send_listen(capidrv_contr *card) -{ - capi_fill_LISTEN_REQ(&cmdcmsg, global.ap.applid, - card->msgid++, - card->contrnr, /* controller */ - 1 << 6, /* Infomask */ - card->cipmask, - card->cipmask2, - NULL, NULL); - listen_change_state(card, EV_LISTEN_REQ); - send_message(card, &cmdcmsg); -} - -static void listentimerfunc(struct timer_list *t) -{ - capidrv_contr *card = from_timer(card, t, listentimer); - if (card->state != ST_LISTEN_NONE && card->state != ST_LISTEN_ACTIVE) - printk(KERN_ERR "%s: controller dead ??\n", card->name); - send_listen(card); - mod_timer(&card->listentimer, jiffies + 60 * HZ); -} - - -static int capidrv_addcontr(u16 contr, struct capi_profile *profp) -{ - capidrv_contr *card; - unsigned long flags; - isdn_ctrl cmd; - char id[20]; - int i; - - sprintf(id, "capidrv-%d", contr); - if (!try_module_get(THIS_MODULE)) { - printk(KERN_WARNING "capidrv: (%s) Could not reserve module\n", id); - return -1; - } - if (!(card = kzalloc(sizeof(capidrv_contr), GFP_ATOMIC))) { - printk(KERN_WARNING - "capidrv: (%s) Could not allocate contr-struct.\n", id); - return -1; - } - card->owner = THIS_MODULE; - timer_setup(&card->listentimer, listentimerfunc, 0); - strcpy(card->name, id); - card->contrnr = contr; - card->nbchan = profp->nbchannel; - card->bchans = kmalloc_array(card->nbchan, sizeof(capidrv_bchan), - GFP_ATOMIC); - if (!card->bchans) { - printk(KERN_WARNING - "capidrv: (%s) Could not allocate bchan-structs.\n", id); - module_put(card->owner); - kfree(card); - return -1; - } - card->interface.channels = profp->nbchannel; - card->interface.maxbufsize = 2048; - card->interface.command = if_command; - card->interface.writebuf_skb = if_sendbuf; - card->interface.writecmd = NULL; - card->interface.readstat = if_readstat; - card->interface.features = - ISDN_FEATURE_L2_HDLC | - ISDN_FEATURE_L2_TRANS | - ISDN_FEATURE_L3_TRANS | - ISDN_FEATURE_P_UNKNOWN | - ISDN_FEATURE_L2_X75I | - ISDN_FEATURE_L2_X75UI | - ISDN_FEATURE_L2_X75BUI; - if (profp->support1 & (1 << 2)) - card->interface.features |= - ISDN_FEATURE_L2_V11096 | - ISDN_FEATURE_L2_V11019 | - ISDN_FEATURE_L2_V11038; - if (profp->support1 & (1 << 8)) - card->interface.features |= ISDN_FEATURE_L2_MODEM; - card->interface.hl_hdrlen = 22; /* len of DATA_B3_REQ */ - strncpy(card->interface.id, id, sizeof(card->interface.id) - 1); - - - card->q931_read = card->q931_buf; - card->q931_write = card->q931_buf; - card->q931_end = card->q931_buf + sizeof(card->q931_buf) - 1; - - if (!register_isdn(&card->interface)) { - printk(KERN_ERR "capidrv: Unable to register contr %s\n", id); - kfree(card->bchans); - module_put(card->owner); - kfree(card); - return -1; - } - card->myid = card->interface.channels; - memset(card->bchans, 0, sizeof(capidrv_bchan) * card->nbchan); - for (i = 0; i < card->nbchan; i++) { - card->bchans[i].contr = card; - } - - spin_lock_irqsave(&global_lock, flags); - card->next = global.contr_list; - global.contr_list = card; - global.ncontr++; - spin_unlock_irqrestore(&global_lock, flags); - - cmd.command = ISDN_STAT_RUN; - cmd.driver = card->myid; - card->interface.statcallb(&cmd); - - card->cipmask = 0x1FFF03FF; /* any */ - card->cipmask2 = 0; - - send_listen(card); - mod_timer(&card->listentimer, jiffies + 60 * HZ); - - printk(KERN_INFO "%s: now up (%d B channels)\n", - card->name, card->nbchan); - - enable_dchannel_trace(card); - - return 0; -} - -static int capidrv_delcontr(u16 contr) -{ - capidrv_contr **pp, *card; - unsigned long flags; - isdn_ctrl cmd; - - spin_lock_irqsave(&global_lock, flags); - for (card = global.contr_list; card; card = card->next) { - if (card->contrnr == contr) - break; - } - if (!card) { - spin_unlock_irqrestore(&global_lock, flags); - printk(KERN_ERR "capidrv: delcontr: no contr %u\n", contr); - return -1; - } - - /* FIXME: maybe a race condition the card should be removed - * here from global list /kkeil - */ - spin_unlock_irqrestore(&global_lock, flags); - - del_timer(&card->listentimer); - - if (debugmode) - printk(KERN_DEBUG "capidrv-%d: id=%d unloading\n", - card->contrnr, card->myid); - - cmd.command = ISDN_STAT_STOP; - cmd.driver = card->myid; - card->interface.statcallb(&cmd); - - while (card->nbchan) { - - cmd.command = ISDN_STAT_DISCH; - cmd.driver = card->myid; - cmd.arg = card->nbchan - 1; - cmd.parm.num[0] = 0; - if (debugmode) - printk(KERN_DEBUG "capidrv-%d: id=%d disable chan=%ld\n", - card->contrnr, card->myid, cmd.arg); - card->interface.statcallb(&cmd); - - if (card->bchans[card->nbchan - 1].nccip) - free_ncci(card, card->bchans[card->nbchan - 1].nccip); - if (card->bchans[card->nbchan - 1].plcip) - free_plci(card, card->bchans[card->nbchan - 1].plcip); - if (card->plci_list) - printk(KERN_ERR "capidrv: bug in free_plci()\n"); - card->nbchan--; - } - kfree(card->bchans); - card->bchans = NULL; - - if (debugmode) - printk(KERN_DEBUG "capidrv-%d: id=%d isdn unload\n", - card->contrnr, card->myid); - - cmd.command = ISDN_STAT_UNLOAD; - cmd.driver = card->myid; - card->interface.statcallb(&cmd); - - if (debugmode) - printk(KERN_DEBUG "capidrv-%d: id=%d remove contr from list\n", - card->contrnr, card->myid); - - spin_lock_irqsave(&global_lock, flags); - for (pp = &global.contr_list; *pp; pp = &(*pp)->next) { - if (*pp == card) { - *pp = (*pp)->next; - card->next = NULL; - global.ncontr--; - break; - } - } - spin_unlock_irqrestore(&global_lock, flags); - - module_put(card->owner); - printk(KERN_INFO "%s: now down.\n", card->name); - kfree(card); - return 0; -} - - -static int -lower_callback(struct notifier_block *nb, unsigned long val, void *v) -{ - capi_profile profile; - u32 contr = (long)v; - - switch (val) { - case CAPICTR_UP: - printk(KERN_INFO "capidrv: controller %hu up\n", contr); - if (capi20_get_profile(contr, &profile) == CAPI_NOERROR) - (void) capidrv_addcontr(contr, &profile); - break; - case CAPICTR_DOWN: - printk(KERN_INFO "capidrv: controller %hu down\n", contr); - (void) capidrv_delcontr(contr); - break; - } - return NOTIFY_OK; -} - -/* - * /proc/capi/capidrv: - * nrecvctlpkt nrecvdatapkt nsendctlpkt nsenddatapkt - */ -static int __maybe_unused capidrv_proc_show(struct seq_file *m, void *v) -{ - seq_printf(m, "%lu %lu %lu %lu\n", - global.ap.nrecvctlpkt, - global.ap.nrecvdatapkt, - global.ap.nsentctlpkt, - global.ap.nsentdatapkt); - return 0; -} - -static void __init proc_init(void) -{ - proc_create_single("capi/capidrv", 0, NULL, capidrv_proc_show); -} - -static void __exit proc_exit(void) -{ - remove_proc_entry("capi/capidrv", NULL); -} - -static struct notifier_block capictr_nb = { - .notifier_call = lower_callback, -}; - -static int __init capidrv_init(void) -{ - capi_profile profile; - u32 ncontr, contr; - u16 errcode; - - global.ap.rparam.level3cnt = -2; /* number of bchannels twice */ - global.ap.rparam.datablkcnt = 16; - global.ap.rparam.datablklen = 2048; - - global.ap.recv_message = capidrv_recv_message; - errcode = capi20_register(&global.ap); - if (errcode) { - return -EIO; - } - - register_capictr_notifier(&capictr_nb); - - errcode = capi20_get_profile(0, &profile); - if (errcode != CAPI_NOERROR) { - unregister_capictr_notifier(&capictr_nb); - capi20_release(&global.ap); - return -EIO; - } - - ncontr = profile.ncontroller; - for (contr = 1; contr <= ncontr; contr++) { - errcode = capi20_get_profile(contr, &profile); - if (errcode != CAPI_NOERROR) - continue; - (void) capidrv_addcontr(contr, &profile); - } - proc_init(); - - return 0; -} - -static void __exit capidrv_exit(void) -{ - unregister_capictr_notifier(&capictr_nb); - capi20_release(&global.ap); - - proc_exit(); -} - -module_init(capidrv_init); -module_exit(capidrv_exit); diff --git a/drivers/isdn/capi/capidrv.h b/drivers/isdn/capi/capidrv.h deleted file mode 100644 index 4466b2e0176d..000000000000 --- a/drivers/isdn/capi/capidrv.h +++ /dev/null @@ -1,140 +0,0 @@ -/* $Id: capidrv.h,v 1.2.8.2 2001/09/23 22:24:33 kai Exp $ - * - * ISDN4Linux Driver, using capi20 interface (kernelcapi) - * - * Copyright 1997 by Carsten Paeth - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#ifndef __CAPIDRV_H__ -#define __CAPIDRV_H__ - -/* - * LISTEN state machine - */ -#define ST_LISTEN_NONE 0 /* L-0 */ -#define ST_LISTEN_WAIT_CONF 1 /* L-0.1 */ -#define ST_LISTEN_ACTIVE 2 /* L-1 */ -#define ST_LISTEN_ACTIVE_WAIT_CONF 3 /* L-1.1 */ - - -#define EV_LISTEN_REQ 1 /* L-0 -> L-0.1 - L-1 -> L-1.1 */ -#define EV_LISTEN_CONF_ERROR 2 /* L-0.1 -> L-0 - L-1.1 -> L-1 */ -#define EV_LISTEN_CONF_EMPTY 3 /* L-0.1 -> L-0 - L-1.1 -> L-0 */ -#define EV_LISTEN_CONF_OK 4 /* L-0.1 -> L-1 - L-1.1 -> L.1 */ - -/* - * per plci state machine - */ -#define ST_PLCI_NONE 0 /* P-0 */ -#define ST_PLCI_OUTGOING 1 /* P-0.1 */ -#define ST_PLCI_ALLOCATED 2 /* P-1 */ -#define ST_PLCI_ACTIVE 3 /* P-ACT */ -#define ST_PLCI_INCOMING 4 /* P-2 */ -#define ST_PLCI_FACILITY_IND 5 /* P-3 */ -#define ST_PLCI_ACCEPTING 6 /* P-4 */ -#define ST_PLCI_DISCONNECTING 7 /* P-5 */ -#define ST_PLCI_DISCONNECTED 8 /* P-6 */ -#define ST_PLCI_RESUMEING 9 /* P-0.Res */ -#define ST_PLCI_RESUME 10 /* P-Res */ -#define ST_PLCI_HELD 11 /* P-HELD */ - -#define EV_PLCI_CONNECT_REQ 1 /* P-0 -> P-0.1 - */ -#define EV_PLCI_CONNECT_CONF_ERROR 2 /* P-0.1 -> P-0 - */ -#define EV_PLCI_CONNECT_CONF_OK 3 /* P-0.1 -> P-1 - */ -#define EV_PLCI_FACILITY_IND_UP 4 /* P-0 -> P-1 - */ -#define EV_PLCI_CONNECT_IND 5 /* P-0 -> P-2 - */ -#define EV_PLCI_CONNECT_ACTIVE_IND 6 /* P-1 -> P-ACT - */ -#define EV_PLCI_CONNECT_REJECT 7 /* P-2 -> P-5 - P-3 -> P-5 - */ -#define EV_PLCI_DISCONNECT_REQ 8 /* P-1 -> P-5 - P-2 -> P-5 - P-3 -> P-5 - P-4 -> P-5 - P-ACT -> P-5 - P-Res -> P-5 (*) - P-HELD -> P-5 (*) - */ -#define EV_PLCI_DISCONNECT_IND 9 /* P-1 -> P-6 - P-2 -> P-6 - P-3 -> P-6 - P-4 -> P-6 - P-5 -> P-6 - P-ACT -> P-6 - P-Res -> P-6 (*) - P-HELD -> P-6 (*) - */ -#define EV_PLCI_FACILITY_IND_DOWN 10 /* P-0.1 -> P-5 - P-1 -> P-5 - P-ACT -> P-5 - P-2 -> P-5 - P-3 -> P-5 - P-4 -> P-5 - */ -#define EV_PLCI_DISCONNECT_RESP 11 /* P-6 -> P-0 - */ -#define EV_PLCI_CONNECT_RESP 12 /* P-6 -> P-0 - */ - -#define EV_PLCI_RESUME_REQ 13 /* P-0 -> P-0.Res - */ -#define EV_PLCI_RESUME_CONF_OK 14 /* P-0.Res -> P-Res - */ -#define EV_PLCI_RESUME_CONF_ERROR 15 /* P-0.Res -> P-0 - */ -#define EV_PLCI_RESUME_IND 16 /* P-Res -> P-ACT - */ -#define EV_PLCI_HOLD_IND 17 /* P-ACT -> P-HELD - */ -#define EV_PLCI_RETRIEVE_IND 18 /* P-HELD -> P-ACT - */ -#define EV_PLCI_SUSPEND_IND 19 /* P-ACT -> P-5 - */ -#define EV_PLCI_CD_IND 20 /* P-2 -> P-5 - */ - -/* - * per ncci state machine - */ -#define ST_NCCI_PREVIOUS -1 -#define ST_NCCI_NONE 0 /* N-0 */ -#define ST_NCCI_OUTGOING 1 /* N-0.1 */ -#define ST_NCCI_INCOMING 2 /* N-1 */ -#define ST_NCCI_ALLOCATED 3 /* N-2 */ -#define ST_NCCI_ACTIVE 4 /* N-ACT */ -#define ST_NCCI_RESETING 5 /* N-3 */ -#define ST_NCCI_DISCONNECTING 6 /* N-4 */ -#define ST_NCCI_DISCONNECTED 7 /* N-5 */ - -#define EV_NCCI_CONNECT_B3_REQ 1 /* N-0 -> N-0.1 */ -#define EV_NCCI_CONNECT_B3_IND 2 /* N-0 -> N.1 */ -#define EV_NCCI_CONNECT_B3_CONF_OK 3 /* N-0.1 -> N.2 */ -#define EV_NCCI_CONNECT_B3_CONF_ERROR 4 /* N-0.1 -> N.0 */ -#define EV_NCCI_CONNECT_B3_REJECT 5 /* N-1 -> N-4 */ -#define EV_NCCI_CONNECT_B3_RESP 6 /* N-1 -> N-2 */ -#define EV_NCCI_CONNECT_B3_ACTIVE_IND 7 /* N-2 -> N-ACT */ -#define EV_NCCI_RESET_B3_REQ 8 /* N-ACT -> N-3 */ -#define EV_NCCI_RESET_B3_IND 9 /* N-3 -> N-ACT */ -#define EV_NCCI_DISCONNECT_B3_IND 10 /* N-4 -> N.5 */ -#define EV_NCCI_DISCONNECT_B3_CONF_ERROR 11 /* N-4 -> previous */ -#define EV_NCCI_DISCONNECT_B3_REQ 12 /* N-1 -> N-4 - N-2 -> N-4 - N-3 -> N-4 - N-ACT -> N-4 */ -#define EV_NCCI_DISCONNECT_B3_RESP 13 /* N-5 -> N-0 */ - -#endif /* __CAPIDRV_H__ */ diff --git a/drivers/isdn/divert/Makefile b/drivers/isdn/divert/Makefile deleted file mode 100644 index 07684fe53537..000000000000 --- a/drivers/isdn/divert/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# Makefile for the dss1_divert ISDN module - -# Each configuration option enables a list of files. - -obj-$(CONFIG_ISDN_DIVERSION) += dss1_divert.o - -# Multipart objects. - -dss1_divert-y := isdn_divert.o divert_procfs.o divert_init.o diff --git a/drivers/isdn/divert/divert_init.c b/drivers/isdn/divert/divert_init.c deleted file mode 100644 index 267dede13bfd..000000000000 --- a/drivers/isdn/divert/divert_init.c +++ /dev/null @@ -1,82 +0,0 @@ -/* $Id divert_init.c,v 1.5.6.2 2001/01/24 22:18:17 kai Exp $ - * - * Module init for DSS1 diversion services for i4l. - * - * Copyright 1999 by Werner Cornelius (werner@isdn4linux.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#include -#include -#include - -#include "isdn_divert.h" - -MODULE_DESCRIPTION("ISDN4Linux: Call diversion support"); -MODULE_AUTHOR("Werner Cornelius"); -MODULE_LICENSE("GPL"); - -/****************************************/ -/* structure containing interface to hl */ -/****************************************/ -isdn_divert_if divert_if = { - DIVERT_IF_MAGIC, /* magic value */ - DIVERT_CMD_REG, /* register cmd */ - ll_callback, /* callback routine from ll */ - NULL, /* command still not specified */ - NULL, /* drv_to_name */ - NULL, /* name_to_drv */ -}; - -/*************************/ -/* Module interface code */ -/* no cmd line parms */ -/*************************/ -static int __init divert_init(void) -{ - int i; - - if (divert_dev_init()) { - printk(KERN_WARNING "dss1_divert: cannot install device, not loaded\n"); - return (-EIO); - } - if ((i = DIVERT_REG_NAME(&divert_if)) != DIVERT_NO_ERR) { - divert_dev_deinit(); - printk(KERN_WARNING "dss1_divert: error %d registering module, not loaded\n", i); - return (-EIO); - } - printk(KERN_INFO "dss1_divert module successfully installed\n"); - return (0); -} - -/**********************/ -/* Module deinit code */ -/**********************/ -static void __exit divert_exit(void) -{ - unsigned long flags; - int i; - - spin_lock_irqsave(&divert_lock, flags); - divert_if.cmd = DIVERT_CMD_REL; /* release */ - if ((i = DIVERT_REG_NAME(&divert_if)) != DIVERT_NO_ERR) { - printk(KERN_WARNING "dss1_divert: error %d releasing module\n", i); - spin_unlock_irqrestore(&divert_lock, flags); - return; - } - if (divert_dev_deinit()) { - printk(KERN_WARNING "dss1_divert: device busy, remove cancelled\n"); - spin_unlock_irqrestore(&divert_lock, flags); - return; - } - spin_unlock_irqrestore(&divert_lock, flags); - deleterule(-1); /* delete all rules and free mem */ - deleteprocs(); - printk(KERN_INFO "dss1_divert module successfully removed \n"); -} - -module_init(divert_init); -module_exit(divert_exit); diff --git a/drivers/isdn/divert/divert_procfs.c b/drivers/isdn/divert/divert_procfs.c deleted file mode 100644 index 342585e04fd3..000000000000 --- a/drivers/isdn/divert/divert_procfs.c +++ /dev/null @@ -1,336 +0,0 @@ -/* $Id: divert_procfs.c,v 1.11.6.2 2001/09/23 22:24:36 kai Exp $ - * - * Filesystem handling for the diversion supplementary services. - * - * Copyright 1998 by Werner Cornelius (werner@isdn4linux.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#include -#include -#include -#ifdef CONFIG_PROC_FS -#include -#else -#include -#endif -#include -#include -#include -#include -#include "isdn_divert.h" - - -/*********************************/ -/* Variables for interface queue */ -/*********************************/ -ulong if_used = 0; /* number of interface users */ -static DEFINE_MUTEX(isdn_divert_mutex); -static struct divert_info *divert_info_head = NULL; /* head of queue */ -static struct divert_info *divert_info_tail = NULL; /* pointer to last entry */ -static DEFINE_SPINLOCK(divert_info_lock);/* lock for queue */ -static wait_queue_head_t rd_queue; - -/*********************************/ -/* put an info buffer into queue */ -/*********************************/ -void -put_info_buffer(char *cp) -{ - struct divert_info *ib; - unsigned long flags; - - if (if_used <= 0) - return; - if (!cp) - return; - if (!*cp) - return; - if (!(ib = kmalloc(sizeof(struct divert_info) + strlen(cp), GFP_ATOMIC))) - return; /* no memory */ - strcpy(ib->info_start, cp); /* set output string */ - ib->next = NULL; - spin_lock_irqsave(&divert_info_lock, flags); - ib->usage_cnt = if_used; - if (!divert_info_head) - divert_info_head = ib; /* new head */ - else - divert_info_tail->next = ib; /* follows existing messages */ - divert_info_tail = ib; /* new tail */ - - /* delete old entrys */ - while (divert_info_head->next) { - if ((divert_info_head->usage_cnt <= 0) && - (divert_info_head->next->usage_cnt <= 0)) { - ib = divert_info_head; - divert_info_head = divert_info_head->next; - kfree(ib); - } else - break; - } /* divert_info_head->next */ - spin_unlock_irqrestore(&divert_info_lock, flags); - wake_up_interruptible(&(rd_queue)); -} /* put_info_buffer */ - -#ifdef CONFIG_PROC_FS - -/**********************************/ -/* deflection device read routine */ -/**********************************/ -static ssize_t -isdn_divert_read(struct file *file, char __user *buf, size_t count, loff_t *off) -{ - struct divert_info *inf; - int len; - - if (!(inf = *((struct divert_info **) file->private_data))) { - if (file->f_flags & O_NONBLOCK) - return -EAGAIN; - wait_event_interruptible(rd_queue, (inf = - *((struct divert_info **) file->private_data))); - } - if (!inf) - return (0); - - inf->usage_cnt--; /* new usage count */ - file->private_data = &inf->next; /* next structure */ - if ((len = strlen(inf->info_start)) <= count) { - if (copy_to_user(buf, inf->info_start, len)) - return -EFAULT; - *off += len; - return (len); - } - return (0); -} /* isdn_divert_read */ - -/**********************************/ -/* deflection device write routine */ -/**********************************/ -static ssize_t -isdn_divert_write(struct file *file, const char __user *buf, size_t count, loff_t *off) -{ - return (-ENODEV); -} /* isdn_divert_write */ - - -/***************************************/ -/* select routines for various kernels */ -/***************************************/ -static __poll_t -isdn_divert_poll(struct file *file, poll_table *wait) -{ - __poll_t mask = 0; - - poll_wait(file, &(rd_queue), wait); - /* mask = EPOLLOUT | EPOLLWRNORM; */ - if (*((struct divert_info **) file->private_data)) { - mask |= EPOLLIN | EPOLLRDNORM; - } - return mask; -} /* isdn_divert_poll */ - -/****************/ -/* Open routine */ -/****************/ -static int -isdn_divert_open(struct inode *ino, struct file *filep) -{ - unsigned long flags; - - spin_lock_irqsave(&divert_info_lock, flags); - if_used++; - if (divert_info_head) - filep->private_data = &(divert_info_tail->next); - else - filep->private_data = &divert_info_head; - spin_unlock_irqrestore(&divert_info_lock, flags); - /* start_divert(); */ - return nonseekable_open(ino, filep); -} /* isdn_divert_open */ - -/*******************/ -/* close routine */ -/*******************/ -static int -isdn_divert_close(struct inode *ino, struct file *filep) -{ - struct divert_info *inf; - unsigned long flags; - - spin_lock_irqsave(&divert_info_lock, flags); - if_used--; - inf = *((struct divert_info **) filep->private_data); - while (inf) { - inf->usage_cnt--; - inf = inf->next; - } - if (if_used <= 0) - while (divert_info_head) { - inf = divert_info_head; - divert_info_head = divert_info_head->next; - kfree(inf); - } - spin_unlock_irqrestore(&divert_info_lock, flags); - return (0); -} /* isdn_divert_close */ - -/*********/ -/* IOCTL */ -/*********/ -static int isdn_divert_ioctl_unlocked(struct file *file, uint cmd, ulong arg) -{ - divert_ioctl dioctl; - int i; - unsigned long flags; - divert_rule *rulep; - char *cp; - - if (copy_from_user(&dioctl, (void __user *) arg, sizeof(dioctl))) - return -EFAULT; - - switch (cmd) { - case IIOCGETVER: - dioctl.drv_version = DIVERT_IIOC_VERSION; /* set version */ - break; - - case IIOCGETDRV: - if ((dioctl.getid.drvid = divert_if.name_to_drv(dioctl.getid.drvnam)) < 0) - return (-EINVAL); - break; - - case IIOCGETNAM: - cp = divert_if.drv_to_name(dioctl.getid.drvid); - if (!cp) - return (-EINVAL); - if (!*cp) - return (-EINVAL); - strcpy(dioctl.getid.drvnam, cp); - break; - - case IIOCGETRULE: - if (!(rulep = getruleptr(dioctl.getsetrule.ruleidx))) - return (-EINVAL); - dioctl.getsetrule.rule = *rulep; /* copy data */ - break; - - case IIOCMODRULE: - if (!(rulep = getruleptr(dioctl.getsetrule.ruleidx))) - return (-EINVAL); - spin_lock_irqsave(&divert_lock, flags); - *rulep = dioctl.getsetrule.rule; /* copy data */ - spin_unlock_irqrestore(&divert_lock, flags); - return (0); /* no copy required */ - break; - - case IIOCINSRULE: - return (insertrule(dioctl.getsetrule.ruleidx, &dioctl.getsetrule.rule)); - break; - - case IIOCDELRULE: - return (deleterule(dioctl.getsetrule.ruleidx)); - break; - - case IIOCDODFACT: - return (deflect_extern_action(dioctl.fwd_ctrl.subcmd, - dioctl.fwd_ctrl.callid, - dioctl.fwd_ctrl.to_nr)); - - case IIOCDOCFACT: - case IIOCDOCFDIS: - case IIOCDOCFINT: - if (!divert_if.drv_to_name(dioctl.cf_ctrl.drvid)) - return (-EINVAL); /* invalid driver */ - if (strnlen(dioctl.cf_ctrl.msn, sizeof(dioctl.cf_ctrl.msn)) == - sizeof(dioctl.cf_ctrl.msn)) - return -EINVAL; - if (strnlen(dioctl.cf_ctrl.fwd_nr, sizeof(dioctl.cf_ctrl.fwd_nr)) == - sizeof(dioctl.cf_ctrl.fwd_nr)) - return -EINVAL; - if ((i = cf_command(dioctl.cf_ctrl.drvid, - (cmd == IIOCDOCFACT) ? 1 : (cmd == IIOCDOCFDIS) ? 0 : 2, - dioctl.cf_ctrl.cfproc, - dioctl.cf_ctrl.msn, - dioctl.cf_ctrl.service, - dioctl.cf_ctrl.fwd_nr, - &dioctl.cf_ctrl.procid))) - return (i); - break; - - default: - return (-EINVAL); - } /* switch cmd */ - return copy_to_user((void __user *)arg, &dioctl, sizeof(dioctl)) ? -EFAULT : 0; -} /* isdn_divert_ioctl */ - -static long isdn_divert_ioctl(struct file *file, uint cmd, ulong arg) -{ - long ret; - - mutex_lock(&isdn_divert_mutex); - ret = isdn_divert_ioctl_unlocked(file, cmd, arg); - mutex_unlock(&isdn_divert_mutex); - - return ret; -} - -static const struct file_operations isdn_fops = -{ - .owner = THIS_MODULE, - .llseek = no_llseek, - .read = isdn_divert_read, - .write = isdn_divert_write, - .poll = isdn_divert_poll, - .unlocked_ioctl = isdn_divert_ioctl, - .open = isdn_divert_open, - .release = isdn_divert_close, -}; - -/****************************/ -/* isdn subdir in /proc/net */ -/****************************/ -static struct proc_dir_entry *isdn_proc_entry = NULL; -static struct proc_dir_entry *isdn_divert_entry = NULL; -#endif /* CONFIG_PROC_FS */ - -/***************************************************************************/ -/* divert_dev_init must be called before the proc filesystem may be used */ -/***************************************************************************/ -int -divert_dev_init(void) -{ - - init_waitqueue_head(&rd_queue); - -#ifdef CONFIG_PROC_FS - isdn_proc_entry = proc_mkdir("isdn", init_net.proc_net); - if (!isdn_proc_entry) - return (-1); - isdn_divert_entry = proc_create("divert", S_IFREG | S_IRUGO, - isdn_proc_entry, &isdn_fops); - if (!isdn_divert_entry) { - remove_proc_entry("isdn", init_net.proc_net); - return (-1); - } -#endif /* CONFIG_PROC_FS */ - - return (0); -} /* divert_dev_init */ - -/***************************************************************************/ -/* divert_dev_deinit must be called before leaving isdn when included as */ -/* a module. */ -/***************************************************************************/ -int -divert_dev_deinit(void) -{ - -#ifdef CONFIG_PROC_FS - remove_proc_entry("divert", isdn_proc_entry); - remove_proc_entry("isdn", init_net.proc_net); -#endif /* CONFIG_PROC_FS */ - - return (0); -} /* divert_dev_deinit */ diff --git a/drivers/isdn/divert/isdn_divert.c b/drivers/isdn/divert/isdn_divert.c deleted file mode 100644 index 5620fd2c6009..000000000000 --- a/drivers/isdn/divert/isdn_divert.c +++ /dev/null @@ -1,846 +0,0 @@ -/* $Id: isdn_divert.c,v 1.6.6.3 2001/09/23 22:24:36 kai Exp $ - * - * DSS1 main diversion supplementary handling for i4l. - * - * Copyright 1999 by Werner Cornelius (werner@isdn4linux.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#include -#include -#include -#include - -#include "isdn_divert.h" - -/**********************************/ -/* structure keeping calling info */ -/**********************************/ -struct call_struc { - isdn_ctrl ics; /* delivered setup + driver parameters */ - ulong divert_id; /* Id delivered to user */ - unsigned char akt_state; /* actual state */ - char deflect_dest[35]; /* deflection destination */ - struct timer_list timer; /* timer control structure */ - char info[90]; /* device info output */ - struct call_struc *next; /* pointer to next entry */ - struct call_struc *prev; -}; - - -/********************************************/ -/* structure keeping deflection table entry */ -/********************************************/ -struct deflect_struc { - struct deflect_struc *next, *prev; - divert_rule rule; /* used rule */ -}; - - -/*****************************************/ -/* variables for main diversion services */ -/*****************************************/ -/* diversion/deflection processes */ -static struct call_struc *divert_head = NULL; /* head of remembered entrys */ -static ulong next_id = 1; /* next info id */ -static struct deflect_struc *table_head = NULL; -static struct deflect_struc *table_tail = NULL; -static unsigned char extern_wait_max = 4; /* maximum wait in s for external process */ - -DEFINE_SPINLOCK(divert_lock); - -/***************************/ -/* timer callback function */ -/***************************/ -static void deflect_timer_expire(struct timer_list *t) -{ - unsigned long flags; - struct call_struc *cs = from_timer(cs, t, timer); - - spin_lock_irqsave(&divert_lock, flags); - del_timer(&cs->timer); /* delete active timer */ - spin_unlock_irqrestore(&divert_lock, flags); - - switch (cs->akt_state) { - case DEFLECT_PROCEED: - cs->ics.command = ISDN_CMD_HANGUP; /* cancel action */ - divert_if.ll_cmd(&cs->ics); - spin_lock_irqsave(&divert_lock, flags); - cs->akt_state = DEFLECT_AUTODEL; /* delete after timeout */ - cs->timer.expires = jiffies + (HZ * AUTODEL_TIME); - add_timer(&cs->timer); - spin_unlock_irqrestore(&divert_lock, flags); - break; - - case DEFLECT_ALERT: - cs->ics.command = ISDN_CMD_REDIR; /* protocol */ - strlcpy(cs->ics.parm.setup.phone, cs->deflect_dest, sizeof(cs->ics.parm.setup.phone)); - strcpy(cs->ics.parm.setup.eazmsn, "Testtext delayed"); - divert_if.ll_cmd(&cs->ics); - spin_lock_irqsave(&divert_lock, flags); - cs->akt_state = DEFLECT_AUTODEL; /* delete after timeout */ - cs->timer.expires = jiffies + (HZ * AUTODEL_TIME); - add_timer(&cs->timer); - spin_unlock_irqrestore(&divert_lock, flags); - break; - - case DEFLECT_AUTODEL: - default: - spin_lock_irqsave(&divert_lock, flags); - if (cs->prev) - cs->prev->next = cs->next; /* forward link */ - else - divert_head = cs->next; - if (cs->next) - cs->next->prev = cs->prev; /* back link */ - spin_unlock_irqrestore(&divert_lock, flags); - kfree(cs); - return; - - } /* switch */ -} /* deflect_timer_func */ - - -/*****************************************/ -/* handle call forwarding de/activations */ -/* 0 = deact, 1 = act, 2 = interrogate */ -/*****************************************/ -int cf_command(int drvid, int mode, - u_char proc, char *msn, - u_char service, char *fwd_nr, ulong *procid) -{ - unsigned long flags; - int retval, msnlen; - int fwd_len; - char *p, *ielenp, tmp[60]; - struct call_struc *cs; - - if (strchr(msn, '.')) return (-EINVAL); /* subaddress not allowed in msn */ - if ((proc & 0x7F) > 2) return (-EINVAL); - proc &= 3; - p = tmp; - *p++ = 0x30; /* enumeration */ - ielenp = p++; /* remember total length position */ - *p++ = 0xa; /* proc tag */ - *p++ = 1; /* length */ - *p++ = proc & 0x7F; /* procedure to de/activate/interrogate */ - *p++ = 0xa; /* service tag */ - *p++ = 1; /* length */ - *p++ = service; /* service to handle */ - - if (mode == 1) { - if (!*fwd_nr) return (-EINVAL); /* destination missing */ - if (strchr(fwd_nr, '.')) return (-EINVAL); /* subaddress not allowed */ - fwd_len = strlen(fwd_nr); - *p++ = 0x30; /* number enumeration */ - *p++ = fwd_len + 2; /* complete forward to len */ - *p++ = 0x80; /* fwd to nr */ - *p++ = fwd_len; /* length of number */ - strcpy(p, fwd_nr); /* copy number */ - p += fwd_len; /* pointer beyond fwd */ - } /* activate */ - - msnlen = strlen(msn); - *p++ = 0x80; /* msn number */ - if (msnlen > 1) { - *p++ = msnlen; /* length */ - strcpy(p, msn); - p += msnlen; - } else - *p++ = 0; - - *ielenp = p - ielenp - 1; /* set total IE length */ - - /* allocate mem for information struct */ - if (!(cs = kmalloc(sizeof(struct call_struc), GFP_ATOMIC))) - return (-ENOMEM); /* no memory */ - timer_setup(&cs->timer, deflect_timer_expire, 0); - cs->info[0] = '\0'; - cs->ics.driver = drvid; - cs->ics.command = ISDN_CMD_PROT_IO; /* protocol specific io */ - cs->ics.arg = DSS1_CMD_INVOKE; /* invoke supplementary service */ - cs->ics.parm.dss1_io.proc = (mode == 1) ? 7 : (mode == 2) ? 11 : 8; /* operation */ - cs->ics.parm.dss1_io.timeout = 4000; /* from ETS 300 207-1 */ - cs->ics.parm.dss1_io.datalen = p - tmp; /* total len */ - cs->ics.parm.dss1_io.data = tmp; /* start of buffer */ - - spin_lock_irqsave(&divert_lock, flags); - cs->ics.parm.dss1_io.ll_id = next_id++; /* id for callback */ - spin_unlock_irqrestore(&divert_lock, flags); - *procid = cs->ics.parm.dss1_io.ll_id; - - sprintf(cs->info, "%d 0x%lx %s%s 0 %s %02x %d%s%s\n", - (!mode) ? DIVERT_DEACTIVATE : (mode == 1) ? DIVERT_ACTIVATE : DIVERT_REPORT, - cs->ics.parm.dss1_io.ll_id, - (mode != 2) ? "" : "0 ", - divert_if.drv_to_name(cs->ics.driver), - msn, - service & 0xFF, - proc, - (mode != 1) ? "" : " 0 ", - (mode != 1) ? "" : fwd_nr); - - retval = divert_if.ll_cmd(&cs->ics); /* execute command */ - - if (!retval) { - cs->prev = NULL; - spin_lock_irqsave(&divert_lock, flags); - cs->next = divert_head; - divert_head = cs; - spin_unlock_irqrestore(&divert_lock, flags); - } else - kfree(cs); - return (retval); -} /* cf_command */ - - -/****************************************/ -/* handle a external deflection command */ -/****************************************/ -int deflect_extern_action(u_char cmd, ulong callid, char *to_nr) -{ - struct call_struc *cs; - isdn_ctrl ic; - unsigned long flags; - int i; - - if ((cmd & 0x7F) > 2) return (-EINVAL); /* invalid command */ - cs = divert_head; /* start of parameter list */ - while (cs) { - if (cs->divert_id == callid) break; /* found */ - cs = cs->next; - } /* search entry */ - if (!cs) return (-EINVAL); /* invalid callid */ - - ic.driver = cs->ics.driver; - ic.arg = cs->ics.arg; - i = -EINVAL; - if (cs->akt_state == DEFLECT_AUTODEL) return (i); /* no valid call */ - switch (cmd & 0x7F) { - case 0: /* hangup */ - del_timer(&cs->timer); - ic.command = ISDN_CMD_HANGUP; - i = divert_if.ll_cmd(&ic); - spin_lock_irqsave(&divert_lock, flags); - cs->akt_state = DEFLECT_AUTODEL; /* delete after timeout */ - cs->timer.expires = jiffies + (HZ * AUTODEL_TIME); - add_timer(&cs->timer); - spin_unlock_irqrestore(&divert_lock, flags); - break; - - case 1: /* alert */ - if (cs->akt_state == DEFLECT_ALERT) return (0); - cmd &= 0x7F; /* never wait */ - del_timer(&cs->timer); - ic.command = ISDN_CMD_ALERT; - if ((i = divert_if.ll_cmd(&ic))) { - spin_lock_irqsave(&divert_lock, flags); - cs->akt_state = DEFLECT_AUTODEL; /* delete after timeout */ - cs->timer.expires = jiffies + (HZ * AUTODEL_TIME); - add_timer(&cs->timer); - spin_unlock_irqrestore(&divert_lock, flags); - } else - cs->akt_state = DEFLECT_ALERT; - break; - - case 2: /* redir */ - del_timer(&cs->timer); - strlcpy(cs->ics.parm.setup.phone, to_nr, sizeof(cs->ics.parm.setup.phone)); - strcpy(cs->ics.parm.setup.eazmsn, "Testtext manual"); - ic.command = ISDN_CMD_REDIR; - if ((i = divert_if.ll_cmd(&ic))) { - spin_lock_irqsave(&divert_lock, flags); - cs->akt_state = DEFLECT_AUTODEL; /* delete after timeout */ - cs->timer.expires = jiffies + (HZ * AUTODEL_TIME); - add_timer(&cs->timer); - spin_unlock_irqrestore(&divert_lock, flags); - } else - cs->akt_state = DEFLECT_ALERT; - break; - - } /* switch */ - return (i); -} /* deflect_extern_action */ - -/********************************/ -/* insert a new rule before idx */ -/********************************/ -int insertrule(int idx, divert_rule *newrule) -{ - struct deflect_struc *ds, *ds1 = NULL; - unsigned long flags; - - if (!(ds = kmalloc(sizeof(struct deflect_struc), GFP_KERNEL))) - return (-ENOMEM); /* no memory */ - - ds->rule = *newrule; /* set rule */ - - spin_lock_irqsave(&divert_lock, flags); - - if (idx >= 0) { - ds1 = table_head; - while ((ds1) && (idx > 0)) - { idx--; - ds1 = ds1->next; - } - if (!ds1) idx = -1; - } - - if (idx < 0) { - ds->prev = table_tail; /* previous entry */ - ds->next = NULL; /* end of chain */ - if (ds->prev) - ds->prev->next = ds; /* last forward */ - else - table_head = ds; /* is first entry */ - table_tail = ds; /* end of queue */ - } else { - ds->next = ds1; /* next entry */ - ds->prev = ds1->prev; /* prev entry */ - ds1->prev = ds; /* backward chain old element */ - if (!ds->prev) - table_head = ds; /* first element */ - } - - spin_unlock_irqrestore(&divert_lock, flags); - return (0); -} /* insertrule */ - -/***********************************/ -/* delete the rule at position idx */ -/***********************************/ -int deleterule(int idx) -{ - struct deflect_struc *ds, *ds1; - unsigned long flags; - - if (idx < 0) { - spin_lock_irqsave(&divert_lock, flags); - ds = table_head; - table_head = NULL; - table_tail = NULL; - spin_unlock_irqrestore(&divert_lock, flags); - while (ds) { - ds1 = ds; - ds = ds->next; - kfree(ds1); - } - return (0); - } - - spin_lock_irqsave(&divert_lock, flags); - ds = table_head; - - while ((ds) && (idx > 0)) { - idx--; - ds = ds->next; - } - - if (!ds) { - spin_unlock_irqrestore(&divert_lock, flags); - return (-EINVAL); - } - - if (ds->next) - ds->next->prev = ds->prev; /* backward chain */ - else - table_tail = ds->prev; /* end of chain */ - - if (ds->prev) - ds->prev->next = ds->next; /* forward chain */ - else - table_head = ds->next; /* start of chain */ - - spin_unlock_irqrestore(&divert_lock, flags); - kfree(ds); - return (0); -} /* deleterule */ - -/*******************************************/ -/* get a pointer to a specific rule number */ -/*******************************************/ -divert_rule *getruleptr(int idx) -{ - struct deflect_struc *ds = table_head; - - if (idx < 0) return (NULL); - while ((ds) && (idx >= 0)) { - if (!(idx--)) { - return (&ds->rule); - break; - } - ds = ds->next; - } - return (NULL); -} /* getruleptr */ - -/*************************************************/ -/* called from common module on an incoming call */ -/*************************************************/ -static int isdn_divert_icall(isdn_ctrl *ic) -{ - int retval = 0; - unsigned long flags; - struct call_struc *cs = NULL; - struct deflect_struc *dv; - char *p, *p1; - u_char accept; - - /* first check the internal deflection table */ - for (dv = table_head; dv; dv = dv->next) { - /* scan table */ - if (((dv->rule.callopt == 1) && (ic->command == ISDN_STAT_ICALLW)) || - ((dv->rule.callopt == 2) && (ic->command == ISDN_STAT_ICALL))) - continue; /* call option check */ - if (!(dv->rule.drvid & (1L << ic->driver))) - continue; /* driver not matching */ - if ((dv->rule.si1) && (dv->rule.si1 != ic->parm.setup.si1)) - continue; /* si1 not matching */ - if ((dv->rule.si2) && (dv->rule.si2 != ic->parm.setup.si2)) - continue; /* si2 not matching */ - - p = dv->rule.my_msn; - p1 = ic->parm.setup.eazmsn; - accept = 0; - while (*p) { - /* complete compare */ - if (*p == '-') { - accept = 1; /* call accepted */ - break; - } - if (*p++ != *p1++) - break; /* not accepted */ - if ((!*p) && (!*p1)) - accept = 1; - } /* complete compare */ - if (!accept) continue; /* not accepted */ - - if ((strcmp(dv->rule.caller, "0")) || - (ic->parm.setup.phone[0])) { - p = dv->rule.caller; - p1 = ic->parm.setup.phone; - accept = 0; - while (*p) { - /* complete compare */ - if (*p == '-') { - accept = 1; /* call accepted */ - break; - } - if (*p++ != *p1++) - break; /* not accepted */ - if ((!*p) && (!*p1)) - accept = 1; - } /* complete compare */ - if (!accept) continue; /* not accepted */ - } - - switch (dv->rule.action) { - case DEFLECT_IGNORE: - return 0; - - case DEFLECT_ALERT: - case DEFLECT_PROCEED: - case DEFLECT_REPORT: - case DEFLECT_REJECT: - if (dv->rule.action == DEFLECT_PROCEED) - if ((!if_used) || ((!extern_wait_max) && (!dv->rule.waittime))) - return (0); /* no external deflection needed */ - if (!(cs = kmalloc(sizeof(struct call_struc), GFP_ATOMIC))) - return (0); /* no memory */ - timer_setup(&cs->timer, deflect_timer_expire, 0); - cs->info[0] = '\0'; - - cs->ics = *ic; /* copy incoming data */ - if (!cs->ics.parm.setup.phone[0]) strcpy(cs->ics.parm.setup.phone, "0"); - if (!cs->ics.parm.setup.eazmsn[0]) strcpy(cs->ics.parm.setup.eazmsn, "0"); - cs->ics.parm.setup.screen = dv->rule.screen; - if (dv->rule.waittime) - cs->timer.expires = jiffies + (HZ * dv->rule.waittime); - else if (dv->rule.action == DEFLECT_PROCEED) - cs->timer.expires = jiffies + (HZ * extern_wait_max); - else - cs->timer.expires = 0; - cs->akt_state = dv->rule.action; - spin_lock_irqsave(&divert_lock, flags); - cs->divert_id = next_id++; /* new sequence number */ - spin_unlock_irqrestore(&divert_lock, flags); - cs->prev = NULL; - if (cs->akt_state == DEFLECT_ALERT) { - strcpy(cs->deflect_dest, dv->rule.to_nr); - if (!cs->timer.expires) { - strcpy(ic->parm.setup.eazmsn, - "Testtext direct"); - ic->parm.setup.screen = dv->rule.screen; - strlcpy(ic->parm.setup.phone, dv->rule.to_nr, sizeof(ic->parm.setup.phone)); - cs->akt_state = DEFLECT_AUTODEL; /* delete after timeout */ - cs->timer.expires = jiffies + (HZ * AUTODEL_TIME); - retval = 5; - } else - retval = 1; /* alerting */ - } else { - cs->deflect_dest[0] = '\0'; - retval = 4; /* only proceed */ - } - snprintf(cs->info, sizeof(cs->info), - "%d 0x%lx %s %s %s %s 0x%x 0x%x %d %d %s\n", - cs->akt_state, - cs->divert_id, - divert_if.drv_to_name(cs->ics.driver), - (ic->command == ISDN_STAT_ICALLW) ? "1" : "0", - cs->ics.parm.setup.phone, - cs->ics.parm.setup.eazmsn, - cs->ics.parm.setup.si1, - cs->ics.parm.setup.si2, - cs->ics.parm.setup.screen, - dv->rule.waittime, - cs->deflect_dest); - if ((dv->rule.action == DEFLECT_REPORT) || - (dv->rule.action == DEFLECT_REJECT)) { - put_info_buffer(cs->info); - kfree(cs); /* remove */ - return ((dv->rule.action == DEFLECT_REPORT) ? 0 : 2); /* nothing to do */ - } - break; - - default: - return 0; /* ignore call */ - } /* switch action */ - break; /* will break the 'for' looping */ - } /* scan_table */ - - if (cs) { - cs->prev = NULL; - spin_lock_irqsave(&divert_lock, flags); - cs->next = divert_head; - divert_head = cs; - if (cs->timer.expires) add_timer(&cs->timer); - spin_unlock_irqrestore(&divert_lock, flags); - - put_info_buffer(cs->info); - return (retval); - } else - return (0); -} /* isdn_divert_icall */ - - -void deleteprocs(void) -{ - struct call_struc *cs, *cs1; - unsigned long flags; - - spin_lock_irqsave(&divert_lock, flags); - cs = divert_head; - divert_head = NULL; - while (cs) { - del_timer(&cs->timer); - cs1 = cs; - cs = cs->next; - kfree(cs1); - } - spin_unlock_irqrestore(&divert_lock, flags); -} /* deleteprocs */ - -/****************************************************/ -/* put a address including address type into buffer */ -/****************************************************/ -static int put_address(char *st, u_char *p, int len) -{ - u_char retval = 0; - u_char adr_typ = 0; /* network standard */ - - if (len < 2) return (retval); - if (*p == 0xA1) { - retval = *(++p) + 2; /* total length */ - if (retval > len) return (0); /* too short */ - len = retval - 2; /* remaining length */ - if (len < 3) return (0); - if ((*(++p) != 0x0A) || (*(++p) != 1)) return (0); - adr_typ = *(++p); - len -= 3; - p++; - if (len < 2) return (0); - if (*p++ != 0x12) return (0); - if (*p > len) return (0); /* check number length */ - len = *p++; - } else if (*p == 0x80) { - retval = *(++p) + 2; /* total length */ - if (retval > len) return (0); - len = retval - 2; - p++; - } else - return (0); /* invalid address information */ - - sprintf(st, "%d ", adr_typ); - st += strlen(st); - if (!len) - *st++ = '-'; - else - while (len--) - *st++ = *p++; - *st = '\0'; - return (retval); -} /* put_address */ - -/*************************************/ -/* report a successful interrogation */ -/*************************************/ -static int interrogate_success(isdn_ctrl *ic, struct call_struc *cs) -{ - char *src = ic->parm.dss1_io.data; - int restlen = ic->parm.dss1_io.datalen; - int cnt = 1; - u_char n, n1; - char st[90], *p, *stp; - - if (restlen < 2) return (-100); /* frame too short */ - if (*src++ != 0x30) return (-101); - if ((n = *src++) > 0x81) return (-102); /* invalid length field */ - restlen -= 2; /* remaining bytes */ - if (n == 0x80) { - if (restlen < 2) return (-103); - if ((*(src + restlen - 1)) || (*(src + restlen - 2))) return (-104); - restlen -= 2; - } else if (n == 0x81) { - n = *src++; - restlen--; - if (n > restlen) return (-105); - restlen = n; - } else if (n > restlen) - return (-106); - else - restlen = n; /* standard format */ - if (restlen < 3) return (-107); /* no procedure */ - if ((*src++ != 2) || (*src++ != 1) || (*src++ != 0x0B)) return (-108); - restlen -= 3; - if (restlen < 2) return (-109); /* list missing */ - if (*src == 0x31) { - src++; - if ((n = *src++) > 0x81) return (-110); /* invalid length field */ - restlen -= 2; /* remaining bytes */ - if (n == 0x80) { - if (restlen < 2) return (-111); - if ((*(src + restlen - 1)) || (*(src + restlen - 2))) return (-112); - restlen -= 2; - } else if (n == 0x81) { - n = *src++; - restlen--; - if (n > restlen) return (-113); - restlen = n; - } else if (n > restlen) - return (-114); - else - restlen = n; /* standard format */ - } /* result list header */ - - while (restlen >= 2) { - stp = st; - sprintf(stp, "%d 0x%lx %d %s ", DIVERT_REPORT, ic->parm.dss1_io.ll_id, - cnt++, divert_if.drv_to_name(ic->driver)); - stp += strlen(stp); - if (*src++ != 0x30) return (-115); /* invalid enum */ - n = *src++; - restlen -= 2; - if (n > restlen) return (-116); /* enum length wrong */ - restlen -= n; - p = src; /* one entry */ - src += n; - if (!(n1 = put_address(stp, p, n & 0xFF))) continue; - stp += strlen(stp); - p += n1; - n -= n1; - if (n < 6) continue; /* no service and proc */ - if ((*p++ != 0x0A) || (*p++ != 1)) continue; - sprintf(stp, " 0x%02x ", (*p++) & 0xFF); - stp += strlen(stp); - if ((*p++ != 0x0A) || (*p++ != 1)) continue; - sprintf(stp, "%d ", (*p++) & 0xFF); - stp += strlen(stp); - n -= 6; - if (n > 2) { - if (*p++ != 0x30) continue; - if (*p > (n - 2)) continue; - n = *p++; - if (!(n1 = put_address(stp, p, n & 0xFF))) continue; - stp += strlen(stp); - } - sprintf(stp, "\n"); - put_info_buffer(st); - } /* while restlen */ - if (restlen) return (-117); - return (0); -} /* interrogate_success */ - -/*********************************************/ -/* callback for protocol specific extensions */ -/*********************************************/ -static int prot_stat_callback(isdn_ctrl *ic) -{ - struct call_struc *cs, *cs1; - int i; - unsigned long flags; - - cs = divert_head; /* start of list */ - cs1 = NULL; - while (cs) { - if (ic->driver == cs->ics.driver) { - switch (cs->ics.arg) { - case DSS1_CMD_INVOKE: - if ((cs->ics.parm.dss1_io.ll_id == ic->parm.dss1_io.ll_id) && - (cs->ics.parm.dss1_io.hl_id == ic->parm.dss1_io.hl_id)) { - switch (ic->arg) { - case DSS1_STAT_INVOKE_ERR: - sprintf(cs->info, "128 0x%lx 0x%x\n", - ic->parm.dss1_io.ll_id, - ic->parm.dss1_io.timeout); - put_info_buffer(cs->info); - break; - - case DSS1_STAT_INVOKE_RES: - switch (cs->ics.parm.dss1_io.proc) { - case 7: - case 8: - put_info_buffer(cs->info); - break; - - case 11: - i = interrogate_success(ic, cs); - if (i) - sprintf(cs->info, "%d 0x%lx %d\n", DIVERT_REPORT, - ic->parm.dss1_io.ll_id, i); - put_info_buffer(cs->info); - break; - - default: - printk(KERN_WARNING "dss1_divert: unknown proc %d\n", cs->ics.parm.dss1_io.proc); - break; - } - - break; - - default: - printk(KERN_WARNING "dss1_divert unknown invoke answer %lx\n", ic->arg); - break; - } - cs1 = cs; /* remember structure */ - cs = NULL; - continue; /* abort search */ - } /* id found */ - break; - - case DSS1_CMD_INVOKE_ABORT: - printk(KERN_WARNING "dss1_divert unhandled invoke abort\n"); - break; - - default: - printk(KERN_WARNING "dss1_divert unknown cmd 0x%lx\n", cs->ics.arg); - break; - } /* switch ics.arg */ - cs = cs->next; - } /* driver ok */ - } - - if (!cs1) { - printk(KERN_WARNING "dss1_divert unhandled process\n"); - return (0); - } - - if (cs1->ics.driver == -1) { - spin_lock_irqsave(&divert_lock, flags); - del_timer(&cs1->timer); - if (cs1->prev) - cs1->prev->next = cs1->next; /* forward link */ - else - divert_head = cs1->next; - if (cs1->next) - cs1->next->prev = cs1->prev; /* back link */ - spin_unlock_irqrestore(&divert_lock, flags); - kfree(cs1); - } - - return (0); -} /* prot_stat_callback */ - - -/***************************/ -/* status callback from HL */ -/***************************/ -static int isdn_divert_stat_callback(isdn_ctrl *ic) -{ - struct call_struc *cs, *cs1; - unsigned long flags; - int retval; - - retval = -1; - cs = divert_head; /* start of list */ - while (cs) { - if ((ic->driver == cs->ics.driver) && - (ic->arg == cs->ics.arg)) { - switch (ic->command) { - case ISDN_STAT_DHUP: - sprintf(cs->info, "129 0x%lx\n", cs->divert_id); - del_timer(&cs->timer); - cs->ics.driver = -1; - break; - - case ISDN_STAT_CAUSE: - sprintf(cs->info, "130 0x%lx %s\n", cs->divert_id, ic->parm.num); - break; - - case ISDN_STAT_REDIR: - sprintf(cs->info, "131 0x%lx\n", cs->divert_id); - del_timer(&cs->timer); - cs->ics.driver = -1; - break; - - default: - sprintf(cs->info, "999 0x%lx 0x%x\n", cs->divert_id, (int)(ic->command)); - break; - } - put_info_buffer(cs->info); - retval = 0; - } - cs1 = cs; - cs = cs->next; - if (cs1->ics.driver == -1) { - spin_lock_irqsave(&divert_lock, flags); - if (cs1->prev) - cs1->prev->next = cs1->next; /* forward link */ - else - divert_head = cs1->next; - if (cs1->next) - cs1->next->prev = cs1->prev; /* back link */ - spin_unlock_irqrestore(&divert_lock, flags); - kfree(cs1); - } - } - return (retval); /* not found */ -} /* isdn_divert_stat_callback */ - - -/********************/ -/* callback from ll */ -/********************/ -int ll_callback(isdn_ctrl *ic) -{ - switch (ic->command) { - case ISDN_STAT_ICALL: - case ISDN_STAT_ICALLW: - return (isdn_divert_icall(ic)); - break; - - case ISDN_STAT_PROT: - if ((ic->arg & 0xFF) == ISDN_PTYPE_EURO) { - if (ic->arg != DSS1_STAT_INVOKE_BRD) - return (prot_stat_callback(ic)); - else - return (0); /* DSS1 invoke broadcast */ - } else - return (-1); /* protocol not euro */ - - default: - return (isdn_divert_stat_callback(ic)); - } -} /* ll_callback */ diff --git a/drivers/isdn/divert/isdn_divert.h b/drivers/isdn/divert/isdn_divert.h deleted file mode 100644 index 55033dd872c0..000000000000 --- a/drivers/isdn/divert/isdn_divert.h +++ /dev/null @@ -1,132 +0,0 @@ -/* $Id: isdn_divert.h,v 1.5.6.1 2001/09/23 22:24:36 kai Exp $ - * - * Header for the diversion supplementary ioctl interface. - * - * Copyright 1998 by Werner Cornelius (werner@ikt.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#include -#include - -/******************************************/ -/* IOCTL codes for interface to user prog */ -/******************************************/ -#define DIVERT_IIOC_VERSION 0x01 /* actual version */ -#define IIOCGETVER _IO('I', 1) /* get version of interface */ -#define IIOCGETDRV _IO('I', 2) /* get driver number */ -#define IIOCGETNAM _IO('I', 3) /* get driver name */ -#define IIOCGETRULE _IO('I', 4) /* read one rule */ -#define IIOCMODRULE _IO('I', 5) /* modify/replace a rule */ -#define IIOCINSRULE _IO('I', 6) /* insert/append one rule */ -#define IIOCDELRULE _IO('I', 7) /* delete a rule */ -#define IIOCDODFACT _IO('I', 8) /* hangup/reject/alert/immediately deflect a call */ -#define IIOCDOCFACT _IO('I', 9) /* activate control forwarding in PBX */ -#define IIOCDOCFDIS _IO('I', 10) /* deactivate control forwarding in PBX */ -#define IIOCDOCFINT _IO('I', 11) /* interrogate control forwarding in PBX */ - -/*************************************/ -/* states reported through interface */ -/*************************************/ -#define DEFLECT_IGNORE 0 /* ignore incoming call */ -#define DEFLECT_REPORT 1 /* only report */ -#define DEFLECT_PROCEED 2 /* deflect when externally triggered */ -#define DEFLECT_ALERT 3 /* alert and deflect after delay */ -#define DEFLECT_REJECT 4 /* reject immediately */ -#define DIVERT_ACTIVATE 5 /* diversion activate */ -#define DIVERT_DEACTIVATE 6 /* diversion deactivate */ -#define DIVERT_REPORT 7 /* interrogation result */ -#define DEFLECT_AUTODEL 255 /* only for internal use */ - -#define DEFLECT_ALL_IDS 0xFFFFFFFF /* all drivers selected */ - -typedef struct { - ulong drvid; /* driver ids, bit mapped */ - char my_msn[35]; /* desired msn, subaddr allowed */ - char caller[35]; /* caller id, partial string with * + subaddr allowed */ - char to_nr[35]; /* deflected to number incl. subaddress */ - u_char si1, si2; /* service indicators, si1=bitmask, si1+2 0 = all */ - u_char screen; /* screening: 0 = no info, 1 = info, 2 = nfo with nr */ - u_char callopt; /* option for call handling: - 0 = all calls - 1 = only non waiting calls - 2 = only waiting calls */ - u_char action; /* desired action: - 0 = don't report call -> ignore - 1 = report call, do not allow/proceed for deflection - 2 = report call, send proceed, wait max waittime secs - 3 = report call, alert and deflect after waittime - 4 = report call, reject immediately - actions 1-2 only take place if interface is opened - */ - u_char waittime; /* maximum wait time for proceeding */ -} divert_rule; - -typedef union { - int drv_version; /* return of driver version */ - struct { - int drvid; /* id of driver */ - char drvnam[30]; /* name of driver */ - } getid; - struct { - int ruleidx; /* index of rule */ - divert_rule rule; /* rule parms */ - } getsetrule; - struct { - u_char subcmd; /* 0 = hangup/reject, - 1 = alert, - 2 = deflect */ - ulong callid; /* id of call delivered by ascii output */ - char to_nr[35]; /* destination when deflect, - else uus1 string (maxlen 31), - data from rule used if empty */ - } fwd_ctrl; - struct { - int drvid; /* id of driver */ - u_char cfproc; /* cfu = 0, cfb = 1, cfnr = 2 */ - ulong procid; /* process id returned when no error */ - u_char service; /* basically coded service, 0 = all */ - char msn[25]; /* desired msn, empty = all */ - char fwd_nr[35];/* forwarded to number + subaddress */ - } cf_ctrl; -} divert_ioctl; - -#ifdef __KERNEL__ - -#include -#include - -#define AUTODEL_TIME 30 /* timeout in s to delete internal entries */ - -/**************************************************/ -/* structure keeping ascii info for device output */ -/**************************************************/ -struct divert_info { - struct divert_info *next; - ulong usage_cnt; /* number of files still to work */ - char info_start[2]; /* info string start */ -}; - - -/**************/ -/* Prototypes */ -/**************/ -extern spinlock_t divert_lock; - -extern ulong if_used; /* number of interface users */ -extern int divert_dev_deinit(void); -extern int divert_dev_init(void); -extern void put_info_buffer(char *); -extern int ll_callback(isdn_ctrl *); -extern isdn_divert_if divert_if; -extern divert_rule *getruleptr(int); -extern int insertrule(int, divert_rule *); -extern int deleterule(int); -extern void deleteprocs(void); -extern int deflect_extern_action(u_char, ulong, char *); -extern int cf_command(int, int, u_char, char *, u_char, char *, ulong *); - -#endif /* __KERNEL__ */ diff --git a/drivers/isdn/i4l/Kconfig b/drivers/isdn/i4l/Kconfig deleted file mode 100644 index cacde8de38a3..000000000000 --- a/drivers/isdn/i4l/Kconfig +++ /dev/null @@ -1,127 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Old ISDN4Linux config -# - -if ISDN_I4L - -config ISDN_PPP - bool "Support synchronous PPP" - depends on INET - select SLHC - help - Over digital connections such as ISDN, there is no need to - synchronize sender and recipient's clocks with start and stop bits - as is done over analog telephone lines. Instead, one can use - "synchronous PPP". Saying Y here will include this protocol. This - protocol is used by Cisco and Sun for example. So you want to say Y - here if the other end of your ISDN connection supports it. You will - need a special version of pppd (called ipppd) for using this - feature. See and - for more information. - -config ISDN_PPP_VJ - bool "Use VJ-compression with synchronous PPP" - depends on ISDN_PPP - help - This enables Van Jacobson header compression for synchronous PPP. - Say Y if the other end of the connection supports it. - -config ISDN_MPP - bool "Support generic MP (RFC 1717)" - depends on ISDN_PPP - help - With synchronous PPP enabled, it is possible to increase throughput - by bundling several ISDN-connections, using this protocol. See - for more information. - -config IPPP_FILTER - bool "Filtering for synchronous PPP" - depends on ISDN_PPP - help - Say Y here if you want to be able to filter the packets passing over - IPPP interfaces. This allows you to control which packets count as - activity (i.e. which packets will reset the idle timer or bring up - a demand-dialled link) and which packets are to be dropped entirely. - You need to say Y here if you wish to use the pass-filter and - active-filter options to ipppd. - -config ISDN_PPP_BSDCOMP - tristate "Support BSD compression" - depends on ISDN_PPP - help - Support for the BSD-Compress compression method for PPP, which uses - the LZW compression method to compress each PPP packet before it is - sent over the wire. The machine at the other end of the PPP link - (usually your ISP) has to support the BSD-Compress compression - method as well for this to be useful. Even if they don't support it, - it is safe to say Y here. - -config ISDN_AUDIO - bool "Support audio via ISDN" - help - If you say Y here, the modem-emulator will support a subset of the - EIA Class 8 Voice commands. Using a getty with voice-support - (mgetty+sendfax by with an extension, available - with the ISDN utility package for example), you will be able to use - your Linux box as an ISDN-answering machine. Of course, this must be - supported by the lowlevel driver also. Currently, the HiSax driver - is the only voice-supporting driver. See - for more information. - -config ISDN_TTY_FAX - bool "Support AT-Fax Class 1 and 2 commands" - depends on ISDN_AUDIO - help - If you say Y here, the modem-emulator will support a subset of the - Fax Class 1 and 2 commands. Using a getty with fax-support - (mgetty+sendfax, hylafax), you will be able to use your Linux box as - an ISDN-fax-machine. This must be supported by the lowlevel driver - also. See for more information. - -config ISDN_X25 - bool "X.25 PLP on top of ISDN" - depends on X25 - help - This feature provides the X.25 protocol over ISDN connections. - See for more information - if you are thinking about using this. - - -menu "ISDN feature submodules" - -config ISDN_DRV_LOOP - tristate "isdnloop support" - depends on BROKEN_ON_SMP - help - This driver provides a virtual ISDN card. Its primary purpose is - testing of linklevel features or configuration without getting - charged by your service-provider for lots of phone calls. - You need will need the loopctrl utility from the latest isdn4k-utils - package to set up this driver. - -config ISDN_DIVERSION - tristate "Support isdn diversion services" - help - This option allows you to use some supplementary diversion - services in conjunction with the HiSax driver on an EURO/DSS1 - line. - - Supported options are CD (call deflection), CFU (Call forward - unconditional), CFB (Call forward when busy) and CFNR (call forward - not reachable). Additionally the actual CFU, CFB and CFNR state may - be interrogated. - - The use of CFU, CFB, CFNR and interrogation may be limited to some - countries. The keypad protocol is still not implemented. CD should - work in all countries if the service has been subscribed to. - - Please read the file . - -endmenu - -comment "ISDN4Linux hardware drivers" - -# end ISDN_I4L -endif - diff --git a/drivers/isdn/i4l/Makefile b/drivers/isdn/i4l/Makefile index be77500c9e86..11fe697739d5 100644 --- a/drivers/isdn/i4l/Makefile +++ b/drivers/isdn/i4l/Makefile @@ -3,18 +3,4 @@ # Each configuration option enables a list of files. -obj-$(CONFIG_ISDN_I4L) += isdn.o -obj-$(CONFIG_ISDN_PPP_BSDCOMP) += isdn_bsdcomp.o obj-$(CONFIG_ISDN_HDLC) += isdnhdlc.o - -# Multipart objects. - -isdn-y := isdn_net.o isdn_tty.o isdn_v110.o isdn_common.o - -# Optional parts of multipart objects. - -isdn-$(CONFIG_ISDN_PPP) += isdn_ppp.o -isdn-$(CONFIG_ISDN_X25) += isdn_concap.o isdn_x25iface.o -isdn-$(CONFIG_ISDN_AUDIO) += isdn_audio.o -isdn-$(CONFIG_ISDN_TTY_FAX) += isdn_ttyfax.o - diff --git a/drivers/isdn/i4l/isdn_audio.c b/drivers/isdn/i4l/isdn_audio.c deleted file mode 100644 index b6bcd1eca128..000000000000 --- a/drivers/isdn/i4l/isdn_audio.c +++ /dev/null @@ -1,711 +0,0 @@ -/* $Id: isdn_audio.c,v 1.1.2.2 2004/01/12 22:37:18 keil Exp $ - * - * Linux ISDN subsystem, audio conversion and compression (linklevel). - * - * Copyright 1994-1999 by Fritz Elfert (fritz@isdn4linux.de) - * DTMF code (c) 1996 by Christian Mock (cm@kukuruz.ping.at) - * Silence detection (c) 1998 by Armin Schindler (mac@gismo.telekom.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#include -#include -#include "isdn_audio.h" -#include "isdn_common.h" - -char *isdn_audio_revision = "$Revision: 1.1.2.2 $"; - -/* - * Misc. lookup-tables. - */ - -/* ulaw -> signed 16-bit */ -static short isdn_audio_ulaw_to_s16[] = -{ - 0x8284, 0x8684, 0x8a84, 0x8e84, 0x9284, 0x9684, 0x9a84, 0x9e84, - 0xa284, 0xa684, 0xaa84, 0xae84, 0xb284, 0xb684, 0xba84, 0xbe84, - 0xc184, 0xc384, 0xc584, 0xc784, 0xc984, 0xcb84, 0xcd84, 0xcf84, - 0xd184, 0xd384, 0xd584, 0xd784, 0xd984, 0xdb84, 0xdd84, 0xdf84, - 0xe104, 0xe204, 0xe304, 0xe404, 0xe504, 0xe604, 0xe704, 0xe804, - 0xe904, 0xea04, 0xeb04, 0xec04, 0xed04, 0xee04, 0xef04, 0xf004, - 0xf0c4, 0xf144, 0xf1c4, 0xf244, 0xf2c4, 0xf344, 0xf3c4, 0xf444, - 0xf4c4, 0xf544, 0xf5c4, 0xf644, 0xf6c4, 0xf744, 0xf7c4, 0xf844, - 0xf8a4, 0xf8e4, 0xf924, 0xf964, 0xf9a4, 0xf9e4, 0xfa24, 0xfa64, - 0xfaa4, 0xfae4, 0xfb24, 0xfb64, 0xfba4, 0xfbe4, 0xfc24, 0xfc64, - 0xfc94, 0xfcb4, 0xfcd4, 0xfcf4, 0xfd14, 0xfd34, 0xfd54, 0xfd74, - 0xfd94, 0xfdb4, 0xfdd4, 0xfdf4, 0xfe14, 0xfe34, 0xfe54, 0xfe74, - 0xfe8c, 0xfe9c, 0xfeac, 0xfebc, 0xfecc, 0xfedc, 0xfeec, 0xfefc, - 0xff0c, 0xff1c, 0xff2c, 0xff3c, 0xff4c, 0xff5c, 0xff6c, 0xff7c, - 0xff88, 0xff90, 0xff98, 0xffa0, 0xffa8, 0xffb0, 0xffb8, 0xffc0, - 0xffc8, 0xffd0, 0xffd8, 0xffe0, 0xffe8, 0xfff0, 0xfff8, 0x0000, - 0x7d7c, 0x797c, 0x757c, 0x717c, 0x6d7c, 0x697c, 0x657c, 0x617c, - 0x5d7c, 0x597c, 0x557c, 0x517c, 0x4d7c, 0x497c, 0x457c, 0x417c, - 0x3e7c, 0x3c7c, 0x3a7c, 0x387c, 0x367c, 0x347c, 0x327c, 0x307c, - 0x2e7c, 0x2c7c, 0x2a7c, 0x287c, 0x267c, 0x247c, 0x227c, 0x207c, - 0x1efc, 0x1dfc, 0x1cfc, 0x1bfc, 0x1afc, 0x19fc, 0x18fc, 0x17fc, - 0x16fc, 0x15fc, 0x14fc, 0x13fc, 0x12fc, 0x11fc, 0x10fc, 0x0ffc, - 0x0f3c, 0x0ebc, 0x0e3c, 0x0dbc, 0x0d3c, 0x0cbc, 0x0c3c, 0x0bbc, - 0x0b3c, 0x0abc, 0x0a3c, 0x09bc, 0x093c, 0x08bc, 0x083c, 0x07bc, - 0x075c, 0x071c, 0x06dc, 0x069c, 0x065c, 0x061c, 0x05dc, 0x059c, - 0x055c, 0x051c, 0x04dc, 0x049c, 0x045c, 0x041c, 0x03dc, 0x039c, - 0x036c, 0x034c, 0x032c, 0x030c, 0x02ec, 0x02cc, 0x02ac, 0x028c, - 0x026c, 0x024c, 0x022c, 0x020c, 0x01ec, 0x01cc, 0x01ac, 0x018c, - 0x0174, 0x0164, 0x0154, 0x0144, 0x0134, 0x0124, 0x0114, 0x0104, - 0x00f4, 0x00e4, 0x00d4, 0x00c4, 0x00b4, 0x00a4, 0x0094, 0x0084, - 0x0078, 0x0070, 0x0068, 0x0060, 0x0058, 0x0050, 0x0048, 0x0040, - 0x0038, 0x0030, 0x0028, 0x0020, 0x0018, 0x0010, 0x0008, 0x0000 -}; - -/* alaw -> signed 16-bit */ -static short isdn_audio_alaw_to_s16[] = -{ - 0x13fc, 0xec04, 0x0144, 0xfebc, 0x517c, 0xae84, 0x051c, 0xfae4, - 0x0a3c, 0xf5c4, 0x0048, 0xffb8, 0x287c, 0xd784, 0x028c, 0xfd74, - 0x1bfc, 0xe404, 0x01cc, 0xfe34, 0x717c, 0x8e84, 0x071c, 0xf8e4, - 0x0e3c, 0xf1c4, 0x00c4, 0xff3c, 0x387c, 0xc784, 0x039c, 0xfc64, - 0x0ffc, 0xf004, 0x0104, 0xfefc, 0x417c, 0xbe84, 0x041c, 0xfbe4, - 0x083c, 0xf7c4, 0x0008, 0xfff8, 0x207c, 0xdf84, 0x020c, 0xfdf4, - 0x17fc, 0xe804, 0x018c, 0xfe74, 0x617c, 0x9e84, 0x061c, 0xf9e4, - 0x0c3c, 0xf3c4, 0x0084, 0xff7c, 0x307c, 0xcf84, 0x030c, 0xfcf4, - 0x15fc, 0xea04, 0x0164, 0xfe9c, 0x597c, 0xa684, 0x059c, 0xfa64, - 0x0b3c, 0xf4c4, 0x0068, 0xff98, 0x2c7c, 0xd384, 0x02cc, 0xfd34, - 0x1dfc, 0xe204, 0x01ec, 0xfe14, 0x797c, 0x8684, 0x07bc, 0xf844, - 0x0f3c, 0xf0c4, 0x00e4, 0xff1c, 0x3c7c, 0xc384, 0x03dc, 0xfc24, - 0x11fc, 0xee04, 0x0124, 0xfedc, 0x497c, 0xb684, 0x049c, 0xfb64, - 0x093c, 0xf6c4, 0x0028, 0xffd8, 0x247c, 0xdb84, 0x024c, 0xfdb4, - 0x19fc, 0xe604, 0x01ac, 0xfe54, 0x697c, 0x9684, 0x069c, 0xf964, - 0x0d3c, 0xf2c4, 0x00a4, 0xff5c, 0x347c, 0xcb84, 0x034c, 0xfcb4, - 0x12fc, 0xed04, 0x0134, 0xfecc, 0x4d7c, 0xb284, 0x04dc, 0xfb24, - 0x09bc, 0xf644, 0x0038, 0xffc8, 0x267c, 0xd984, 0x026c, 0xfd94, - 0x1afc, 0xe504, 0x01ac, 0xfe54, 0x6d7c, 0x9284, 0x06dc, 0xf924, - 0x0dbc, 0xf244, 0x00b4, 0xff4c, 0x367c, 0xc984, 0x036c, 0xfc94, - 0x0f3c, 0xf0c4, 0x00f4, 0xff0c, 0x3e7c, 0xc184, 0x03dc, 0xfc24, - 0x07bc, 0xf844, 0x0008, 0xfff8, 0x1efc, 0xe104, 0x01ec, 0xfe14, - 0x16fc, 0xe904, 0x0174, 0xfe8c, 0x5d7c, 0xa284, 0x05dc, 0xfa24, - 0x0bbc, 0xf444, 0x0078, 0xff88, 0x2e7c, 0xd184, 0x02ec, 0xfd14, - 0x14fc, 0xeb04, 0x0154, 0xfeac, 0x557c, 0xaa84, 0x055c, 0xfaa4, - 0x0abc, 0xf544, 0x0058, 0xffa8, 0x2a7c, 0xd584, 0x02ac, 0xfd54, - 0x1cfc, 0xe304, 0x01cc, 0xfe34, 0x757c, 0x8a84, 0x075c, 0xf8a4, - 0x0ebc, 0xf144, 0x00d4, 0xff2c, 0x3a7c, 0xc584, 0x039c, 0xfc64, - 0x10fc, 0xef04, 0x0114, 0xfeec, 0x457c, 0xba84, 0x045c, 0xfba4, - 0x08bc, 0xf744, 0x0018, 0xffe8, 0x227c, 0xdd84, 0x022c, 0xfdd4, - 0x18fc, 0xe704, 0x018c, 0xfe74, 0x657c, 0x9a84, 0x065c, 0xf9a4, - 0x0cbc, 0xf344, 0x0094, 0xff6c, 0x327c, 0xcd84, 0x032c, 0xfcd4 -}; - -/* alaw -> ulaw */ -static char isdn_audio_alaw_to_ulaw[] = -{ - 0xab, 0x2b, 0xe3, 0x63, 0x8b, 0x0b, 0xc9, 0x49, - 0xba, 0x3a, 0xf6, 0x76, 0x9b, 0x1b, 0xd7, 0x57, - 0xa3, 0x23, 0xdd, 0x5d, 0x83, 0x03, 0xc1, 0x41, - 0xb2, 0x32, 0xeb, 0x6b, 0x93, 0x13, 0xcf, 0x4f, - 0xaf, 0x2f, 0xe7, 0x67, 0x8f, 0x0f, 0xcd, 0x4d, - 0xbe, 0x3e, 0xfe, 0x7e, 0x9f, 0x1f, 0xdb, 0x5b, - 0xa7, 0x27, 0xdf, 0x5f, 0x87, 0x07, 0xc5, 0x45, - 0xb6, 0x36, 0xef, 0x6f, 0x97, 0x17, 0xd3, 0x53, - 0xa9, 0x29, 0xe1, 0x61, 0x89, 0x09, 0xc7, 0x47, - 0xb8, 0x38, 0xf2, 0x72, 0x99, 0x19, 0xd5, 0x55, - 0xa1, 0x21, 0xdc, 0x5c, 0x81, 0x01, 0xbf, 0x3f, - 0xb0, 0x30, 0xe9, 0x69, 0x91, 0x11, 0xce, 0x4e, - 0xad, 0x2d, 0xe5, 0x65, 0x8d, 0x0d, 0xcb, 0x4b, - 0xbc, 0x3c, 0xfa, 0x7a, 0x9d, 0x1d, 0xd9, 0x59, - 0xa5, 0x25, 0xde, 0x5e, 0x85, 0x05, 0xc3, 0x43, - 0xb4, 0x34, 0xed, 0x6d, 0x95, 0x15, 0xd1, 0x51, - 0xac, 0x2c, 0xe4, 0x64, 0x8c, 0x0c, 0xca, 0x4a, - 0xbb, 0x3b, 0xf8, 0x78, 0x9c, 0x1c, 0xd8, 0x58, - 0xa4, 0x24, 0xde, 0x5e, 0x84, 0x04, 0xc2, 0x42, - 0xb3, 0x33, 0xec, 0x6c, 0x94, 0x14, 0xd0, 0x50, - 0xb0, 0x30, 0xe8, 0x68, 0x90, 0x10, 0xce, 0x4e, - 0xbf, 0x3f, 0xfe, 0x7e, 0xa0, 0x20, 0xdc, 0x5c, - 0xa8, 0x28, 0xe0, 0x60, 0x88, 0x08, 0xc6, 0x46, - 0xb7, 0x37, 0xf0, 0x70, 0x98, 0x18, 0xd4, 0x54, - 0xaa, 0x2a, 0xe2, 0x62, 0x8a, 0x0a, 0xc8, 0x48, - 0xb9, 0x39, 0xf4, 0x74, 0x9a, 0x1a, 0xd6, 0x56, - 0xa2, 0x22, 0xdd, 0x5d, 0x82, 0x02, 0xc0, 0x40, - 0xb1, 0x31, 0xea, 0x6a, 0x92, 0x12, 0xcf, 0x4f, - 0xae, 0x2e, 0xe6, 0x66, 0x8e, 0x0e, 0xcc, 0x4c, - 0xbd, 0x3d, 0xfc, 0x7c, 0x9e, 0x1e, 0xda, 0x5a, - 0xa6, 0x26, 0xdf, 0x5f, 0x86, 0x06, 0xc4, 0x44, - 0xb5, 0x35, 0xee, 0x6e, 0x96, 0x16, 0xd2, 0x52 -}; - -/* ulaw -> alaw */ -static char isdn_audio_ulaw_to_alaw[] = -{ - 0xab, 0x55, 0xd5, 0x15, 0x95, 0x75, 0xf5, 0x35, - 0xb5, 0x45, 0xc5, 0x05, 0x85, 0x65, 0xe5, 0x25, - 0xa5, 0x5d, 0xdd, 0x1d, 0x9d, 0x7d, 0xfd, 0x3d, - 0xbd, 0x4d, 0xcd, 0x0d, 0x8d, 0x6d, 0xed, 0x2d, - 0xad, 0x51, 0xd1, 0x11, 0x91, 0x71, 0xf1, 0x31, - 0xb1, 0x41, 0xc1, 0x01, 0x81, 0x61, 0xe1, 0x21, - 0x59, 0xd9, 0x19, 0x99, 0x79, 0xf9, 0x39, 0xb9, - 0x49, 0xc9, 0x09, 0x89, 0x69, 0xe9, 0x29, 0xa9, - 0xd7, 0x17, 0x97, 0x77, 0xf7, 0x37, 0xb7, 0x47, - 0xc7, 0x07, 0x87, 0x67, 0xe7, 0x27, 0xa7, 0xdf, - 0x9f, 0x7f, 0xff, 0x3f, 0xbf, 0x4f, 0xcf, 0x0f, - 0x8f, 0x6f, 0xef, 0x2f, 0x53, 0x13, 0x73, 0x33, - 0xb3, 0x43, 0xc3, 0x03, 0x83, 0x63, 0xe3, 0x23, - 0xa3, 0x5b, 0xdb, 0x1b, 0x9b, 0x7b, 0xfb, 0x3b, - 0xbb, 0xbb, 0x4b, 0x4b, 0xcb, 0xcb, 0x0b, 0x0b, - 0x8b, 0x8b, 0x6b, 0x6b, 0xeb, 0xeb, 0x2b, 0x2b, - 0xab, 0x54, 0xd4, 0x14, 0x94, 0x74, 0xf4, 0x34, - 0xb4, 0x44, 0xc4, 0x04, 0x84, 0x64, 0xe4, 0x24, - 0xa4, 0x5c, 0xdc, 0x1c, 0x9c, 0x7c, 0xfc, 0x3c, - 0xbc, 0x4c, 0xcc, 0x0c, 0x8c, 0x6c, 0xec, 0x2c, - 0xac, 0x50, 0xd0, 0x10, 0x90, 0x70, 0xf0, 0x30, - 0xb0, 0x40, 0xc0, 0x00, 0x80, 0x60, 0xe0, 0x20, - 0x58, 0xd8, 0x18, 0x98, 0x78, 0xf8, 0x38, 0xb8, - 0x48, 0xc8, 0x08, 0x88, 0x68, 0xe8, 0x28, 0xa8, - 0xd6, 0x16, 0x96, 0x76, 0xf6, 0x36, 0xb6, 0x46, - 0xc6, 0x06, 0x86, 0x66, 0xe6, 0x26, 0xa6, 0xde, - 0x9e, 0x7e, 0xfe, 0x3e, 0xbe, 0x4e, 0xce, 0x0e, - 0x8e, 0x6e, 0xee, 0x2e, 0x52, 0x12, 0x72, 0x32, - 0xb2, 0x42, 0xc2, 0x02, 0x82, 0x62, 0xe2, 0x22, - 0xa2, 0x5a, 0xda, 0x1a, 0x9a, 0x7a, 0xfa, 0x3a, - 0xba, 0xba, 0x4a, 0x4a, 0xca, 0xca, 0x0a, 0x0a, - 0x8a, 0x8a, 0x6a, 0x6a, 0xea, 0xea, 0x2a, 0x2a -}; - -#define NCOEFF 8 /* number of frequencies to be analyzed */ -#define DTMF_TRESH 4000 /* above this is dtmf */ -#define SILENCE_TRESH 200 /* below this is silence */ -#define AMP_BITS 9 /* bits per sample, reduced to avoid overflow */ -#define LOGRP 0 -#define HIGRP 1 - -/* For DTMF recognition: - * 2 * cos(2 * PI * k / N) precalculated for all k - */ -static int cos2pik[NCOEFF] = -{ - 55813, 53604, 51193, 48591, 38114, 33057, 25889, 18332 -}; - -static char dtmf_matrix[4][4] = -{ - {'1', '2', '3', 'A'}, - {'4', '5', '6', 'B'}, - {'7', '8', '9', 'C'}, - {'*', '0', '#', 'D'} -}; - -static inline void -isdn_audio_tlookup(const u_char *table, u_char *buff, unsigned long n) -{ -#ifdef __i386__ - unsigned long d0, d1, d2, d3; - __asm__ __volatile__( - "cld\n" - "1:\tlodsb\n\t" - "xlatb\n\t" - "stosb\n\t" - "loop 1b\n\t" - : "=&b"(d0), "=&c"(d1), "=&D"(d2), "=&S"(d3) - : "0"((long) table), "1"(n), "2"((long) buff), "3"((long) buff) - : "memory", "ax"); -#else - while (n--) - *buff = table[*(unsigned char *)buff], buff++; -#endif -} - -void -isdn_audio_ulaw2alaw(unsigned char *buff, unsigned long len) -{ - isdn_audio_tlookup(isdn_audio_ulaw_to_alaw, buff, len); -} - -void -isdn_audio_alaw2ulaw(unsigned char *buff, unsigned long len) -{ - isdn_audio_tlookup(isdn_audio_alaw_to_ulaw, buff, len); -} - -/* - * linear <-> adpcm conversion stuff - * Most parts from the mgetty-package. - * (C) by Gert Doering and Klaus Weidner - * Used by permission of Gert Doering - */ - - -#define ZEROTRAP /* turn on the trap as per the MIL-STD */ -#undef ZEROTRAP -#define BIAS 0x84 /* define the add-in bias for 16 bit samples */ -#define CLIP 32635 - -static unsigned char -isdn_audio_linear2ulaw(int sample) -{ - static int exp_lut[256] = - { - 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - }; - int sign, - exponent, - mantissa; - unsigned char ulawbyte; - - /* Get the sample into sign-magnitude. */ - sign = (sample >> 8) & 0x80; /* set aside the sign */ - if (sign != 0) - sample = -sample; /* get magnitude */ - if (sample > CLIP) - sample = CLIP; /* clip the magnitude */ - - /* Convert from 16 bit linear to ulaw. */ - sample = sample + BIAS; - exponent = exp_lut[(sample >> 7) & 0xFF]; - mantissa = (sample >> (exponent + 3)) & 0x0F; - ulawbyte = ~(sign | (exponent << 4) | mantissa); -#ifdef ZEROTRAP - /* optional CCITT trap */ - if (ulawbyte == 0) - ulawbyte = 0x02; -#endif - return (ulawbyte); -} - - -static int Mx[3][8] = -{ - {0x3800, 0x5600, 0, 0, 0, 0, 0, 0}, - {0x399a, 0x3a9f, 0x4d14, 0x6607, 0, 0, 0, 0}, - {0x3556, 0x3556, 0x399A, 0x3A9F, 0x4200, 0x4D14, 0x6607, 0x6607}, -}; - -static int bitmask[9] = -{ - 0, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff -}; - -static int -isdn_audio_get_bits(adpcm_state *s, unsigned char **in, int *len) -{ - while (s->nleft < s->nbits) { - int d = *((*in)++); - (*len)--; - s->word = (s->word << 8) | d; - s->nleft += 8; - } - s->nleft -= s->nbits; - return (s->word >> s->nleft) & bitmask[s->nbits]; -} - -static void -isdn_audio_put_bits(int data, int nbits, adpcm_state *s, - unsigned char **out, int *len) -{ - s->word = (s->word << nbits) | (data & bitmask[nbits]); - s->nleft += nbits; - while (s->nleft >= 8) { - int d = (s->word >> (s->nleft - 8)); - *(out[0]++) = d & 255; - (*len)++; - s->nleft -= 8; - } -} - -adpcm_state * -isdn_audio_adpcm_init(adpcm_state *s, int nbits) -{ - if (!s) - s = kmalloc(sizeof(adpcm_state), GFP_ATOMIC); - if (s) { - s->a = 0; - s->d = 5; - s->word = 0; - s->nleft = 0; - s->nbits = nbits; - } - return s; -} - -dtmf_state * -isdn_audio_dtmf_init(dtmf_state *s) -{ - if (!s) - s = kmalloc(sizeof(dtmf_state), GFP_ATOMIC); - if (s) { - s->idx = 0; - s->last = ' '; - } - return s; -} - -/* - * Decompression of adpcm data to a/u-law - * - */ - -int -isdn_audio_adpcm2xlaw(adpcm_state *s, int fmt, unsigned char *in, - unsigned char *out, int len) -{ - int a = s->a; - int d = s->d; - int nbits = s->nbits; - int olen = 0; - - while (len) { - int e = isdn_audio_get_bits(s, &in, &len); - int sign; - - if (nbits == 4 && e == 0) - d = 4; - sign = (e >> (nbits - 1)) ? -1 : 1; - e &= bitmask[nbits - 1]; - a += sign * ((e << 1) + 1) * d >> 1; - if (d & 1) - a++; - if (fmt) - *out++ = isdn_audio_ulaw_to_alaw[ - isdn_audio_linear2ulaw(a << 2)]; - else - *out++ = isdn_audio_linear2ulaw(a << 2); - olen++; - d = (d * Mx[nbits - 2][e] + 0x2000) >> 14; - if (d < 5) - d = 5; - } - s->a = a; - s->d = d; - return olen; -} - -int -isdn_audio_xlaw2adpcm(adpcm_state *s, int fmt, unsigned char *in, - unsigned char *out, int len) -{ - int a = s->a; - int d = s->d; - int nbits = s->nbits; - int olen = 0; - - while (len--) { - int e = 0, - nmax = 1 << (nbits - 1); - int sign, - delta; - - if (fmt) - delta = (isdn_audio_alaw_to_s16[*in++] >> 2) - a; - else - delta = (isdn_audio_ulaw_to_s16[*in++] >> 2) - a; - if (delta < 0) { - e = nmax; - delta = -delta; - } - while (--nmax && delta > d) { - delta -= d; - e++; - } - if (nbits == 4 && ((e & 0x0f) == 0)) - e = 8; - isdn_audio_put_bits(e, nbits, s, &out, &olen); - sign = (e >> (nbits - 1)) ? -1 : 1; - e &= bitmask[nbits - 1]; - - a += sign * ((e << 1) + 1) * d >> 1; - if (d & 1) - a++; - d = (d * Mx[nbits - 2][e] + 0x2000) >> 14; - if (d < 5) - d = 5; - } - s->a = a; - s->d = d; - return olen; -} - -/* - * Goertzel algorithm. - * See http://ptolemy.eecs.berkeley.edu/papers/96/dtmf_ict/ - * for more info. - * Result is stored into an sk_buff and queued up for later - * evaluation. - */ -static void -isdn_audio_goertzel(int *sample, modem_info *info) -{ - int sk, - sk1, - sk2; - int k, - n; - struct sk_buff *skb; - int *result; - - skb = dev_alloc_skb(sizeof(int) * NCOEFF); - if (!skb) { - printk(KERN_WARNING - "isdn_audio: Could not alloc DTMF result for ttyI%d\n", - info->line); - return; - } - result = skb_put(skb, sizeof(int) * NCOEFF); - for (k = 0; k < NCOEFF; k++) { - sk = sk1 = sk2 = 0; - for (n = 0; n < DTMF_NPOINTS; n++) { - sk = sample[n] + ((cos2pik[k] * sk1) >> 15) - sk2; - sk2 = sk1; - sk1 = sk; - } - /* Avoid overflows */ - sk >>= 1; - sk2 >>= 1; - /* compute |X(k)|**2 */ - /* report overflows. This should not happen. */ - /* Comment this out if desired */ - if (sk < -32768 || sk > 32767) - printk(KERN_DEBUG - "isdn_audio: dtmf goertzel overflow, sk=%d\n", sk); - if (sk2 < -32768 || sk2 > 32767) - printk(KERN_DEBUG - "isdn_audio: dtmf goertzel overflow, sk2=%d\n", sk2); - result[k] = - ((sk * sk) >> AMP_BITS) - - ((((cos2pik[k] * sk) >> 15) * sk2) >> AMP_BITS) + - ((sk2 * sk2) >> AMP_BITS); - } - skb_queue_tail(&info->dtmf_queue, skb); - isdn_timer_ctrl(ISDN_TIMER_MODEMREAD, 1); -} - -void -isdn_audio_eval_dtmf(modem_info *info) -{ - struct sk_buff *skb; - int *result; - dtmf_state *s; - int silence; - int i; - int di; - int ch; - int grp[2]; - char what; - char *p; - int thresh; - - while ((skb = skb_dequeue(&info->dtmf_queue))) { - result = (int *) skb->data; - s = info->dtmf_state; - grp[LOGRP] = grp[HIGRP] = -1; - silence = 0; - thresh = 0; - for (i = 0; i < NCOEFF; i++) { - if (result[i] > DTMF_TRESH) { - if (result[i] > thresh) - thresh = result[i]; - } - else if (result[i] < SILENCE_TRESH) - silence++; - } - if (silence == NCOEFF) - what = ' '; - else { - if (thresh > 0) { - thresh = thresh >> 4; /* touchtones must match within 12 dB */ - for (i = 0; i < NCOEFF; i++) { - if (result[i] < thresh) - continue; /* ignore */ - /* good level found. This is allowed only one time per group */ - if (i < NCOEFF / 2) { - /* lowgroup*/ - if (grp[LOGRP] >= 0) { - // Bad. Another tone found. */ - grp[LOGRP] = -1; - break; - } - else - grp[LOGRP] = i; - } - else { /* higroup */ - if (grp[HIGRP] >= 0) { // Bad. Another tone found. */ - grp[HIGRP] = -1; - break; - } - else - grp[HIGRP] = i - NCOEFF/2; - } - } - if ((grp[LOGRP] >= 0) && (grp[HIGRP] >= 0)) { - what = dtmf_matrix[grp[LOGRP]][grp[HIGRP]]; - if (s->last != ' ' && s->last != '.') - s->last = what; /* min. 1 non-DTMF between DTMF */ - } else - what = '.'; - } - else - what = '.'; - } - if ((what != s->last) && (what != ' ') && (what != '.')) { - printk(KERN_DEBUG "dtmf: tt='%c'\n", what); - p = skb->data; - *p++ = 0x10; - *p = what; - skb_trim(skb, 2); - ISDN_AUDIO_SKB_DLECOUNT(skb) = 0; - ISDN_AUDIO_SKB_LOCK(skb) = 0; - di = info->isdn_driver; - ch = info->isdn_channel; - __skb_queue_tail(&dev->drv[di]->rpqueue[ch], skb); - dev->drv[di]->rcvcount[ch] += 2; - /* Schedule dequeuing */ - if ((dev->modempoll) && (info->rcvsched)) - isdn_timer_ctrl(ISDN_TIMER_MODEMREAD, 1); - wake_up_interruptible(&dev->drv[di]->rcv_waitq[ch]); - } else - kfree_skb(skb); - s->last = what; - } -} - -/* - * Decode DTMF tones, queue result in separate sk_buf for - * later examination. - * Parameters: - * s = pointer to state-struct. - * buf = input audio data - * len = size of audio data. - * fmt = audio data format (0 = ulaw, 1 = alaw) - */ -void -isdn_audio_calc_dtmf(modem_info *info, unsigned char *buf, int len, int fmt) -{ - dtmf_state *s = info->dtmf_state; - int i; - int c; - - while (len) { - c = DTMF_NPOINTS - s->idx; - if (c > len) - c = len; - if (c <= 0) - break; - for (i = 0; i < c; i++) { - if (fmt) - s->buf[s->idx++] = - isdn_audio_alaw_to_s16[*buf++] >> (15 - AMP_BITS); - else - s->buf[s->idx++] = - isdn_audio_ulaw_to_s16[*buf++] >> (15 - AMP_BITS); - } - if (s->idx == DTMF_NPOINTS) { - isdn_audio_goertzel(s->buf, info); - s->idx = 0; - } - len -= c; - } -} - -silence_state * -isdn_audio_silence_init(silence_state *s) -{ - if (!s) - s = kmalloc(sizeof(silence_state), GFP_ATOMIC); - if (s) { - s->idx = 0; - s->state = 0; - } - return s; -} - -void -isdn_audio_calc_silence(modem_info *info, unsigned char *buf, int len, int fmt) -{ - silence_state *s = info->silence_state; - int i; - signed char c; - - if (!info->emu.vpar[1]) return; - - for (i = 0; i < len; i++) { - if (fmt) - c = isdn_audio_alaw_to_ulaw[*buf++]; - else - c = *buf++; - - if (c > 0) c -= 128; - c = abs(c); - - if (c > (info->emu.vpar[1] * 4)) { - s->idx = 0; - s->state = 1; - } else { - if (s->idx < 210000) s->idx++; - } - } -} - -void -isdn_audio_put_dle_code(modem_info *info, u_char code) -{ - struct sk_buff *skb; - int di; - int ch; - char *p; - - skb = dev_alloc_skb(2); - if (!skb) { - printk(KERN_WARNING - "isdn_audio: Could not alloc skb for ttyI%d\n", - info->line); - return; - } - p = skb_put(skb, 2); - p[0] = 0x10; - p[1] = code; - ISDN_AUDIO_SKB_DLECOUNT(skb) = 0; - ISDN_AUDIO_SKB_LOCK(skb) = 0; - di = info->isdn_driver; - ch = info->isdn_channel; - __skb_queue_tail(&dev->drv[di]->rpqueue[ch], skb); - dev->drv[di]->rcvcount[ch] += 2; - /* Schedule dequeuing */ - if ((dev->modempoll) && (info->rcvsched)) - isdn_timer_ctrl(ISDN_TIMER_MODEMREAD, 1); - wake_up_interruptible(&dev->drv[di]->rcv_waitq[ch]); -} - -void -isdn_audio_eval_silence(modem_info *info) -{ - silence_state *s = info->silence_state; - char what; - - what = ' '; - - if (s->idx > (info->emu.vpar[2] * 800)) { - s->idx = 0; - if (!s->state) { /* silence from beginning of rec */ - what = 's'; - } else { - what = 'q'; - } - } - if ((what == 's') || (what == 'q')) { - printk(KERN_DEBUG "ttyI%d: %s\n", info->line, - (what == 's') ? "silence" : "quiet"); - isdn_audio_put_dle_code(info, what); - } -} diff --git a/drivers/isdn/i4l/isdn_audio.h b/drivers/isdn/i4l/isdn_audio.h deleted file mode 100644 index 013c3582e0d1..000000000000 --- a/drivers/isdn/i4l/isdn_audio.h +++ /dev/null @@ -1,44 +0,0 @@ -/* $Id: isdn_audio.h,v 1.1.2.2 2004/01/12 22:37:18 keil Exp $ - * - * Linux ISDN subsystem, audio conversion and compression (linklevel). - * - * Copyright 1994-1999 by Fritz Elfert (fritz@isdn4linux.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#define DTMF_NPOINTS 205 /* Number of samples for DTMF recognition */ -typedef struct adpcm_state { - int a; - int d; - int word; - int nleft; - int nbits; -} adpcm_state; - -typedef struct dtmf_state { - char last; - char llast; - int idx; - int buf[DTMF_NPOINTS]; -} dtmf_state; - -typedef struct silence_state { - int state; - unsigned int idx; -} silence_state; - -extern void isdn_audio_ulaw2alaw(unsigned char *, unsigned long); -extern void isdn_audio_alaw2ulaw(unsigned char *, unsigned long); -extern adpcm_state *isdn_audio_adpcm_init(adpcm_state *, int); -extern int isdn_audio_adpcm2xlaw(adpcm_state *, int, unsigned char *, unsigned char *, int); -extern int isdn_audio_xlaw2adpcm(adpcm_state *, int, unsigned char *, unsigned char *, int); -extern void isdn_audio_calc_dtmf(modem_info *, unsigned char *, int, int); -extern void isdn_audio_eval_dtmf(modem_info *); -dtmf_state *isdn_audio_dtmf_init(dtmf_state *); -extern void isdn_audio_calc_silence(modem_info *, unsigned char *, int, int); -extern void isdn_audio_eval_silence(modem_info *); -silence_state *isdn_audio_silence_init(silence_state *); -extern void isdn_audio_put_dle_code(modem_info *, u_char); diff --git a/drivers/isdn/i4l/isdn_bsdcomp.c b/drivers/isdn/i4l/isdn_bsdcomp.c deleted file mode 100644 index 7f28b967ed19..000000000000 --- a/drivers/isdn/i4l/isdn_bsdcomp.c +++ /dev/null @@ -1,930 +0,0 @@ -/* - * BSD compression module - * - * Patched version for ISDN syncPPP written 1997/1998 by Michael Hipp - * The whole module is now SKB based. - * - */ - -/* - * Update: The Berkeley copyright was changed, and the change - * is retroactive to all "true" BSD software (ie everything - * from UCB as opposed to other peoples code that just carried - * the same license). The new copyright doesn't clash with the - * GPL, so the module-only restriction has been removed.. - */ - -/* - * Original copyright notice: - * - * Copyright (c) 1985, 1986 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * James A. Woods, derived from original work by Spencer Thomas - * and Joseph Orost. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* used in new tty drivers */ -#include /* used in new tty drivers */ -#include - -#include -#include - -#include - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include - -#include "isdn_ppp.h" - -MODULE_DESCRIPTION("ISDN4Linux: BSD Compression for PPP over ISDN"); -MODULE_LICENSE("Dual BSD/GPL"); - -#define BSD_VERSION(x) ((x) >> 5) -#define BSD_NBITS(x) ((x) & 0x1F) - -#define BSD_CURRENT_VERSION 1 - -#define DEBUG 1 - -/* - * A dictionary for doing BSD compress. - */ - -struct bsd_dict { - u32 fcode; - u16 codem1; /* output of hash table -1 */ - u16 cptr; /* map code to hash table entry */ -}; - -struct bsd_db { - int totlen; /* length of this structure */ - unsigned int hsize; /* size of the hash table */ - unsigned char hshift; /* used in hash function */ - unsigned char n_bits; /* current bits/code */ - unsigned char maxbits; /* maximum bits/code */ - unsigned char debug; /* non-zero if debug desired */ - unsigned char unit; /* ppp unit number */ - u16 seqno; /* sequence # of next packet */ - unsigned int mru; /* size of receive (decompress) bufr */ - unsigned int maxmaxcode; /* largest valid code */ - unsigned int max_ent; /* largest code in use */ - unsigned int in_count; /* uncompressed bytes, aged */ - unsigned int bytes_out; /* compressed bytes, aged */ - unsigned int ratio; /* recent compression ratio */ - unsigned int checkpoint; /* when to next check the ratio */ - unsigned int clear_count; /* times dictionary cleared */ - unsigned int incomp_count; /* incompressible packets */ - unsigned int incomp_bytes; /* incompressible bytes */ - unsigned int uncomp_count; /* uncompressed packets */ - unsigned int uncomp_bytes; /* uncompressed bytes */ - unsigned int comp_count; /* compressed packets */ - unsigned int comp_bytes; /* compressed bytes */ - unsigned short *lens; /* array of lengths of codes */ - struct bsd_dict *dict; /* dictionary */ - int xmit; -}; - -#define BSD_OVHD 2 /* BSD compress overhead/packet */ -#define MIN_BSD_BITS 9 -#define BSD_INIT_BITS MIN_BSD_BITS -#define MAX_BSD_BITS 15 - -/* - * the next two codes should not be changed lightly, as they must not - * lie within the contiguous general code space. - */ -#define CLEAR 256 /* table clear output code */ -#define FIRST 257 /* first free entry */ -#define LAST 255 - -#define MAXCODE(b) ((1 << (b)) - 1) -#define BADCODEM1 MAXCODE(MAX_BSD_BITS) - -#define BSD_HASH(prefix, suffix, hshift) ((((unsigned long)(suffix)) << (hshift)) \ - ^ (unsigned long)(prefix)) -#define BSD_KEY(prefix, suffix) ((((unsigned long)(suffix)) << 16) \ - + (unsigned long)(prefix)) - -#define CHECK_GAP 10000 /* Ratio check interval */ - -#define RATIO_SCALE_LOG 8 -#define RATIO_SCALE (1 << RATIO_SCALE_LOG) -#define RATIO_MAX (0x7fffffff >> RATIO_SCALE_LOG) - -/* - * clear the dictionary - */ - -static void bsd_clear(struct bsd_db *db) -{ - db->clear_count++; - db->max_ent = FIRST - 1; - db->n_bits = BSD_INIT_BITS; - db->bytes_out = 0; - db->in_count = 0; - db->incomp_count = 0; - db->ratio = 0; - db->checkpoint = CHECK_GAP; -} - -/* - * If the dictionary is full, then see if it is time to reset it. - * - * Compute the compression ratio using fixed-point arithmetic - * with 8 fractional bits. - * - * Since we have an infinite stream instead of a single file, - * watch only the local compression ratio. - * - * Since both peers must reset the dictionary at the same time even in - * the absence of CLEAR codes (while packets are incompressible), they - * must compute the same ratio. - */ -static int bsd_check(struct bsd_db *db) /* 1=output CLEAR */ -{ - unsigned int new_ratio; - - if (db->in_count >= db->checkpoint) - { - /* age the ratio by limiting the size of the counts */ - if (db->in_count >= RATIO_MAX || db->bytes_out >= RATIO_MAX) - { - db->in_count -= (db->in_count >> 2); - db->bytes_out -= (db->bytes_out >> 2); - } - - db->checkpoint = db->in_count + CHECK_GAP; - - if (db->max_ent >= db->maxmaxcode) - { - /* Reset the dictionary only if the ratio is worse, - * or if it looks as if it has been poisoned - * by incompressible data. - * - * This does not overflow, because - * db->in_count <= RATIO_MAX. - */ - - new_ratio = db->in_count << RATIO_SCALE_LOG; - if (db->bytes_out != 0) - { - new_ratio /= db->bytes_out; - } - - if (new_ratio < db->ratio || new_ratio < 1 * RATIO_SCALE) - { - bsd_clear(db); - return 1; - } - db->ratio = new_ratio; - } - } - return 0; -} - -/* - * Return statistics. - */ - -static void bsd_stats(void *state, struct compstat *stats) -{ - struct bsd_db *db = (struct bsd_db *) state; - - stats->unc_bytes = db->uncomp_bytes; - stats->unc_packets = db->uncomp_count; - stats->comp_bytes = db->comp_bytes; - stats->comp_packets = db->comp_count; - stats->inc_bytes = db->incomp_bytes; - stats->inc_packets = db->incomp_count; - stats->in_count = db->in_count; - stats->bytes_out = db->bytes_out; -} - -/* - * Reset state, as on a CCP ResetReq. - */ -static void bsd_reset(void *state, unsigned char code, unsigned char id, - unsigned char *data, unsigned len, - struct isdn_ppp_resetparams *rsparm) -{ - struct bsd_db *db = (struct bsd_db *) state; - - bsd_clear(db); - db->seqno = 0; - db->clear_count = 0; -} - -/* - * Release the compression structure - */ -static void bsd_free(void *state) -{ - struct bsd_db *db = (struct bsd_db *) state; - - if (db) { - /* - * Release the dictionary - */ - vfree(db->dict); - db->dict = NULL; - - /* - * Release the string buffer - */ - vfree(db->lens); - db->lens = NULL; - - /* - * Finally release the structure itself. - */ - kfree(db); - } -} - - -/* - * Allocate space for a (de) compressor. - */ -static void *bsd_alloc(struct isdn_ppp_comp_data *data) -{ - int bits; - unsigned int hsize, hshift, maxmaxcode; - struct bsd_db *db; - int decomp; - - static unsigned int htab[][2] = { - { 5003 , 4 } , { 5003 , 4 } , { 5003 , 4 } , { 5003 , 4 } , - { 9001 , 5 } , { 18013 , 6 } , { 35023 , 7 } , { 69001 , 8 } - }; - - if (data->optlen != 1 || data->num != CI_BSD_COMPRESS - || BSD_VERSION(data->options[0]) != BSD_CURRENT_VERSION) - return NULL; - - bits = BSD_NBITS(data->options[0]); - - if (bits < 9 || bits > 15) - return NULL; - - hsize = htab[bits - 9][0]; - hshift = htab[bits - 9][1]; - - /* - * Allocate the main control structure for this instance. - */ - maxmaxcode = MAXCODE(bits); - db = kzalloc(sizeof(struct bsd_db), GFP_KERNEL); - if (!db) - return NULL; - - db->xmit = data->flags & IPPP_COMP_FLAG_XMIT; - decomp = db->xmit ? 0 : 1; - - /* - * Allocate space for the dictionary. This may be more than one page in - * length. - */ - db->dict = vmalloc(array_size(hsize, sizeof(struct bsd_dict))); - if (!db->dict) { - bsd_free(db); - return NULL; - } - - /* - * If this is the compression buffer then there is no length data. - * For decompression, the length information is needed as well. - */ - if (!decomp) - db->lens = NULL; - else { - db->lens = vmalloc(array_size(sizeof(db->lens[0]), - maxmaxcode + 1)); - if (!db->lens) { - bsd_free(db); - return (NULL); - } - } - - /* - * Initialize the data information for the compression code - */ - db->totlen = sizeof(struct bsd_db) + (sizeof(struct bsd_dict) * hsize); - db->hsize = hsize; - db->hshift = hshift; - db->maxmaxcode = maxmaxcode; - db->maxbits = bits; - - return (void *)db; -} - -/* - * Initialize the database. - */ -static int bsd_init(void *state, struct isdn_ppp_comp_data *data, int unit, int debug) -{ - struct bsd_db *db = state; - int indx; - int decomp; - - if (!state || !data) { - printk(KERN_ERR "isdn_bsd_init: [%d] ERR, state %lx data %lx\n", unit, (long)state, (long)data); - return 0; - } - - decomp = db->xmit ? 0 : 1; - - if (data->optlen != 1 || data->num != CI_BSD_COMPRESS - || (BSD_VERSION(data->options[0]) != BSD_CURRENT_VERSION) - || (BSD_NBITS(data->options[0]) != db->maxbits) - || (decomp && db->lens == NULL)) { - printk(KERN_ERR "isdn_bsd: %d %d %d %d %lx\n", data->optlen, data->num, data->options[0], decomp, (unsigned long)db->lens); - return 0; - } - - if (decomp) - for (indx = LAST; indx >= 0; indx--) - db->lens[indx] = 1; - - indx = db->hsize; - while (indx-- != 0) { - db->dict[indx].codem1 = BADCODEM1; - db->dict[indx].cptr = 0; - } - - db->unit = unit; - db->mru = 0; - - db->debug = 1; - - bsd_reset(db, 0, 0, NULL, 0, NULL); - - return 1; -} - -/* - * Obtain pointers to the various structures in the compression tables - */ - -#define dict_ptrx(p, idx) &(p->dict[idx]) -#define lens_ptrx(p, idx) &(p->lens[idx]) - -#ifdef DEBUG -static unsigned short *lens_ptr(struct bsd_db *db, int idx) -{ - if ((unsigned int) idx > (unsigned int) db->maxmaxcode) { - printk(KERN_DEBUG "<9>ppp: lens_ptr(%d) > max\n", idx); - idx = 0; - } - return lens_ptrx(db, idx); -} - -static struct bsd_dict *dict_ptr(struct bsd_db *db, int idx) -{ - if ((unsigned int) idx >= (unsigned int) db->hsize) { - printk(KERN_DEBUG "<9>ppp: dict_ptr(%d) > max\n", idx); - idx = 0; - } - return dict_ptrx(db, idx); -} - -#else -#define lens_ptr(db, idx) lens_ptrx(db, idx) -#define dict_ptr(db, idx) dict_ptrx(db, idx) -#endif - -/* - * compress a packet - */ -static int bsd_compress(void *state, struct sk_buff *skb_in, struct sk_buff *skb_out, int proto) -{ - struct bsd_db *db; - int hshift; - unsigned int max_ent; - unsigned int n_bits; - unsigned int bitno; - unsigned long accm; - int ent; - unsigned long fcode; - struct bsd_dict *dictp; - unsigned char c; - int hval, disp, ilen, mxcode; - unsigned char *rptr = skb_in->data; - int isize = skb_in->len; - -#define OUTPUT(ent) \ - { \ - bitno -= n_bits; \ - accm |= ((ent) << bitno); \ - do { \ - if (skb_out && skb_tailroom(skb_out) > 0) \ - skb_put_u8(skb_out, (u8)(accm >> 24)); \ - accm <<= 8; \ - bitno += 8; \ - } while (bitno <= 24); \ - } - - /* - * If the protocol is not in the range we're interested in, - * just return without compressing the packet. If it is, - * the protocol becomes the first byte to compress. - */ - printk(KERN_DEBUG "bsd_compress called with %x\n", proto); - - ent = proto; - if (proto < 0x21 || proto > 0xf9 || !(proto & 0x1)) - return 0; - - db = (struct bsd_db *) state; - hshift = db->hshift; - max_ent = db->max_ent; - n_bits = db->n_bits; - bitno = 32; - accm = 0; - mxcode = MAXCODE(n_bits); - - /* This is the PPP header information */ - if (skb_out && skb_tailroom(skb_out) >= 2) { - char *v = skb_put(skb_out, 2); - /* we only push our own data on the header, - AC,PC and protos is pushed by caller */ - v[0] = db->seqno >> 8; - v[1] = db->seqno; - } - - ilen = ++isize; /* This is off by one, but that is what is in draft! */ - - while (--ilen > 0) { - c = *rptr++; - fcode = BSD_KEY(ent, c); - hval = BSD_HASH(ent, c, hshift); - dictp = dict_ptr(db, hval); - - /* Validate and then check the entry. */ - if (dictp->codem1 >= max_ent) - goto nomatch; - - if (dictp->fcode == fcode) { - ent = dictp->codem1 + 1; - continue; /* found (prefix,suffix) */ - } - - /* continue probing until a match or invalid entry */ - disp = (hval == 0) ? 1 : hval; - - do { - hval += disp; - if (hval >= db->hsize) - hval -= db->hsize; - dictp = dict_ptr(db, hval); - if (dictp->codem1 >= max_ent) - goto nomatch; - } while (dictp->fcode != fcode); - - ent = dictp->codem1 + 1; /* finally found (prefix,suffix) */ - continue; - - nomatch: - OUTPUT(ent); /* output the prefix */ - - /* code -> hashtable */ - if (max_ent < db->maxmaxcode) { - struct bsd_dict *dictp2; - struct bsd_dict *dictp3; - int indx; - - /* expand code size if needed */ - if (max_ent >= mxcode) { - db->n_bits = ++n_bits; - mxcode = MAXCODE(n_bits); - } - - /* - * Invalidate old hash table entry using - * this code, and then take it over. - */ - dictp2 = dict_ptr(db, max_ent + 1); - indx = dictp2->cptr; - dictp3 = dict_ptr(db, indx); - - if (dictp3->codem1 == max_ent) - dictp3->codem1 = BADCODEM1; - - dictp2->cptr = hval; - dictp->codem1 = max_ent; - dictp->fcode = fcode; - db->max_ent = ++max_ent; - - if (db->lens) { - unsigned short *len1 = lens_ptr(db, max_ent); - unsigned short *len2 = lens_ptr(db, ent); - *len1 = *len2 + 1; - } - } - ent = c; - } - - OUTPUT(ent); /* output the last code */ - - if (skb_out) - db->bytes_out += skb_out->len; /* Do not count bytes from here */ - db->uncomp_bytes += isize; - db->in_count += isize; - ++db->uncomp_count; - ++db->seqno; - - if (bitno < 32) - ++db->bytes_out; /* must be set before calling bsd_check */ - - /* - * Generate the clear command if needed - */ - - if (bsd_check(db)) - OUTPUT(CLEAR); - - /* - * Pad dribble bits of last code with ones. - * Do not emit a completely useless byte of ones. - */ - if (bitno < 32 && skb_out && skb_tailroom(skb_out) > 0) - skb_put_u8(skb_out, - (unsigned char)((accm | (0xff << (bitno - 8))) >> 24)); - - /* - * Increase code size if we would have without the packet - * boundary because the decompressor will do so. - */ - if (max_ent >= mxcode && max_ent < db->maxmaxcode) - db->n_bits++; - - /* If output length is too large then this is an incompressible frame. */ - if (!skb_out || skb_out->len >= skb_in->len) { - ++db->incomp_count; - db->incomp_bytes += isize; - return 0; - } - - /* Count the number of compressed frames */ - ++db->comp_count; - db->comp_bytes += skb_out->len; - return skb_out->len; - -#undef OUTPUT -} - -/* - * Update the "BSD Compress" dictionary on the receiver for - * incompressible data by pretending to compress the incoming data. - */ -static void bsd_incomp(void *state, struct sk_buff *skb_in, int proto) -{ - bsd_compress(state, skb_in, NULL, proto); -} - -/* - * Decompress "BSD Compress". - */ -static int bsd_decompress(void *state, struct sk_buff *skb_in, struct sk_buff *skb_out, - struct isdn_ppp_resetparams *rsparm) -{ - struct bsd_db *db; - unsigned int max_ent; - unsigned long accm; - unsigned int bitno; /* 1st valid bit in accm */ - unsigned int n_bits; - unsigned int tgtbitno; /* bitno when we have a code */ - struct bsd_dict *dictp; - int seq; - unsigned int incode; - unsigned int oldcode; - unsigned int finchar; - unsigned char *p, *ibuf; - int ilen; - int codelen; - int extra; - - db = (struct bsd_db *) state; - max_ent = db->max_ent; - accm = 0; - bitno = 32; /* 1st valid bit in accm */ - n_bits = db->n_bits; - tgtbitno = 32 - n_bits; /* bitno when we have a code */ - - printk(KERN_DEBUG "bsd_decompress called\n"); - - if (!skb_in || !skb_out) { - printk(KERN_ERR "bsd_decompress called with NULL parameter\n"); - return DECOMP_ERROR; - } - - /* - * Get the sequence number. - */ - if ((p = skb_pull(skb_in, 2)) == NULL) { - return DECOMP_ERROR; - } - p -= 2; - seq = (p[0] << 8) + p[1]; - ilen = skb_in->len; - ibuf = skb_in->data; - - /* - * Check the sequence number and give up if it differs from - * the value we're expecting. - */ - if (seq != db->seqno) { - if (db->debug) { - printk(KERN_DEBUG "bsd_decomp%d: bad sequence # %d, expected %d\n", - db->unit, seq, db->seqno - 1); - } - return DECOMP_ERROR; - } - - ++db->seqno; - db->bytes_out += ilen; - - if (skb_tailroom(skb_out) > 0) - skb_put_u8(skb_out, 0); - else - return DECOMP_ERR_NOMEM; - - oldcode = CLEAR; - - /* - * Keep the checkpoint correctly so that incompressible packets - * clear the dictionary at the proper times. - */ - - for (;;) { - if (ilen-- <= 0) { - db->in_count += (skb_out->len - 1); /* don't count the header */ - break; - } - - /* - * Accumulate bytes until we have a complete code. - * Then get the next code, relying on the 32-bit, - * unsigned accm to mask the result. - */ - - bitno -= 8; - accm |= *ibuf++ << bitno; - if (tgtbitno < bitno) - continue; - - incode = accm >> tgtbitno; - accm <<= n_bits; - bitno += n_bits; - - /* - * The dictionary must only be cleared at the end of a packet. - */ - - if (incode == CLEAR) { - if (ilen > 0) { - if (db->debug) - printk(KERN_DEBUG "bsd_decomp%d: bad CLEAR\n", db->unit); - return DECOMP_FATALERROR; /* probably a bug */ - } - bsd_clear(db); - break; - } - - if ((incode > max_ent + 2) || (incode > db->maxmaxcode) - || (incode > max_ent && oldcode == CLEAR)) { - if (db->debug) { - printk(KERN_DEBUG "bsd_decomp%d: bad code 0x%x oldcode=0x%x ", - db->unit, incode, oldcode); - printk(KERN_DEBUG "max_ent=0x%x skb->Len=%d seqno=%d\n", - max_ent, skb_out->len, db->seqno); - } - return DECOMP_FATALERROR; /* probably a bug */ - } - - /* Special case for KwKwK string. */ - if (incode > max_ent) { - finchar = oldcode; - extra = 1; - } else { - finchar = incode; - extra = 0; - } - - codelen = *(lens_ptr(db, finchar)); - if (skb_tailroom(skb_out) < codelen + extra) { - if (db->debug) { - printk(KERN_DEBUG "bsd_decomp%d: ran out of mru\n", db->unit); -#ifdef DEBUG - printk(KERN_DEBUG " len=%d, finchar=0x%x, codelen=%d,skblen=%d\n", - ilen, finchar, codelen, skb_out->len); -#endif - } - return DECOMP_FATALERROR; - } - - /* - * Decode this code and install it in the decompressed buffer. - */ - - p = skb_put(skb_out, codelen); - p += codelen; - while (finchar > LAST) { - struct bsd_dict *dictp2 = dict_ptr(db, finchar); - - dictp = dict_ptr(db, dictp2->cptr); - -#ifdef DEBUG - if (--codelen <= 0 || dictp->codem1 != finchar - 1) { - if (codelen <= 0) { - printk(KERN_ERR "bsd_decomp%d: fell off end of chain ", db->unit); - printk(KERN_ERR "0x%x at 0x%x by 0x%x, max_ent=0x%x\n", incode, finchar, dictp2->cptr, max_ent); - } else { - if (dictp->codem1 != finchar - 1) { - printk(KERN_ERR "bsd_decomp%d: bad code chain 0x%x finchar=0x%x ", db->unit, incode, finchar); - printk(KERN_ERR "oldcode=0x%x cptr=0x%x codem1=0x%x\n", oldcode, dictp2->cptr, dictp->codem1); - } - } - return DECOMP_FATALERROR; - } -#endif - - { - u32 fcode = dictp->fcode; - *--p = (fcode >> 16) & 0xff; - finchar = fcode & 0xffff; - } - } - *--p = finchar; - -#ifdef DEBUG - if (--codelen != 0) - printk(KERN_ERR "bsd_decomp%d: short by %d after code 0x%x, max_ent=0x%x\n", db->unit, codelen, incode, max_ent); -#endif - - if (extra) /* the KwKwK case again */ - skb_put_u8(skb_out, finchar); - - /* - * If not first code in a packet, and - * if not out of code space, then allocate a new code. - * - * Keep the hash table correct so it can be used - * with uncompressed packets. - */ - if (oldcode != CLEAR && max_ent < db->maxmaxcode) { - struct bsd_dict *dictp2, *dictp3; - u16 *lens1, *lens2; - unsigned long fcode; - int hval, disp, indx; - - fcode = BSD_KEY(oldcode, finchar); - hval = BSD_HASH(oldcode, finchar, db->hshift); - dictp = dict_ptr(db, hval); - - /* look for a free hash table entry */ - if (dictp->codem1 < max_ent) { - disp = (hval == 0) ? 1 : hval; - do { - hval += disp; - if (hval >= db->hsize) - hval -= db->hsize; - dictp = dict_ptr(db, hval); - } while (dictp->codem1 < max_ent); - } - - /* - * Invalidate previous hash table entry - * assigned this code, and then take it over - */ - - dictp2 = dict_ptr(db, max_ent + 1); - indx = dictp2->cptr; - dictp3 = dict_ptr(db, indx); - - if (dictp3->codem1 == max_ent) - dictp3->codem1 = BADCODEM1; - - dictp2->cptr = hval; - dictp->codem1 = max_ent; - dictp->fcode = fcode; - db->max_ent = ++max_ent; - - /* Update the length of this string. */ - lens1 = lens_ptr(db, max_ent); - lens2 = lens_ptr(db, oldcode); - *lens1 = *lens2 + 1; - - /* Expand code size if needed. */ - if (max_ent >= MAXCODE(n_bits) && max_ent < db->maxmaxcode) { - db->n_bits = ++n_bits; - tgtbitno = 32-n_bits; - } - } - oldcode = incode; - } - - ++db->comp_count; - ++db->uncomp_count; - db->comp_bytes += skb_in->len - BSD_OVHD; - db->uncomp_bytes += skb_out->len; - - if (bsd_check(db)) { - if (db->debug) - printk(KERN_DEBUG "bsd_decomp%d: peer should have cleared dictionary on %d\n", - db->unit, db->seqno - 1); - } - return skb_out->len; -} - -/************************************************************* - * Table of addresses for the BSD compression module - *************************************************************/ - -static struct isdn_ppp_compressor ippp_bsd_compress = { - .owner = THIS_MODULE, - .num = CI_BSD_COMPRESS, - .alloc = bsd_alloc, - .free = bsd_free, - .init = bsd_init, - .reset = bsd_reset, - .compress = bsd_compress, - .decompress = bsd_decompress, - .incomp = bsd_incomp, - .stat = bsd_stats, -}; - -/************************************************************* - * Module support routines - *************************************************************/ - -static int __init isdn_bsdcomp_init(void) -{ - int answer = isdn_ppp_register_compressor(&ippp_bsd_compress); - if (answer == 0) - printk(KERN_INFO "PPP BSD Compression module registered\n"); - return answer; -} - -static void __exit isdn_bsdcomp_exit(void) -{ - isdn_ppp_unregister_compressor(&ippp_bsd_compress); -} - -module_init(isdn_bsdcomp_init); -module_exit(isdn_bsdcomp_exit); diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c deleted file mode 100644 index 74ee00f5b310..000000000000 --- a/drivers/isdn/i4l/isdn_common.c +++ /dev/null @@ -1,2368 +0,0 @@ -/* $Id: isdn_common.c,v 1.1.2.3 2004/02/10 01:07:13 keil Exp $ - * - * Linux ISDN subsystem, common used functions (linklevel). - * - * Copyright 1994-1999 by Fritz Elfert (fritz@isdn4linux.de) - * Copyright 1995,96 Thinking Objects Software GmbH Wuerzburg - * Copyright 1995,96 by Michael Hipp (Michael.Hipp@student.uni-tuebingen.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include "isdn_common.h" -#include "isdn_tty.h" -#include "isdn_net.h" -#include "isdn_ppp.h" -#ifdef CONFIG_ISDN_AUDIO -#include "isdn_audio.h" -#endif -#ifdef CONFIG_ISDN_DIVERSION_MODULE -#define CONFIG_ISDN_DIVERSION -#endif -#ifdef CONFIG_ISDN_DIVERSION -#include -#endif /* CONFIG_ISDN_DIVERSION */ -#include "isdn_v110.h" - -/* Debugflags */ -#undef ISDN_DEBUG_STATCALLB - -MODULE_DESCRIPTION("ISDN4Linux: link layer"); -MODULE_AUTHOR("Fritz Elfert"); -MODULE_LICENSE("GPL"); - -isdn_dev *dev; - -static DEFINE_MUTEX(isdn_mutex); -static char *isdn_revision = "$Revision: 1.1.2.3 $"; - -extern char *isdn_net_revision; -#ifdef CONFIG_ISDN_PPP -extern char *isdn_ppp_revision; -#else -static char *isdn_ppp_revision = ": none $"; -#endif -#ifdef CONFIG_ISDN_AUDIO -extern char *isdn_audio_revision; -#else -static char *isdn_audio_revision = ": none $"; -#endif -extern char *isdn_v110_revision; - -#ifdef CONFIG_ISDN_DIVERSION -static isdn_divert_if *divert_if; /* = NULL */ -#endif /* CONFIG_ISDN_DIVERSION */ - - -static int isdn_writebuf_stub(int, int, const u_char __user *, int); -static void set_global_features(void); -static int isdn_wildmat(char *s, char *p); -static int isdn_add_channels(isdn_driver_t *d, int drvidx, int n, int adding); - -static inline void -isdn_lock_driver(isdn_driver_t *drv) -{ - try_module_get(drv->interface->owner); - drv->locks++; -} - -void -isdn_lock_drivers(void) -{ - int i; - - for (i = 0; i < ISDN_MAX_DRIVERS; i++) { - if (!dev->drv[i]) - continue; - isdn_lock_driver(dev->drv[i]); - } -} - -static inline void -isdn_unlock_driver(isdn_driver_t *drv) -{ - if (drv->locks > 0) { - drv->locks--; - module_put(drv->interface->owner); - } -} - -void -isdn_unlock_drivers(void) -{ - int i; - - for (i = 0; i < ISDN_MAX_DRIVERS; i++) { - if (!dev->drv[i]) - continue; - isdn_unlock_driver(dev->drv[i]); - } -} - -#if defined(ISDN_DEBUG_NET_DUMP) || defined(ISDN_DEBUG_MODEM_DUMP) -void -isdn_dumppkt(char *s, u_char *p, int len, int dumplen) -{ - int dumpc; - - printk(KERN_DEBUG "%s(%d) ", s, len); - for (dumpc = 0; (dumpc < dumplen) && (len); len--, dumpc++) - printk(" %02x", *p++); - printk("\n"); -} -#endif - -/* - * I picked the pattern-matching-functions from an old GNU-tar version (1.10) - * It was originally written and put to PD by rs@mirror.TMC.COM (Rich Salz) - */ -static int -isdn_star(char *s, char *p) -{ - while (isdn_wildmat(s, p)) { - if (*++s == '\0') - return (2); - } - return (0); -} - -/* - * Shell-type Pattern-matching for incoming caller-Ids - * This function gets a string in s and checks, if it matches the pattern - * given in p. - * - * Return: - * 0 = match. - * 1 = no match. - * 2 = no match. Would eventually match, if s would be longer. - * - * Possible Patterns: - * - * '?' matches one character - * '*' matches zero or more characters - * [xyz] matches the set of characters in brackets. - * [^xyz] matches any single character not in the set of characters - */ - -static int -isdn_wildmat(char *s, char *p) -{ - register int last; - register int matched; - register int reverse; - register int nostar = 1; - - if (!(*s) && !(*p)) - return (1); - for (; *p; s++, p++) - switch (*p) { - case '\\': - /* Literal match with following character. */ - p++; - /* fall through */ - default: - if (*s != *p) - return (*s == '\0') ? 2 : 1; - continue; - case '?': - /* Match anything. */ - if (*s == '\0') - return (2); - continue; - case '*': - nostar = 0; - /* Trailing star matches everything. */ - return (*++p ? isdn_star(s, p) : 0); - case '[': - /* [^....] means inverse character class. */ - if ((reverse = (p[1] == '^'))) - p++; - for (last = 0, matched = 0; *++p && (*p != ']'); last = *p) - /* This next line requires a good C compiler. */ - if (*p == '-' ? *s <= *++p && *s >= last : *s == *p) - matched = 1; - if (matched == reverse) - return (1); - continue; - } - return (*s == '\0') ? 0 : nostar; -} - -int isdn_msncmp(const char *msn1, const char *msn2) -{ - char TmpMsn1[ISDN_MSNLEN]; - char TmpMsn2[ISDN_MSNLEN]; - char *p; - - for (p = TmpMsn1; *msn1 && *msn1 != ':';) // Strip off a SPID - *p++ = *msn1++; - *p = '\0'; - - for (p = TmpMsn2; *msn2 && *msn2 != ':';) // Strip off a SPID - *p++ = *msn2++; - *p = '\0'; - - return isdn_wildmat(TmpMsn1, TmpMsn2); -} - -int -isdn_dc2minor(int di, int ch) -{ - int i; - for (i = 0; i < ISDN_MAX_CHANNELS; i++) - if (dev->chanmap[i] == ch && dev->drvmap[i] == di) - return i; - return -1; -} - -static int isdn_timer_cnt1 = 0; -static int isdn_timer_cnt2 = 0; -static int isdn_timer_cnt3 = 0; - -static void -isdn_timer_funct(struct timer_list *unused) -{ - int tf = dev->tflags; - if (tf & ISDN_TIMER_FAST) { - if (tf & ISDN_TIMER_MODEMREAD) - isdn_tty_readmodem(); - if (tf & ISDN_TIMER_MODEMPLUS) - isdn_tty_modem_escape(); - if (tf & ISDN_TIMER_MODEMXMIT) - isdn_tty_modem_xmit(); - } - if (tf & ISDN_TIMER_SLOW) { - if (++isdn_timer_cnt1 >= ISDN_TIMER_02SEC) { - isdn_timer_cnt1 = 0; - if (tf & ISDN_TIMER_NETDIAL) - isdn_net_dial(); - } - if (++isdn_timer_cnt2 >= ISDN_TIMER_1SEC) { - isdn_timer_cnt2 = 0; - if (tf & ISDN_TIMER_NETHANGUP) - isdn_net_autohup(); - if (++isdn_timer_cnt3 >= ISDN_TIMER_RINGING) { - isdn_timer_cnt3 = 0; - if (tf & ISDN_TIMER_MODEMRING) - isdn_tty_modem_ring(); - } - if (tf & ISDN_TIMER_CARRIER) - isdn_tty_carrier_timeout(); - } - } - if (tf) - mod_timer(&dev->timer, jiffies + ISDN_TIMER_RES); -} - -void -isdn_timer_ctrl(int tf, int onoff) -{ - unsigned long flags; - int old_tflags; - - spin_lock_irqsave(&dev->timerlock, flags); - if ((tf & ISDN_TIMER_SLOW) && (!(dev->tflags & ISDN_TIMER_SLOW))) { - /* If the slow-timer wasn't activated until now */ - isdn_timer_cnt1 = 0; - isdn_timer_cnt2 = 0; - } - old_tflags = dev->tflags; - if (onoff) - dev->tflags |= tf; - else - dev->tflags &= ~tf; - if (dev->tflags && !old_tflags) - mod_timer(&dev->timer, jiffies + ISDN_TIMER_RES); - spin_unlock_irqrestore(&dev->timerlock, flags); -} - -/* - * Receive a packet from B-Channel. (Called from low-level-module) - */ -static void -isdn_receive_skb_callback(int di, int channel, struct sk_buff *skb) -{ - int i; - - if ((i = isdn_dc2minor(di, channel)) == -1) { - dev_kfree_skb(skb); - return; - } - /* Update statistics */ - dev->ibytes[i] += skb->len; - - /* First, try to deliver data to network-device */ - if (isdn_net_rcv_skb(i, skb)) - return; - - /* V.110 handling - * makes sense for async streams only, so it is - * called after possible net-device delivery. - */ - if (dev->v110[i]) { - atomic_inc(&dev->v110use[i]); - skb = isdn_v110_decode(dev->v110[i], skb); - atomic_dec(&dev->v110use[i]); - if (!skb) - return; - } - - /* No network-device found, deliver to tty or raw-channel */ - if (skb->len) { - if (isdn_tty_rcv_skb(i, di, channel, skb)) - return; - wake_up_interruptible(&dev->drv[di]->rcv_waitq[channel]); - } else - dev_kfree_skb(skb); -} - -/* - * Intercept command from Linklevel to Lowlevel. - * If layer 2 protocol is V.110 and this is not supported by current - * lowlevel-driver, use driver's transparent mode and handle V.110 in - * linklevel instead. - */ -int -isdn_command(isdn_ctrl *cmd) -{ - if (cmd->driver == -1) { - printk(KERN_WARNING "isdn_command command(%x) driver -1\n", cmd->command); - return (1); - } - if (!dev->drv[cmd->driver]) { - printk(KERN_WARNING "isdn_command command(%x) dev->drv[%d] NULL\n", - cmd->command, cmd->driver); - return (1); - } - if (!dev->drv[cmd->driver]->interface) { - printk(KERN_WARNING "isdn_command command(%x) dev->drv[%d]->interface NULL\n", - cmd->command, cmd->driver); - return (1); - } - if (cmd->command == ISDN_CMD_SETL2) { - int idx = isdn_dc2minor(cmd->driver, cmd->arg & 255); - unsigned long l2prot = (cmd->arg >> 8) & 255; - unsigned long features = (dev->drv[cmd->driver]->interface->features - >> ISDN_FEATURE_L2_SHIFT) & - ISDN_FEATURE_L2_MASK; - unsigned long l2_feature = (1 << l2prot); - - switch (l2prot) { - case ISDN_PROTO_L2_V11096: - case ISDN_PROTO_L2_V11019: - case ISDN_PROTO_L2_V11038: - /* If V.110 requested, but not supported by - * HL-driver, set emulator-flag and change - * Layer-2 to transparent - */ - if (!(features & l2_feature)) { - dev->v110emu[idx] = l2prot; - cmd->arg = (cmd->arg & 255) | - (ISDN_PROTO_L2_TRANS << 8); - } else - dev->v110emu[idx] = 0; - } - } - return dev->drv[cmd->driver]->interface->command(cmd); -} - -void -isdn_all_eaz(int di, int ch) -{ - isdn_ctrl cmd; - - if (di < 0) - return; - cmd.driver = di; - cmd.arg = ch; - cmd.command = ISDN_CMD_SETEAZ; - cmd.parm.num[0] = '\0'; - isdn_command(&cmd); -} - -/* - * Begin of a CAPI like LL<->HL interface, currently used only for - * supplementary service (CAPI 2.0 part III) - */ -#include - -static int -isdn_capi_rec_hl_msg(capi_msg *cm) -{ - switch (cm->Command) { - case CAPI_FACILITY: - /* in the moment only handled in tty */ - return (isdn_tty_capi_facility(cm)); - default: - return (-1); - } -} - -static int -isdn_status_callback(isdn_ctrl *c) -{ - int di; - u_long flags; - int i; - int r; - int retval = 0; - isdn_ctrl cmd; - isdn_net_dev *p; - - di = c->driver; - i = isdn_dc2minor(di, c->arg); - switch (c->command) { - case ISDN_STAT_BSENT: - if (i < 0) - return -1; - if (dev->global_flags & ISDN_GLOBAL_STOPPED) - return 0; - if (isdn_net_stat_callback(i, c)) - return 0; - if (isdn_v110_stat_callback(i, c)) - return 0; - if (isdn_tty_stat_callback(i, c)) - return 0; - wake_up_interruptible(&dev->drv[di]->snd_waitq[c->arg]); - break; - case ISDN_STAT_STAVAIL: - dev->drv[di]->stavail += c->arg; - wake_up_interruptible(&dev->drv[di]->st_waitq); - break; - case ISDN_STAT_RUN: - dev->drv[di]->flags |= DRV_FLAG_RUNNING; - for (i = 0; i < ISDN_MAX_CHANNELS; i++) - if (dev->drvmap[i] == di) - isdn_all_eaz(di, dev->chanmap[i]); - set_global_features(); - break; - case ISDN_STAT_STOP: - dev->drv[di]->flags &= ~DRV_FLAG_RUNNING; - break; - case ISDN_STAT_ICALL: - if (i < 0) - return -1; -#ifdef ISDN_DEBUG_STATCALLB - printk(KERN_DEBUG "ICALL (net): %d %ld %s\n", di, c->arg, c->parm.num); -#endif - if (dev->global_flags & ISDN_GLOBAL_STOPPED) { - cmd.driver = di; - cmd.arg = c->arg; - cmd.command = ISDN_CMD_HANGUP; - isdn_command(&cmd); - return 0; - } - /* Try to find a network-interface which will accept incoming call */ - r = ((c->command == ISDN_STAT_ICALLW) ? 0 : isdn_net_find_icall(di, c->arg, i, &c->parm.setup)); - switch (r) { - case 0: - /* No network-device replies. - * Try ttyI's. - * These return 0 on no match, 1 on match and - * 3 on eventually match, if CID is longer. - */ - if (c->command == ISDN_STAT_ICALL) - if ((retval = isdn_tty_find_icall(di, c->arg, &c->parm.setup))) return (retval); -#ifdef CONFIG_ISDN_DIVERSION - if (divert_if) - if ((retval = divert_if->stat_callback(c))) - return (retval); /* processed */ -#endif /* CONFIG_ISDN_DIVERSION */ - if ((!retval) && (dev->drv[di]->flags & DRV_FLAG_REJBUS)) { - /* No tty responding */ - cmd.driver = di; - cmd.arg = c->arg; - cmd.command = ISDN_CMD_HANGUP; - isdn_command(&cmd); - retval = 2; - } - break; - case 1: - /* Schedule connection-setup */ - isdn_net_dial(); - cmd.driver = di; - cmd.arg = c->arg; - cmd.command = ISDN_CMD_ACCEPTD; - for (p = dev->netdev; p; p = p->next) - if (p->local->isdn_channel == cmd.arg) - { - strcpy(cmd.parm.setup.eazmsn, p->local->msn); - isdn_command(&cmd); - retval = 1; - break; - } - break; - - case 2: /* For calling back, first reject incoming call ... */ - case 3: /* Interface found, but down, reject call actively */ - retval = 2; - printk(KERN_INFO "isdn: Rejecting Call\n"); - cmd.driver = di; - cmd.arg = c->arg; - cmd.command = ISDN_CMD_HANGUP; - isdn_command(&cmd); - if (r == 3) - break; - /* Fall through */ - case 4: - /* ... then start callback. */ - isdn_net_dial(); - break; - case 5: - /* Number would eventually match, if longer */ - retval = 3; - break; - } -#ifdef ISDN_DEBUG_STATCALLB - printk(KERN_DEBUG "ICALL: ret=%d\n", retval); -#endif - return retval; - break; - case ISDN_STAT_CINF: - if (i < 0) - return -1; -#ifdef ISDN_DEBUG_STATCALLB - printk(KERN_DEBUG "CINF: %ld %s\n", c->arg, c->parm.num); -#endif - if (dev->global_flags & ISDN_GLOBAL_STOPPED) - return 0; - if (strcmp(c->parm.num, "0")) - isdn_net_stat_callback(i, c); - isdn_tty_stat_callback(i, c); - break; - case ISDN_STAT_CAUSE: -#ifdef ISDN_DEBUG_STATCALLB - printk(KERN_DEBUG "CAUSE: %ld %s\n", c->arg, c->parm.num); -#endif - printk(KERN_INFO "isdn: %s,ch%ld cause: %s\n", - dev->drvid[di], c->arg, c->parm.num); - isdn_tty_stat_callback(i, c); -#ifdef CONFIG_ISDN_DIVERSION - if (divert_if) - divert_if->stat_callback(c); -#endif /* CONFIG_ISDN_DIVERSION */ - break; - case ISDN_STAT_DISPLAY: -#ifdef ISDN_DEBUG_STATCALLB - printk(KERN_DEBUG "DISPLAY: %ld %s\n", c->arg, c->parm.display); -#endif - isdn_tty_stat_callback(i, c); -#ifdef CONFIG_ISDN_DIVERSION - if (divert_if) - divert_if->stat_callback(c); -#endif /* CONFIG_ISDN_DIVERSION */ - break; - case ISDN_STAT_DCONN: - if (i < 0) - return -1; -#ifdef ISDN_DEBUG_STATCALLB - printk(KERN_DEBUG "DCONN: %ld\n", c->arg); -#endif - if (dev->global_flags & ISDN_GLOBAL_STOPPED) - return 0; - /* Find any net-device, waiting for D-channel setup */ - if (isdn_net_stat_callback(i, c)) - break; - isdn_v110_stat_callback(i, c); - /* Find any ttyI, waiting for D-channel setup */ - if (isdn_tty_stat_callback(i, c)) { - cmd.driver = di; - cmd.arg = c->arg; - cmd.command = ISDN_CMD_ACCEPTB; - isdn_command(&cmd); - break; - } - break; - case ISDN_STAT_DHUP: - if (i < 0) - return -1; -#ifdef ISDN_DEBUG_STATCALLB - printk(KERN_DEBUG "DHUP: %ld\n", c->arg); -#endif - if (dev->global_flags & ISDN_GLOBAL_STOPPED) - return 0; - dev->drv[di]->online &= ~(1 << (c->arg)); - isdn_info_update(); - /* Signal hangup to network-devices */ - if (isdn_net_stat_callback(i, c)) - break; - isdn_v110_stat_callback(i, c); - if (isdn_tty_stat_callback(i, c)) - break; -#ifdef CONFIG_ISDN_DIVERSION - if (divert_if) - divert_if->stat_callback(c); -#endif /* CONFIG_ISDN_DIVERSION */ - break; - break; - case ISDN_STAT_BCONN: - if (i < 0) - return -1; -#ifdef ISDN_DEBUG_STATCALLB - printk(KERN_DEBUG "BCONN: %ld\n", c->arg); -#endif - /* Signal B-channel-connect to network-devices */ - if (dev->global_flags & ISDN_GLOBAL_STOPPED) - return 0; - dev->drv[di]->online |= (1 << (c->arg)); - isdn_info_update(); - if (isdn_net_stat_callback(i, c)) - break; - isdn_v110_stat_callback(i, c); - if (isdn_tty_stat_callback(i, c)) - break; - break; - case ISDN_STAT_BHUP: - if (i < 0) - return -1; -#ifdef ISDN_DEBUG_STATCALLB - printk(KERN_DEBUG "BHUP: %ld\n", c->arg); -#endif - if (dev->global_flags & ISDN_GLOBAL_STOPPED) - return 0; - dev->drv[di]->online &= ~(1 << (c->arg)); - isdn_info_update(); -#ifdef CONFIG_ISDN_X25 - /* Signal hangup to network-devices */ - if (isdn_net_stat_callback(i, c)) - break; -#endif - isdn_v110_stat_callback(i, c); - if (isdn_tty_stat_callback(i, c)) - break; - break; - case ISDN_STAT_NODCH: - if (i < 0) - return -1; -#ifdef ISDN_DEBUG_STATCALLB - printk(KERN_DEBUG "NODCH: %ld\n", c->arg); -#endif - if (dev->global_flags & ISDN_GLOBAL_STOPPED) - return 0; - if (isdn_net_stat_callback(i, c)) - break; - if (isdn_tty_stat_callback(i, c)) - break; - break; - case ISDN_STAT_ADDCH: - spin_lock_irqsave(&dev->lock, flags); - if (isdn_add_channels(dev->drv[di], di, c->arg, 1)) { - spin_unlock_irqrestore(&dev->lock, flags); - return -1; - } - spin_unlock_irqrestore(&dev->lock, flags); - isdn_info_update(); - break; - case ISDN_STAT_DISCH: - spin_lock_irqsave(&dev->lock, flags); - for (i = 0; i < ISDN_MAX_CHANNELS; i++) - if ((dev->drvmap[i] == di) && - (dev->chanmap[i] == c->arg)) { - if (c->parm.num[0]) - dev->usage[i] &= ~ISDN_USAGE_DISABLED; - else - if (USG_NONE(dev->usage[i])) { - dev->usage[i] |= ISDN_USAGE_DISABLED; - } - else - retval = -1; - break; - } - spin_unlock_irqrestore(&dev->lock, flags); - isdn_info_update(); - break; - case ISDN_STAT_UNLOAD: - while (dev->drv[di]->locks > 0) { - isdn_unlock_driver(dev->drv[di]); - } - spin_lock_irqsave(&dev->lock, flags); - isdn_tty_stat_callback(i, c); - for (i = 0; i < ISDN_MAX_CHANNELS; i++) - if (dev->drvmap[i] == di) { - dev->drvmap[i] = -1; - dev->chanmap[i] = -1; - dev->usage[i] &= ~ISDN_USAGE_DISABLED; - } - dev->drivers--; - dev->channels -= dev->drv[di]->channels; - kfree(dev->drv[di]->rcverr); - kfree(dev->drv[di]->rcvcount); - for (i = 0; i < dev->drv[di]->channels; i++) - skb_queue_purge(&dev->drv[di]->rpqueue[i]); - kfree(dev->drv[di]->rpqueue); - kfree(dev->drv[di]->rcv_waitq); - kfree(dev->drv[di]); - dev->drv[di] = NULL; - dev->drvid[di][0] = '\0'; - isdn_info_update(); - set_global_features(); - spin_unlock_irqrestore(&dev->lock, flags); - return 0; - case ISDN_STAT_L1ERR: - break; - case CAPI_PUT_MESSAGE: - return (isdn_capi_rec_hl_msg(&c->parm.cmsg)); -#ifdef CONFIG_ISDN_TTY_FAX - case ISDN_STAT_FAXIND: - isdn_tty_stat_callback(i, c); - break; -#endif -#ifdef CONFIG_ISDN_AUDIO - case ISDN_STAT_AUDIO: - isdn_tty_stat_callback(i, c); - break; -#endif -#ifdef CONFIG_ISDN_DIVERSION - case ISDN_STAT_PROT: - case ISDN_STAT_REDIR: - if (divert_if) - return (divert_if->stat_callback(c)); -#endif /* CONFIG_ISDN_DIVERSION */ - /* fall through */ - default: - return -1; - } - return 0; -} - -/* - * Get integer from char-pointer, set pointer to end of number - */ -int -isdn_getnum(char **p) -{ - int v = -1; - - while (*p[0] >= '0' && *p[0] <= '9') - v = ((v < 0) ? 0 : (v * 10)) + (int) ((*p[0]++) - '0'); - return v; -} - -#define DLE 0x10 - -/* - * isdn_readbchan() tries to get data from the read-queue. - * It MUST be called with interrupts off. - * - * Be aware that this is not an atomic operation when sleep != 0, even though - * interrupts are turned off! Well, like that we are currently only called - * on behalf of a read system call on raw device files (which are documented - * to be dangerous and for debugging purpose only). The inode semaphore - * takes care that this is not called for the same minor device number while - * we are sleeping, but access is not serialized against simultaneous read() - * from the corresponding ttyI device. Can other ugly events, like changes - * of the mapping (di,ch)<->minor, happen during the sleep? --he - */ -int -isdn_readbchan(int di, int channel, u_char *buf, u_char *fp, int len, wait_queue_head_t *sleep) -{ - int count; - int count_pull; - int count_put; - int dflag; - struct sk_buff *skb; - u_char *cp; - - if (!dev->drv[di]) - return 0; - if (skb_queue_empty(&dev->drv[di]->rpqueue[channel])) { - if (sleep) - wait_event_interruptible(*sleep, - !skb_queue_empty(&dev->drv[di]->rpqueue[channel])); - else - return 0; - } - if (len > dev->drv[di]->rcvcount[channel]) - len = dev->drv[di]->rcvcount[channel]; - cp = buf; - count = 0; - while (len) { - if (!(skb = skb_peek(&dev->drv[di]->rpqueue[channel]))) - break; -#ifdef CONFIG_ISDN_AUDIO - if (ISDN_AUDIO_SKB_LOCK(skb)) - break; - ISDN_AUDIO_SKB_LOCK(skb) = 1; - if ((ISDN_AUDIO_SKB_DLECOUNT(skb)) || (dev->drv[di]->DLEflag & (1 << channel))) { - char *p = skb->data; - unsigned long DLEmask = (1 << channel); - - dflag = 0; - count_pull = count_put = 0; - while ((count_pull < skb->len) && (len > 0)) { - len--; - if (dev->drv[di]->DLEflag & DLEmask) { - *cp++ = DLE; - dev->drv[di]->DLEflag &= ~DLEmask; - } else { - *cp++ = *p; - if (*p == DLE) { - dev->drv[di]->DLEflag |= DLEmask; - (ISDN_AUDIO_SKB_DLECOUNT(skb))--; - } - p++; - count_pull++; - } - count_put++; - } - if (count_pull >= skb->len) - dflag = 1; - } else { -#endif - /* No DLE's in buff, so simply copy it */ - dflag = 1; - if ((count_pull = skb->len) > len) { - count_pull = len; - dflag = 0; - } - count_put = count_pull; - skb_copy_from_linear_data(skb, cp, count_put); - cp += count_put; - len -= count_put; -#ifdef CONFIG_ISDN_AUDIO - } -#endif - count += count_put; - if (fp) { - memset(fp, 0, count_put); - fp += count_put; - } - if (dflag) { - /* We got all the data in this buff. - * Now we can dequeue it. - */ - if (fp) - *(fp - 1) = 0xff; -#ifdef CONFIG_ISDN_AUDIO - ISDN_AUDIO_SKB_LOCK(skb) = 0; -#endif - skb = skb_dequeue(&dev->drv[di]->rpqueue[channel]); - dev_kfree_skb(skb); - } else { - /* Not yet emptied this buff, so it - * must stay in the queue, for further calls - * but we pull off the data we got until now. - */ - skb_pull(skb, count_pull); -#ifdef CONFIG_ISDN_AUDIO - ISDN_AUDIO_SKB_LOCK(skb) = 0; -#endif - } - dev->drv[di]->rcvcount[channel] -= count_put; - } - return count; -} - -/* - * isdn_readbchan_tty() tries to get data from the read-queue. - * It MUST be called with interrupts off. - * - * Be aware that this is not an atomic operation when sleep != 0, even though - * interrupts are turned off! Well, like that we are currently only called - * on behalf of a read system call on raw device files (which are documented - * to be dangerous and for debugging purpose only). The inode semaphore - * takes care that this is not called for the same minor device number while - * we are sleeping, but access is not serialized against simultaneous read() - * from the corresponding ttyI device. Can other ugly events, like changes - * of the mapping (di,ch)<->minor, happen during the sleep? --he - */ -int -isdn_readbchan_tty(int di, int channel, struct tty_port *port, int cisco_hack) -{ - int count; - int count_pull; - int count_put; - int dflag; - struct sk_buff *skb; - char last = 0; - int len; - - if (!dev->drv[di]) - return 0; - if (skb_queue_empty(&dev->drv[di]->rpqueue[channel])) - return 0; - - len = tty_buffer_request_room(port, dev->drv[di]->rcvcount[channel]); - if (len == 0) - return len; - - count = 0; - while (len) { - if (!(skb = skb_peek(&dev->drv[di]->rpqueue[channel]))) - break; -#ifdef CONFIG_ISDN_AUDIO - if (ISDN_AUDIO_SKB_LOCK(skb)) - break; - ISDN_AUDIO_SKB_LOCK(skb) = 1; - if ((ISDN_AUDIO_SKB_DLECOUNT(skb)) || (dev->drv[di]->DLEflag & (1 << channel))) { - char *p = skb->data; - unsigned long DLEmask = (1 << channel); - - dflag = 0; - count_pull = count_put = 0; - while ((count_pull < skb->len) && (len > 0)) { - /* push every character but the last to the tty buffer directly */ - if (count_put) - tty_insert_flip_char(port, last, TTY_NORMAL); - len--; - if (dev->drv[di]->DLEflag & DLEmask) { - last = DLE; - dev->drv[di]->DLEflag &= ~DLEmask; - } else { - last = *p; - if (last == DLE) { - dev->drv[di]->DLEflag |= DLEmask; - (ISDN_AUDIO_SKB_DLECOUNT(skb))--; - } - p++; - count_pull++; - } - count_put++; - } - if (count_pull >= skb->len) - dflag = 1; - } else { -#endif - /* No DLE's in buff, so simply copy it */ - dflag = 1; - if ((count_pull = skb->len) > len) { - count_pull = len; - dflag = 0; - } - count_put = count_pull; - if (count_put > 1) - tty_insert_flip_string(port, skb->data, count_put - 1); - last = skb->data[count_put - 1]; - len -= count_put; -#ifdef CONFIG_ISDN_AUDIO - } -#endif - count += count_put; - if (dflag) { - /* We got all the data in this buff. - * Now we can dequeue it. - */ - if (cisco_hack) - tty_insert_flip_char(port, last, 0xFF); - else - tty_insert_flip_char(port, last, TTY_NORMAL); -#ifdef CONFIG_ISDN_AUDIO - ISDN_AUDIO_SKB_LOCK(skb) = 0; -#endif - skb = skb_dequeue(&dev->drv[di]->rpqueue[channel]); - dev_kfree_skb(skb); - } else { - tty_insert_flip_char(port, last, TTY_NORMAL); - /* Not yet emptied this buff, so it - * must stay in the queue, for further calls - * but we pull off the data we got until now. - */ - skb_pull(skb, count_pull); -#ifdef CONFIG_ISDN_AUDIO - ISDN_AUDIO_SKB_LOCK(skb) = 0; -#endif - } - dev->drv[di]->rcvcount[channel] -= count_put; - } - return count; -} - - -static inline int -isdn_minor2drv(int minor) -{ - return (dev->drvmap[minor]); -} - -static inline int -isdn_minor2chan(int minor) -{ - return (dev->chanmap[minor]); -} - -static char * -isdn_statstr(void) -{ - static char istatbuf[2048]; - char *p; - int i; - - sprintf(istatbuf, "idmap:\t"); - p = istatbuf + strlen(istatbuf); - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - sprintf(p, "%s ", (dev->drvmap[i] < 0) ? "-" : dev->drvid[dev->drvmap[i]]); - p = istatbuf + strlen(istatbuf); - } - sprintf(p, "\nchmap:\t"); - p = istatbuf + strlen(istatbuf); - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - sprintf(p, "%d ", dev->chanmap[i]); - p = istatbuf + strlen(istatbuf); - } - sprintf(p, "\ndrmap:\t"); - p = istatbuf + strlen(istatbuf); - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - sprintf(p, "%d ", dev->drvmap[i]); - p = istatbuf + strlen(istatbuf); - } - sprintf(p, "\nusage:\t"); - p = istatbuf + strlen(istatbuf); - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - sprintf(p, "%d ", dev->usage[i]); - p = istatbuf + strlen(istatbuf); - } - sprintf(p, "\nflags:\t"); - p = istatbuf + strlen(istatbuf); - for (i = 0; i < ISDN_MAX_DRIVERS; i++) { - if (dev->drv[i]) { - sprintf(p, "%ld ", dev->drv[i]->online); - p = istatbuf + strlen(istatbuf); - } else { - sprintf(p, "? "); - p = istatbuf + strlen(istatbuf); - } - } - sprintf(p, "\nphone:\t"); - p = istatbuf + strlen(istatbuf); - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - sprintf(p, "%s ", dev->num[i]); - p = istatbuf + strlen(istatbuf); - } - sprintf(p, "\n"); - return istatbuf; -} - -/* Module interface-code */ - -void -isdn_info_update(void) -{ - infostruct *p = dev->infochain; - - while (p) { - *(p->private) = 1; - p = (infostruct *) p->next; - } - wake_up_interruptible(&(dev->info_waitq)); -} - -static ssize_t -isdn_read(struct file *file, char __user *buf, size_t count, loff_t *off) -{ - uint minor = iminor(file_inode(file)); - int len = 0; - int drvidx; - int chidx; - int retval; - char *p; - - mutex_lock(&isdn_mutex); - if (minor == ISDN_MINOR_STATUS) { - if (!file->private_data) { - if (file->f_flags & O_NONBLOCK) { - retval = -EAGAIN; - goto out; - } - wait_event_interruptible(dev->info_waitq, - file->private_data); - } - p = isdn_statstr(); - file->private_data = NULL; - if ((len = strlen(p)) <= count) { - if (copy_to_user(buf, p, len)) { - retval = -EFAULT; - goto out; - } - *off += len; - retval = len; - goto out; - } - retval = 0; - goto out; - } - if (!dev->drivers) { - retval = -ENODEV; - goto out; - } - if (minor <= ISDN_MINOR_BMAX) { - printk(KERN_WARNING "isdn_read minor %d obsolete!\n", minor); - drvidx = isdn_minor2drv(minor); - if (drvidx < 0) { - retval = -ENODEV; - goto out; - } - if (!(dev->drv[drvidx]->flags & DRV_FLAG_RUNNING)) { - retval = -ENODEV; - goto out; - } - chidx = isdn_minor2chan(minor); - if (!(p = kmalloc(count, GFP_KERNEL))) { - retval = -ENOMEM; - goto out; - } - len = isdn_readbchan(drvidx, chidx, p, NULL, count, - &dev->drv[drvidx]->rcv_waitq[chidx]); - *off += len; - if (copy_to_user(buf, p, len)) - len = -EFAULT; - kfree(p); - retval = len; - goto out; - } - if (minor <= ISDN_MINOR_CTRLMAX) { - drvidx = isdn_minor2drv(minor - ISDN_MINOR_CTRL); - if (drvidx < 0) { - retval = -ENODEV; - goto out; - } - if (!dev->drv[drvidx]->stavail) { - if (file->f_flags & O_NONBLOCK) { - retval = -EAGAIN; - goto out; - } - wait_event_interruptible(dev->drv[drvidx]->st_waitq, - dev->drv[drvidx]->stavail); - } - if (dev->drv[drvidx]->interface->readstat) { - if (count > dev->drv[drvidx]->stavail) - count = dev->drv[drvidx]->stavail; - len = dev->drv[drvidx]->interface->readstat(buf, count, - drvidx, isdn_minor2chan(minor - ISDN_MINOR_CTRL)); - if (len < 0) { - retval = len; - goto out; - } - } else { - len = 0; - } - if (len) - dev->drv[drvidx]->stavail -= len; - else - dev->drv[drvidx]->stavail = 0; - *off += len; - retval = len; - goto out; - } -#ifdef CONFIG_ISDN_PPP - if (minor <= ISDN_MINOR_PPPMAX) { - retval = isdn_ppp_read(minor - ISDN_MINOR_PPP, file, buf, count); - goto out; - } -#endif - retval = -ENODEV; -out: - mutex_unlock(&isdn_mutex); - return retval; -} - -static ssize_t -isdn_write(struct file *file, const char __user *buf, size_t count, loff_t *off) -{ - uint minor = iminor(file_inode(file)); - int drvidx; - int chidx; - int retval; - - if (minor == ISDN_MINOR_STATUS) - return -EPERM; - if (!dev->drivers) - return -ENODEV; - - mutex_lock(&isdn_mutex); - if (minor <= ISDN_MINOR_BMAX) { - printk(KERN_WARNING "isdn_write minor %d obsolete!\n", minor); - drvidx = isdn_minor2drv(minor); - if (drvidx < 0) { - retval = -ENODEV; - goto out; - } - if (!(dev->drv[drvidx]->flags & DRV_FLAG_RUNNING)) { - retval = -ENODEV; - goto out; - } - chidx = isdn_minor2chan(minor); - wait_event_interruptible(dev->drv[drvidx]->snd_waitq[chidx], - (retval = isdn_writebuf_stub(drvidx, chidx, buf, count))); - goto out; - } - if (minor <= ISDN_MINOR_CTRLMAX) { - drvidx = isdn_minor2drv(minor - ISDN_MINOR_CTRL); - if (drvidx < 0) { - retval = -ENODEV; - goto out; - } - /* - * We want to use the isdnctrl device to load the firmware - * - if (!(dev->drv[drvidx]->flags & DRV_FLAG_RUNNING)) - return -ENODEV; - */ - if (dev->drv[drvidx]->interface->writecmd) - retval = dev->drv[drvidx]->interface-> - writecmd(buf, count, drvidx, - isdn_minor2chan(minor - ISDN_MINOR_CTRL)); - else - retval = count; - goto out; - } -#ifdef CONFIG_ISDN_PPP - if (minor <= ISDN_MINOR_PPPMAX) { - retval = isdn_ppp_write(minor - ISDN_MINOR_PPP, file, buf, count); - goto out; - } -#endif - retval = -ENODEV; -out: - mutex_unlock(&isdn_mutex); - return retval; -} - -static __poll_t -isdn_poll(struct file *file, poll_table *wait) -{ - __poll_t mask = 0; - unsigned int minor = iminor(file_inode(file)); - int drvidx = isdn_minor2drv(minor - ISDN_MINOR_CTRL); - - mutex_lock(&isdn_mutex); - if (minor == ISDN_MINOR_STATUS) { - poll_wait(file, &(dev->info_waitq), wait); - /* mask = EPOLLOUT | EPOLLWRNORM; */ - if (file->private_data) { - mask |= EPOLLIN | EPOLLRDNORM; - } - goto out; - } - if (minor >= ISDN_MINOR_CTRL && minor <= ISDN_MINOR_CTRLMAX) { - if (drvidx < 0) { - /* driver deregistered while file open */ - mask = EPOLLHUP; - goto out; - } - poll_wait(file, &(dev->drv[drvidx]->st_waitq), wait); - mask = EPOLLOUT | EPOLLWRNORM; - if (dev->drv[drvidx]->stavail) { - mask |= EPOLLIN | EPOLLRDNORM; - } - goto out; - } -#ifdef CONFIG_ISDN_PPP - if (minor <= ISDN_MINOR_PPPMAX) { - mask = isdn_ppp_poll(file, wait); - goto out; - } -#endif - mask = EPOLLERR; -out: - mutex_unlock(&isdn_mutex); - return mask; -} - - -static int -isdn_ioctl(struct file *file, uint cmd, ulong arg) -{ - uint minor = iminor(file_inode(file)); - isdn_ctrl c; - int drvidx; - int ret; - int i; - char __user *p; - char *s; - union iocpar { - char name[10]; - char bname[22]; - isdn_ioctl_struct iocts; - isdn_net_ioctl_phone phone; - isdn_net_ioctl_cfg cfg; - } iocpar; - void __user *argp = (void __user *)arg; - -#define name iocpar.name -#define bname iocpar.bname -#define iocts iocpar.iocts -#define phone iocpar.phone -#define cfg iocpar.cfg - - if (minor == ISDN_MINOR_STATUS) { - switch (cmd) { - case IIOCGETDVR: - return (TTY_DV + - (NET_DV << 8) + - (INF_DV << 16)); - case IIOCGETCPS: - if (arg) { - ulong __user *p = argp; - int i; - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - put_user(dev->ibytes[i], p++); - put_user(dev->obytes[i], p++); - } - return 0; - } else - return -EINVAL; - break; - case IIOCNETGPN: - /* Get peer phone number of a connected - * isdn network interface */ - if (arg) { - if (copy_from_user(&phone, argp, sizeof(phone))) - return -EFAULT; - return isdn_net_getpeer(&phone, argp); - } else - return -EINVAL; - default: - return -EINVAL; - } - } - if (!dev->drivers) - return -ENODEV; - if (minor <= ISDN_MINOR_BMAX) { - drvidx = isdn_minor2drv(minor); - if (drvidx < 0) - return -ENODEV; - if (!(dev->drv[drvidx]->flags & DRV_FLAG_RUNNING)) - return -ENODEV; - return 0; - } - if (minor <= ISDN_MINOR_CTRLMAX) { -/* - * isdn net devices manage lots of configuration variables as linked lists. - * Those lists must only be manipulated from user space. Some of the ioctl's - * service routines access user space and are not atomic. Therefore, ioctl's - * manipulating the lists and ioctl's sleeping while accessing the lists - * are serialized by means of a semaphore. - */ - switch (cmd) { - case IIOCNETDWRSET: - printk(KERN_INFO "INFO: ISDN_DW_ABC_EXTENSION not enabled\n"); - return (-EINVAL); - case IIOCNETLCR: - printk(KERN_INFO "INFO: ISDN_ABC_LCR_SUPPORT not enabled\n"); - return -ENODEV; - case IIOCNETAIF: - /* Add a network-interface */ - if (arg) { - if (copy_from_user(name, argp, sizeof(name))) - return -EFAULT; - s = name; - } else { - s = NULL; - } - ret = mutex_lock_interruptible(&dev->mtx); - if (ret) return ret; - if ((s = isdn_net_new(s, NULL))) { - if (copy_to_user(argp, s, strlen(s) + 1)) { - ret = -EFAULT; - } else { - ret = 0; - } - } else - ret = -ENODEV; - mutex_unlock(&dev->mtx); - return ret; - case IIOCNETASL: - /* Add a slave to a network-interface */ - if (arg) { - if (copy_from_user(bname, argp, sizeof(bname) - 1)) - return -EFAULT; - bname[sizeof(bname)-1] = 0; - } else - return -EINVAL; - ret = mutex_lock_interruptible(&dev->mtx); - if (ret) return ret; - if ((s = isdn_net_newslave(bname))) { - if (copy_to_user(argp, s, strlen(s) + 1)) { - ret = -EFAULT; - } else { - ret = 0; - } - } else - ret = -ENODEV; - mutex_unlock(&dev->mtx); - return ret; - case IIOCNETDIF: - /* Delete a network-interface */ - if (arg) { - if (copy_from_user(name, argp, sizeof(name))) - return -EFAULT; - ret = mutex_lock_interruptible(&dev->mtx); - if (ret) return ret; - ret = isdn_net_rm(name); - mutex_unlock(&dev->mtx); - return ret; - } else - return -EINVAL; - case IIOCNETSCF: - /* Set configurable parameters of a network-interface */ - if (arg) { - if (copy_from_user(&cfg, argp, sizeof(cfg))) - return -EFAULT; - return isdn_net_setcfg(&cfg); - } else - return -EINVAL; - case IIOCNETGCF: - /* Get configurable parameters of a network-interface */ - if (arg) { - if (copy_from_user(&cfg, argp, sizeof(cfg))) - return -EFAULT; - if (!(ret = isdn_net_getcfg(&cfg))) { - if (copy_to_user(argp, &cfg, sizeof(cfg))) - return -EFAULT; - } - return ret; - } else - return -EINVAL; - case IIOCNETANM: - /* Add a phone-number to a network-interface */ - if (arg) { - if (copy_from_user(&phone, argp, sizeof(phone))) - return -EFAULT; - ret = mutex_lock_interruptible(&dev->mtx); - if (ret) return ret; - ret = isdn_net_addphone(&phone); - mutex_unlock(&dev->mtx); - return ret; - } else - return -EINVAL; - case IIOCNETGNM: - /* Get list of phone-numbers of a network-interface */ - if (arg) { - if (copy_from_user(&phone, argp, sizeof(phone))) - return -EFAULT; - ret = mutex_lock_interruptible(&dev->mtx); - if (ret) return ret; - ret = isdn_net_getphones(&phone, argp); - mutex_unlock(&dev->mtx); - return ret; - } else - return -EINVAL; - case IIOCNETDNM: - /* Delete a phone-number of a network-interface */ - if (arg) { - if (copy_from_user(&phone, argp, sizeof(phone))) - return -EFAULT; - ret = mutex_lock_interruptible(&dev->mtx); - if (ret) return ret; - ret = isdn_net_delphone(&phone); - mutex_unlock(&dev->mtx); - return ret; - } else - return -EINVAL; - case IIOCNETDIL: - /* Force dialing of a network-interface */ - if (arg) { - if (copy_from_user(name, argp, sizeof(name))) - return -EFAULT; - return isdn_net_force_dial(name); - } else - return -EINVAL; -#ifdef CONFIG_ISDN_PPP - case IIOCNETALN: - if (!arg) - return -EINVAL; - if (copy_from_user(name, argp, sizeof(name))) - return -EFAULT; - return isdn_ppp_dial_slave(name); - case IIOCNETDLN: - if (!arg) - return -EINVAL; - if (copy_from_user(name, argp, sizeof(name))) - return -EFAULT; - return isdn_ppp_hangup_slave(name); -#endif - case IIOCNETHUP: - /* Force hangup of a network-interface */ - if (!arg) - return -EINVAL; - if (copy_from_user(name, argp, sizeof(name))) - return -EFAULT; - return isdn_net_force_hangup(name); - break; - case IIOCSETVER: - dev->net_verbose = arg; - printk(KERN_INFO "isdn: Verbose-Level is %d\n", dev->net_verbose); - return 0; - case IIOCSETGST: - if (arg) - dev->global_flags |= ISDN_GLOBAL_STOPPED; - else - dev->global_flags &= ~ISDN_GLOBAL_STOPPED; - printk(KERN_INFO "isdn: Global Mode %s\n", - (dev->global_flags & ISDN_GLOBAL_STOPPED) ? "stopped" : "running"); - return 0; - case IIOCSETBRJ: - drvidx = -1; - if (arg) { - int i; - char *p; - if (copy_from_user(&iocts, argp, - sizeof(isdn_ioctl_struct))) - return -EFAULT; - iocts.drvid[sizeof(iocts.drvid) - 1] = 0; - if (strlen(iocts.drvid)) { - if ((p = strchr(iocts.drvid, ','))) - *p = 0; - drvidx = -1; - for (i = 0; i < ISDN_MAX_DRIVERS; i++) - if (!(strcmp(dev->drvid[i], iocts.drvid))) { - drvidx = i; - break; - } - } - } - if (drvidx == -1) - return -ENODEV; - if (iocts.arg) - dev->drv[drvidx]->flags |= DRV_FLAG_REJBUS; - else - dev->drv[drvidx]->flags &= ~DRV_FLAG_REJBUS; - return 0; - case IIOCSIGPRF: - dev->profd = current; - return 0; - break; - case IIOCGETPRF: - /* Get all Modem-Profiles */ - if (arg) { - char __user *p = argp; - int i; - - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - if (copy_to_user(p, dev->mdm.info[i].emu.profile, - ISDN_MODEM_NUMREG)) - return -EFAULT; - p += ISDN_MODEM_NUMREG; - if (copy_to_user(p, dev->mdm.info[i].emu.pmsn, ISDN_MSNLEN)) - return -EFAULT; - p += ISDN_MSNLEN; - if (copy_to_user(p, dev->mdm.info[i].emu.plmsn, ISDN_LMSNLEN)) - return -EFAULT; - p += ISDN_LMSNLEN; - } - return (ISDN_MODEM_NUMREG + ISDN_MSNLEN + ISDN_LMSNLEN) * ISDN_MAX_CHANNELS; - } else - return -EINVAL; - break; - case IIOCSETPRF: - /* Set all Modem-Profiles */ - if (arg) { - char __user *p = argp; - int i; - - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - if (copy_from_user(dev->mdm.info[i].emu.profile, p, - ISDN_MODEM_NUMREG)) - return -EFAULT; - p += ISDN_MODEM_NUMREG; - if (copy_from_user(dev->mdm.info[i].emu.plmsn, p, ISDN_LMSNLEN)) - return -EFAULT; - p += ISDN_LMSNLEN; - if (copy_from_user(dev->mdm.info[i].emu.pmsn, p, ISDN_MSNLEN)) - return -EFAULT; - p += ISDN_MSNLEN; - } - return 0; - } else - return -EINVAL; - break; - case IIOCSETMAP: - case IIOCGETMAP: - /* Set/Get MSN->EAZ-Mapping for a driver */ - if (arg) { - - if (copy_from_user(&iocts, argp, - sizeof(isdn_ioctl_struct))) - return -EFAULT; - iocts.drvid[sizeof(iocts.drvid) - 1] = 0; - if (strlen(iocts.drvid)) { - drvidx = -1; - for (i = 0; i < ISDN_MAX_DRIVERS; i++) - if (!(strcmp(dev->drvid[i], iocts.drvid))) { - drvidx = i; - break; - } - } else - drvidx = 0; - if (drvidx == -1) - return -ENODEV; - if (cmd == IIOCSETMAP) { - int loop = 1; - - p = (char __user *) iocts.arg; - i = 0; - while (loop) { - int j = 0; - - while (1) { - get_user(bname[j], p++); - switch (bname[j]) { - case '\0': - loop = 0; - /* Fall through */ - case ',': - bname[j] = '\0'; - strcpy(dev->drv[drvidx]->msn2eaz[i], bname); - j = ISDN_MSNLEN; - break; - default: - j++; - } - if (j >= ISDN_MSNLEN) - break; - } - if (++i > 9) - break; - } - } else { - p = (char __user *) iocts.arg; - for (i = 0; i < 10; i++) { - snprintf(bname, sizeof(bname), "%s%s", - strlen(dev->drv[drvidx]->msn2eaz[i]) ? - dev->drv[drvidx]->msn2eaz[i] : "_", - (i < 9) ? "," : "\0"); - if (copy_to_user(p, bname, strlen(bname) + 1)) - return -EFAULT; - p += strlen(bname); - } - } - return 0; - } else - return -EINVAL; - case IIOCDBGVAR: - return -EINVAL; - default: - if ((cmd & IIOCDRVCTL) == IIOCDRVCTL) - cmd = ((cmd >> _IOC_NRSHIFT) & _IOC_NRMASK) & ISDN_DRVIOCTL_MASK; - else - return -EINVAL; - if (arg) { - int i; - char *p; - if (copy_from_user(&iocts, argp, sizeof(isdn_ioctl_struct))) - return -EFAULT; - iocts.drvid[sizeof(iocts.drvid) - 1] = 0; - if (strlen(iocts.drvid)) { - if ((p = strchr(iocts.drvid, ','))) - *p = 0; - drvidx = -1; - for (i = 0; i < ISDN_MAX_DRIVERS; i++) - if (!(strcmp(dev->drvid[i], iocts.drvid))) { - drvidx = i; - break; - } - } else - drvidx = 0; - if (drvidx == -1) - return -ENODEV; - c.driver = drvidx; - c.command = ISDN_CMD_IOCTL; - c.arg = cmd; - memcpy(c.parm.num, &iocts.arg, sizeof(ulong)); - ret = isdn_command(&c); - memcpy(&iocts.arg, c.parm.num, sizeof(ulong)); - if (copy_to_user(argp, &iocts, sizeof(isdn_ioctl_struct))) - return -EFAULT; - return ret; - } else - return -EINVAL; - } - } -#ifdef CONFIG_ISDN_PPP - if (minor <= ISDN_MINOR_PPPMAX) - return (isdn_ppp_ioctl(minor - ISDN_MINOR_PPP, file, cmd, arg)); -#endif - return -ENODEV; - -#undef name -#undef bname -#undef iocts -#undef phone -#undef cfg -} - -static long -isdn_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - int ret; - - mutex_lock(&isdn_mutex); - ret = isdn_ioctl(file, cmd, arg); - mutex_unlock(&isdn_mutex); - - return ret; -} - -/* - * Open the device code. - */ -static int -isdn_open(struct inode *ino, struct file *filep) -{ - uint minor = iminor(ino); - int drvidx; - int chidx; - int retval = -ENODEV; - - mutex_lock(&isdn_mutex); - if (minor == ISDN_MINOR_STATUS) { - infostruct *p; - - if ((p = kmalloc(sizeof(infostruct), GFP_KERNEL))) { - p->next = (char *) dev->infochain; - p->private = (char *) &(filep->private_data); - dev->infochain = p; - /* At opening we allow a single update */ - filep->private_data = (char *) 1; - retval = 0; - goto out; - } else { - retval = -ENOMEM; - goto out; - } - } - if (!dev->channels) - goto out; - if (minor <= ISDN_MINOR_BMAX) { - printk(KERN_WARNING "isdn_open minor %d obsolete!\n", minor); - drvidx = isdn_minor2drv(minor); - if (drvidx < 0) - goto out; - chidx = isdn_minor2chan(minor); - if (!(dev->drv[drvidx]->flags & DRV_FLAG_RUNNING)) - goto out; - if (!(dev->drv[drvidx]->online & (1 << chidx))) - goto out; - isdn_lock_drivers(); - retval = 0; - goto out; - } - if (minor <= ISDN_MINOR_CTRLMAX) { - drvidx = isdn_minor2drv(minor - ISDN_MINOR_CTRL); - if (drvidx < 0) - goto out; - isdn_lock_drivers(); - retval = 0; - goto out; - } -#ifdef CONFIG_ISDN_PPP - if (minor <= ISDN_MINOR_PPPMAX) { - retval = isdn_ppp_open(minor - ISDN_MINOR_PPP, filep); - if (retval == 0) - isdn_lock_drivers(); - goto out; - } -#endif -out: - nonseekable_open(ino, filep); - mutex_unlock(&isdn_mutex); - return retval; -} - -static int -isdn_close(struct inode *ino, struct file *filep) -{ - uint minor = iminor(ino); - - mutex_lock(&isdn_mutex); - if (minor == ISDN_MINOR_STATUS) { - infostruct *p = dev->infochain; - infostruct *q = NULL; - - while (p) { - if (p->private == (char *) &(filep->private_data)) { - if (q) - q->next = p->next; - else - dev->infochain = (infostruct *) (p->next); - kfree(p); - goto out; - } - q = p; - p = (infostruct *) (p->next); - } - printk(KERN_WARNING "isdn: No private data while closing isdnctrl\n"); - goto out; - } - isdn_unlock_drivers(); - if (minor <= ISDN_MINOR_BMAX) - goto out; - if (minor <= ISDN_MINOR_CTRLMAX) { - if (dev->profd == current) - dev->profd = NULL; - goto out; - } -#ifdef CONFIG_ISDN_PPP - if (minor <= ISDN_MINOR_PPPMAX) - isdn_ppp_release(minor - ISDN_MINOR_PPP, filep); -#endif - -out: - mutex_unlock(&isdn_mutex); - return 0; -} - -static const struct file_operations isdn_fops = -{ - .owner = THIS_MODULE, - .llseek = no_llseek, - .read = isdn_read, - .write = isdn_write, - .poll = isdn_poll, - .unlocked_ioctl = isdn_unlocked_ioctl, - .open = isdn_open, - .release = isdn_close, -}; - -char * -isdn_map_eaz2msn(char *msn, int di) -{ - isdn_driver_t *this = dev->drv[di]; - int i; - - if (strlen(msn) == 1) { - i = msn[0] - '0'; - if ((i >= 0) && (i <= 9)) - if (strlen(this->msn2eaz[i])) - return (this->msn2eaz[i]); - } - return (msn); -} - -/* - * Find an unused ISDN-channel, whose feature-flags match the - * given L2- and L3-protocols. - */ -#define L2V (~(ISDN_FEATURE_L2_V11096 | ISDN_FEATURE_L2_V11019 | ISDN_FEATURE_L2_V11038)) - -/* - * This function must be called with holding the dev->lock. - */ -int -isdn_get_free_channel(int usage, int l2_proto, int l3_proto, int pre_dev - , int pre_chan, char *msn) -{ - int i; - ulong features; - ulong vfeatures; - - features = ((1 << l2_proto) | (0x10000 << l3_proto)); - vfeatures = (((1 << l2_proto) | (0x10000 << l3_proto)) & - ~(ISDN_FEATURE_L2_V11096 | ISDN_FEATURE_L2_V11019 | ISDN_FEATURE_L2_V11038)); - /* If Layer-2 protocol is V.110, accept drivers with - * transparent feature even if these don't support V.110 - * because we can emulate this in linklevel. - */ - for (i = 0; i < ISDN_MAX_CHANNELS; i++) - if (USG_NONE(dev->usage[i]) && - (dev->drvmap[i] != -1)) { - int d = dev->drvmap[i]; - if ((dev->usage[i] & ISDN_USAGE_EXCLUSIVE) && - ((pre_dev != d) || (pre_chan != dev->chanmap[i]))) - continue; - if (!strcmp(isdn_map_eaz2msn(msn, d), "-")) - continue; - if (dev->usage[i] & ISDN_USAGE_DISABLED) - continue; /* usage not allowed */ - if (dev->drv[d]->flags & DRV_FLAG_RUNNING) { - if (((dev->drv[d]->interface->features & features) == features) || - (((dev->drv[d]->interface->features & vfeatures) == vfeatures) && - (dev->drv[d]->interface->features & ISDN_FEATURE_L2_TRANS))) { - if ((pre_dev < 0) || (pre_chan < 0)) { - dev->usage[i] &= ISDN_USAGE_EXCLUSIVE; - dev->usage[i] |= usage; - isdn_info_update(); - return i; - } else { - if ((pre_dev == d) && (pre_chan == dev->chanmap[i])) { - dev->usage[i] &= ISDN_USAGE_EXCLUSIVE; - dev->usage[i] |= usage; - isdn_info_update(); - return i; - } - } - } - } - } - return -1; -} - -/* - * Set state of ISDN-channel to 'unused' - */ -void -isdn_free_channel(int di, int ch, int usage) -{ - int i; - - if ((di < 0) || (ch < 0)) { - printk(KERN_WARNING "%s: called with invalid drv(%d) or channel(%d)\n", - __func__, di, ch); - return; - } - for (i = 0; i < ISDN_MAX_CHANNELS; i++) - if (((!usage) || ((dev->usage[i] & ISDN_USAGE_MASK) == usage)) && - (dev->drvmap[i] == di) && - (dev->chanmap[i] == ch)) { - dev->usage[i] &= (ISDN_USAGE_NONE | ISDN_USAGE_EXCLUSIVE); - strcpy(dev->num[i], "???"); - dev->ibytes[i] = 0; - dev->obytes[i] = 0; -// 20.10.99 JIM, try to reinitialize v110 ! - dev->v110emu[i] = 0; - atomic_set(&(dev->v110use[i]), 0); - isdn_v110_close(dev->v110[i]); - dev->v110[i] = NULL; -// 20.10.99 JIM, try to reinitialize v110 ! - isdn_info_update(); - if (dev->drv[di]) - skb_queue_purge(&dev->drv[di]->rpqueue[ch]); - } -} - -/* - * Cancel Exclusive-Flag for ISDN-channel - */ -void -isdn_unexclusive_channel(int di, int ch) -{ - int i; - - for (i = 0; i < ISDN_MAX_CHANNELS; i++) - if ((dev->drvmap[i] == di) && - (dev->chanmap[i] == ch)) { - dev->usage[i] &= ~ISDN_USAGE_EXCLUSIVE; - isdn_info_update(); - return; - } -} - -/* - * writebuf replacement for SKB_ABLE drivers - */ -static int -isdn_writebuf_stub(int drvidx, int chan, const u_char __user *buf, int len) -{ - int ret; - int hl = dev->drv[drvidx]->interface->hl_hdrlen; - struct sk_buff *skb = alloc_skb(hl + len, GFP_ATOMIC); - - if (!skb) - return -ENOMEM; - skb_reserve(skb, hl); - if (copy_from_user(skb_put(skb, len), buf, len)) { - dev_kfree_skb(skb); - return -EFAULT; - } - ret = dev->drv[drvidx]->interface->writebuf_skb(drvidx, chan, 1, skb); - if (ret <= 0) - dev_kfree_skb(skb); - if (ret > 0) - dev->obytes[isdn_dc2minor(drvidx, chan)] += ret; - return ret; -} - -/* - * Return: length of data on success, -ERRcode on failure. - */ -int -isdn_writebuf_skb_stub(int drvidx, int chan, int ack, struct sk_buff *skb) -{ - int ret; - struct sk_buff *nskb = NULL; - int v110_ret = skb->len; - int idx = isdn_dc2minor(drvidx, chan); - - if (dev->v110[idx]) { - atomic_inc(&dev->v110use[idx]); - nskb = isdn_v110_encode(dev->v110[idx], skb); - atomic_dec(&dev->v110use[idx]); - if (!nskb) - return 0; - v110_ret = *((int *)nskb->data); - skb_pull(nskb, sizeof(int)); - if (!nskb->len) { - dev_kfree_skb(nskb); - return v110_ret; - } - /* V.110 must always be acknowledged */ - ack = 1; - ret = dev->drv[drvidx]->interface->writebuf_skb(drvidx, chan, ack, nskb); - } else { - int hl = dev->drv[drvidx]->interface->hl_hdrlen; - - if (skb_headroom(skb) < hl) { - /* - * This should only occur when new HL driver with - * increased hl_hdrlen was loaded after netdevice - * was created and connected to the new driver. - * - * The V.110 branch (re-allocates on its own) does - * not need this - */ - struct sk_buff *skb_tmp; - - skb_tmp = skb_realloc_headroom(skb, hl); - printk(KERN_DEBUG "isdn_writebuf_skb_stub: reallocating headroom%s\n", skb_tmp ? "" : " failed"); - if (!skb_tmp) return -ENOMEM; /* 0 better? */ - ret = dev->drv[drvidx]->interface->writebuf_skb(drvidx, chan, ack, skb_tmp); - if (ret > 0) { - dev_kfree_skb(skb); - } else { - dev_kfree_skb(skb_tmp); - } - } else { - ret = dev->drv[drvidx]->interface->writebuf_skb(drvidx, chan, ack, skb); - } - } - if (ret > 0) { - dev->obytes[idx] += ret; - if (dev->v110[idx]) { - atomic_inc(&dev->v110use[idx]); - dev->v110[idx]->skbuser++; - atomic_dec(&dev->v110use[idx]); - /* For V.110 return unencoded data length */ - ret = v110_ret; - /* if the complete frame was send we free the skb; - if not upper function will requeue the skb */ - if (ret == skb->len) - dev_kfree_skb(skb); - } - } else - if (dev->v110[idx]) - dev_kfree_skb(nskb); - return ret; -} - -static int -isdn_add_channels(isdn_driver_t *d, int drvidx, int n, int adding) -{ - int j, k, m; - - init_waitqueue_head(&d->st_waitq); - if (d->flags & DRV_FLAG_RUNNING) - return -1; - if (n < 1) return 0; - - m = (adding) ? d->channels + n : n; - - if (dev->channels + n > ISDN_MAX_CHANNELS) { - printk(KERN_WARNING "register_isdn: Max. %d channels supported\n", - ISDN_MAX_CHANNELS); - return -1; - } - - if ((adding) && (d->rcverr)) - kfree(d->rcverr); - if (!(d->rcverr = kcalloc(m, sizeof(int), GFP_ATOMIC))) { - printk(KERN_WARNING "register_isdn: Could not alloc rcverr\n"); - return -1; - } - - if ((adding) && (d->rcvcount)) - kfree(d->rcvcount); - if (!(d->rcvcount = kcalloc(m, sizeof(int), GFP_ATOMIC))) { - printk(KERN_WARNING "register_isdn: Could not alloc rcvcount\n"); - if (!adding) - kfree(d->rcverr); - return -1; - } - - if ((adding) && (d->rpqueue)) { - for (j = 0; j < d->channels; j++) - skb_queue_purge(&d->rpqueue[j]); - kfree(d->rpqueue); - } - d->rpqueue = kmalloc_array(m, sizeof(struct sk_buff_head), GFP_ATOMIC); - if (!d->rpqueue) { - printk(KERN_WARNING "register_isdn: Could not alloc rpqueue\n"); - if (!adding) { - kfree(d->rcvcount); - kfree(d->rcverr); - } - return -1; - } - for (j = 0; j < m; j++) { - skb_queue_head_init(&d->rpqueue[j]); - } - - if ((adding) && (d->rcv_waitq)) - kfree(d->rcv_waitq); - d->rcv_waitq = kmalloc(array3_size(sizeof(wait_queue_head_t), 2, m), - GFP_ATOMIC); - if (!d->rcv_waitq) { - printk(KERN_WARNING "register_isdn: Could not alloc rcv_waitq\n"); - if (!adding) { - kfree(d->rpqueue); - kfree(d->rcvcount); - kfree(d->rcverr); - } - return -1; - } - d->snd_waitq = d->rcv_waitq + m; - for (j = 0; j < m; j++) { - init_waitqueue_head(&d->rcv_waitq[j]); - init_waitqueue_head(&d->snd_waitq[j]); - } - - dev->channels += n; - for (j = d->channels; j < m; j++) - for (k = 0; k < ISDN_MAX_CHANNELS; k++) - if (dev->chanmap[k] < 0) { - dev->chanmap[k] = j; - dev->drvmap[k] = drvidx; - break; - } - d->channels = m; - return 0; -} - -/* - * Low-level-driver registration - */ - -static void -set_global_features(void) -{ - int drvidx; - - dev->global_features = 0; - for (drvidx = 0; drvidx < ISDN_MAX_DRIVERS; drvidx++) { - if (!dev->drv[drvidx]) - continue; - if (dev->drv[drvidx]->interface) - dev->global_features |= dev->drv[drvidx]->interface->features; - } -} - -#ifdef CONFIG_ISDN_DIVERSION - -static char *map_drvname(int di) -{ - if ((di < 0) || (di >= ISDN_MAX_DRIVERS)) - return (NULL); - return (dev->drvid[di]); /* driver name */ -} /* map_drvname */ - -static int map_namedrv(char *id) -{ int i; - - for (i = 0; i < ISDN_MAX_DRIVERS; i++) - { if (!strcmp(dev->drvid[i], id)) - return (i); - } - return (-1); -} /* map_namedrv */ - -int DIVERT_REG_NAME(isdn_divert_if *i_div) -{ - if (i_div->if_magic != DIVERT_IF_MAGIC) - return (DIVERT_VER_ERR); - switch (i_div->cmd) - { - case DIVERT_CMD_REL: - if (divert_if != i_div) - return (DIVERT_REL_ERR); - divert_if = NULL; /* free interface */ - return (DIVERT_NO_ERR); - - case DIVERT_CMD_REG: - if (divert_if) - return (DIVERT_REG_ERR); - i_div->ll_cmd = isdn_command; /* set command function */ - i_div->drv_to_name = map_drvname; - i_div->name_to_drv = map_namedrv; - divert_if = i_div; /* remember interface */ - return (DIVERT_NO_ERR); - - default: - return (DIVERT_CMD_ERR); - } -} /* DIVERT_REG_NAME */ - -EXPORT_SYMBOL(DIVERT_REG_NAME); - -#endif /* CONFIG_ISDN_DIVERSION */ - - -EXPORT_SYMBOL(register_isdn); -#ifdef CONFIG_ISDN_PPP -EXPORT_SYMBOL(isdn_ppp_register_compressor); -EXPORT_SYMBOL(isdn_ppp_unregister_compressor); -#endif - -int -register_isdn(isdn_if *i) -{ - isdn_driver_t *d; - int j; - ulong flags; - int drvidx; - - if (dev->drivers >= ISDN_MAX_DRIVERS) { - printk(KERN_WARNING "register_isdn: Max. %d drivers supported\n", - ISDN_MAX_DRIVERS); - return 0; - } - if (!i->writebuf_skb) { - printk(KERN_WARNING "register_isdn: No write routine given.\n"); - return 0; - } - if (!(d = kzalloc(sizeof(isdn_driver_t), GFP_KERNEL))) { - printk(KERN_WARNING "register_isdn: Could not alloc driver-struct\n"); - return 0; - } - - d->maxbufsize = i->maxbufsize; - d->pktcount = 0; - d->stavail = 0; - d->flags = DRV_FLAG_LOADED; - d->online = 0; - d->interface = i; - d->channels = 0; - spin_lock_irqsave(&dev->lock, flags); - for (drvidx = 0; drvidx < ISDN_MAX_DRIVERS; drvidx++) - if (!dev->drv[drvidx]) - break; - if (isdn_add_channels(d, drvidx, i->channels, 0)) { - spin_unlock_irqrestore(&dev->lock, flags); - kfree(d); - return 0; - } - i->channels = drvidx; - i->rcvcallb_skb = isdn_receive_skb_callback; - i->statcallb = isdn_status_callback; - if (!strlen(i->id)) - sprintf(i->id, "line%d", drvidx); - for (j = 0; j < drvidx; j++) - if (!strcmp(i->id, dev->drvid[j])) - sprintf(i->id, "line%d", drvidx); - dev->drv[drvidx] = d; - strcpy(dev->drvid[drvidx], i->id); - isdn_info_update(); - dev->drivers++; - set_global_features(); - spin_unlock_irqrestore(&dev->lock, flags); - return 1; -} - -/* -***************************************************************************** -* And now the modules code. -***************************************************************************** -*/ - -static char * -isdn_getrev(const char *revision) -{ - char *rev; - char *p; - - if ((p = strchr(revision, ':'))) { - rev = p + 2; - p = strchr(rev, '$'); - *--p = 0; - } else - rev = "???"; - return rev; -} - -/* - * Allocate and initialize all data, register modem-devices - */ -static int __init isdn_init(void) -{ - int i; - char tmprev[50]; - - dev = vzalloc(sizeof(isdn_dev)); - if (!dev) { - printk(KERN_WARNING "isdn: Could not allocate device-struct.\n"); - return -EIO; - } - timer_setup(&dev->timer, isdn_timer_funct, 0); - spin_lock_init(&dev->lock); - spin_lock_init(&dev->timerlock); -#ifdef MODULE - dev->owner = THIS_MODULE; -#endif - mutex_init(&dev->mtx); - init_waitqueue_head(&dev->info_waitq); - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - dev->drvmap[i] = -1; - dev->chanmap[i] = -1; - dev->m_idx[i] = -1; - strcpy(dev->num[i], "???"); - } - if (register_chrdev(ISDN_MAJOR, "isdn", &isdn_fops)) { - printk(KERN_WARNING "isdn: Could not register control devices\n"); - vfree(dev); - return -EIO; - } - if ((isdn_tty_modem_init()) < 0) { - printk(KERN_WARNING "isdn: Could not register tty devices\n"); - vfree(dev); - unregister_chrdev(ISDN_MAJOR, "isdn"); - return -EIO; - } -#ifdef CONFIG_ISDN_PPP - if (isdn_ppp_init() < 0) { - printk(KERN_WARNING "isdn: Could not create PPP-device-structs\n"); - isdn_tty_exit(); - unregister_chrdev(ISDN_MAJOR, "isdn"); - vfree(dev); - return -EIO; - } -#endif /* CONFIG_ISDN_PPP */ - - strcpy(tmprev, isdn_revision); - printk(KERN_NOTICE "ISDN subsystem Rev: %s/", isdn_getrev(tmprev)); - strcpy(tmprev, isdn_net_revision); - printk("%s/", isdn_getrev(tmprev)); - strcpy(tmprev, isdn_ppp_revision); - printk("%s/", isdn_getrev(tmprev)); - strcpy(tmprev, isdn_audio_revision); - printk("%s/", isdn_getrev(tmprev)); - strcpy(tmprev, isdn_v110_revision); - printk("%s", isdn_getrev(tmprev)); - -#ifdef MODULE - printk(" loaded\n"); -#else - printk("\n"); -#endif - isdn_info_update(); - return 0; -} - -/* - * Unload module - */ -static void __exit isdn_exit(void) -{ -#ifdef CONFIG_ISDN_PPP - isdn_ppp_cleanup(); -#endif - if (isdn_net_rmall() < 0) { - printk(KERN_WARNING "isdn: net-device busy, remove cancelled\n"); - return; - } - isdn_tty_exit(); - unregister_chrdev(ISDN_MAJOR, "isdn"); - del_timer_sync(&dev->timer); - /* call vfree with interrupts enabled, else it will hang */ - vfree(dev); - printk(KERN_NOTICE "ISDN-subsystem unloaded\n"); -} - -module_init(isdn_init); -module_exit(isdn_exit); diff --git a/drivers/isdn/i4l/isdn_common.h b/drivers/isdn/i4l/isdn_common.h deleted file mode 100644 index 2260ef07ab9c..000000000000 --- a/drivers/isdn/i4l/isdn_common.h +++ /dev/null @@ -1,47 +0,0 @@ -/* $Id: isdn_common.h,v 1.1.2.2 2004/01/12 22:37:19 keil Exp $ - * - * header for Linux ISDN subsystem - * common used functions and debugging-switches (linklevel). - * - * Copyright 1994-1999 by Fritz Elfert (fritz@isdn4linux.de) - * Copyright 1995,96 by Thinking Objects Software GmbH Wuerzburg - * Copyright 1995,96 by Michael Hipp (Michael.Hipp@student.uni-tuebingen.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#undef ISDN_DEBUG_MODEM_OPEN -#undef ISDN_DEBUG_MODEM_IOCTL -#undef ISDN_DEBUG_MODEM_WAITSENT -#undef ISDN_DEBUG_MODEM_HUP -#undef ISDN_DEBUG_MODEM_ICALL -#undef ISDN_DEBUG_MODEM_DUMP -#undef ISDN_DEBUG_MODEM_VOICE -#undef ISDN_DEBUG_AT -#undef ISDN_DEBUG_NET_DUMP -#undef ISDN_DEBUG_NET_DIAL -#undef ISDN_DEBUG_NET_ICALL - -/* Prototypes */ -extern void isdn_lock_drivers(void); -extern void isdn_unlock_drivers(void); -extern void isdn_free_channel(int di, int ch, int usage); -extern void isdn_all_eaz(int di, int ch); -extern int isdn_command(isdn_ctrl *); -extern int isdn_dc2minor(int di, int ch); -extern void isdn_info_update(void); -extern char *isdn_map_eaz2msn(char *msn, int di); -extern void isdn_timer_ctrl(int tf, int onoff); -extern void isdn_unexclusive_channel(int di, int ch); -extern int isdn_getnum(char **); -extern int isdn_readbchan(int, int, u_char *, u_char *, int, wait_queue_head_t *); -extern int isdn_readbchan_tty(int, int, struct tty_port *, int); -extern int isdn_get_free_channel(int, int, int, int, int, char *); -extern int isdn_writebuf_skb_stub(int, int, int, struct sk_buff *); -extern int register_isdn(isdn_if *i); -extern int isdn_msncmp(const char *, const char *); -#if defined(ISDN_DEBUG_NET_DUMP) || defined(ISDN_DEBUG_MODEM_DUMP) -extern void isdn_dumppkt(char *, u_char *, int, int); -#endif diff --git a/drivers/isdn/i4l/isdn_concap.c b/drivers/isdn/i4l/isdn_concap.c deleted file mode 100644 index 336523ec077c..000000000000 --- a/drivers/isdn/i4l/isdn_concap.c +++ /dev/null @@ -1,99 +0,0 @@ -/* $Id: isdn_concap.c,v 1.1.2.2 2004/01/12 22:37:19 keil Exp $ - * - * Linux ISDN subsystem, protocol encapsulation - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -/* Stuff to support the concap_proto by isdn4linux. isdn4linux - specific - * stuff goes here. Stuff that depends only on the concap protocol goes to - * another -- protocol specific -- source file. - * - */ - - -#include -#include "isdn_x25iface.h" -#include "isdn_net.h" -#include -#include "isdn_concap.h" - - -/* The following set of device service operations are for encapsulation - protocols that require for reliable datalink semantics. That means: - - - before any data is to be submitted the connection must explicitly - be set up. - - after the successful set up of the connection is signalled the - connection is considered to be reliably up. - - Auto-dialing ist not compatible with this requirements. Thus, auto-dialing - is completely bypassed. - - It might be possible to implement a (non standardized) datalink protocol - that provides a reliable data link service while using some auto dialing - mechanism. Such a protocol would need an auxiliary channel (i.e. user-user- - signaling on the D-channel) while the B-channel is down. -*/ - - -static int isdn_concap_dl_data_req(struct concap_proto *concap, struct sk_buff *skb) -{ - struct net_device *ndev = concap->net_dev; - isdn_net_dev *nd = ((isdn_net_local *) netdev_priv(ndev))->netdev; - isdn_net_local *lp = isdn_net_get_locked_lp(nd); - - IX25DEBUG("isdn_concap_dl_data_req: %s \n", concap->net_dev->name); - if (!lp) { - IX25DEBUG("isdn_concap_dl_data_req: %s : isdn_net_send_skb returned %d\n", concap->net_dev->name, 1); - return 1; - } - lp->huptimer = 0; - isdn_net_writebuf_skb(lp, skb); - spin_unlock_bh(&lp->xmit_lock); - IX25DEBUG("isdn_concap_dl_data_req: %s : isdn_net_send_skb returned %d\n", concap->net_dev->name, 0); - return 0; -} - - -static int isdn_concap_dl_connect_req(struct concap_proto *concap) -{ - struct net_device *ndev = concap->net_dev; - isdn_net_local *lp = netdev_priv(ndev); - int ret; - IX25DEBUG("isdn_concap_dl_connect_req: %s \n", ndev->name); - - /* dial ... */ - ret = isdn_net_dial_req(lp); - if (ret) IX25DEBUG("dialing failed\n"); - return ret; -} - -static int isdn_concap_dl_disconn_req(struct concap_proto *concap) -{ - IX25DEBUG("isdn_concap_dl_disconn_req: %s \n", concap->net_dev->name); - - isdn_net_hangup(concap->net_dev); - return 0; -} - -struct concap_device_ops isdn_concap_reliable_dl_dops = { - .data_req = &isdn_concap_dl_data_req, - .connect_req = &isdn_concap_dl_connect_req, - .disconn_req = &isdn_concap_dl_disconn_req -}; - -/* The following should better go into a dedicated source file such that - this sourcefile does not need to include any protocol specific header - files. For now: -*/ -struct concap_proto *isdn_concap_new(int encap) -{ - switch (encap) { - case ISDN_NET_ENCAP_X25IFACE: - return isdn_x25iface_proto_new(); - } - return NULL; -} diff --git a/drivers/isdn/i4l/isdn_concap.h b/drivers/isdn/i4l/isdn_concap.h deleted file mode 100644 index cd7e3ba74e25..000000000000 --- a/drivers/isdn/i4l/isdn_concap.h +++ /dev/null @@ -1,11 +0,0 @@ -/* $Id: isdn_concap.h,v 1.1.2.2 2004/01/12 22:37:19 keil Exp $ - * - * Linux ISDN subsystem, protocol encapsulation - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -extern struct concap_device_ops isdn_concap_reliable_dl_dops; -extern struct concap_proto *isdn_concap_new(int); diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c deleted file mode 100644 index c138f66f2659..000000000000 --- a/drivers/isdn/i4l/isdn_net.c +++ /dev/null @@ -1,3198 +0,0 @@ -/* $Id: isdn_net.c,v 1.1.2.2 2004/01/12 22:37:19 keil Exp $ - * - * Linux ISDN subsystem, network interfaces and related functions (linklevel). - * - * Copyright 1994-1998 by Fritz Elfert (fritz@isdn4linux.de) - * Copyright 1995,96 by Thinking Objects Software GmbH Wuerzburg - * Copyright 1995,96 by Michael Hipp (Michael.Hipp@student.uni-tuebingen.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - * Data Over Voice (DOV) support added - Guy Ellis 23-Mar-02 - * guy@traverse.com.au - * Outgoing calls - looks for a 'V' in first char of dialed number - * Incoming calls - checks first character of eaz as follows: - * Numeric - accept DATA only - original functionality - * 'V' - accept VOICE (DOV) only - * 'B' - accept BOTH DATA and DOV types - * - * Jan 2001: fix CISCO HDLC Bjoern A. Zeeb - * for info on the protocol, see - * http://i4l.zabbadoz.net/i4l/cisco-hdlc.txt - */ - -#include -#include -#include -#include -#include -#include -#include "isdn_common.h" -#include "isdn_net.h" -#ifdef CONFIG_ISDN_PPP -#include "isdn_ppp.h" -#endif -#ifdef CONFIG_ISDN_X25 -#include -#include "isdn_concap.h" -#endif - - -/* - * Outline of new tbusy handling: - * - * Old method, roughly spoken, consisted of setting tbusy when entering - * isdn_net_start_xmit() and at several other locations and clearing - * it from isdn_net_start_xmit() thread when sending was successful. - * - * With 2.3.x multithreaded network core, to prevent problems, tbusy should - * only be set by the isdn_net_start_xmit() thread and only when a tx-busy - * condition is detected. Other threads (in particular isdn_net_stat_callb()) - * are only allowed to clear tbusy. - * - * -HE - */ - -/* - * About SOFTNET: - * Most of the changes were pretty obvious and basically done by HE already. - * - * One problem of the isdn net device code is that it uses struct net_device - * for masters and slaves. However, only master interface are registered to - * the network layer, and therefore, it only makes sense to call netif_* - * functions on them. - * - * --KG - */ - -/* - * Find out if the netdevice has been ifup-ed yet. - * For slaves, look at the corresponding master. - */ -static __inline__ int isdn_net_device_started(isdn_net_dev *n) -{ - isdn_net_local *lp = n->local; - struct net_device *dev; - - if (lp->master) - dev = lp->master; - else - dev = n->dev; - return netif_running(dev); -} - -/* - * wake up the network -> net_device queue. - * For slaves, wake the corresponding master interface. - */ -static __inline__ void isdn_net_device_wake_queue(isdn_net_local *lp) -{ - if (lp->master) - netif_wake_queue(lp->master); - else - netif_wake_queue(lp->netdev->dev); -} - -/* - * stop the network -> net_device queue. - * For slaves, stop the corresponding master interface. - */ -static __inline__ void isdn_net_device_stop_queue(isdn_net_local *lp) -{ - if (lp->master) - netif_stop_queue(lp->master); - else - netif_stop_queue(lp->netdev->dev); -} - -/* - * find out if the net_device which this lp belongs to (lp can be - * master or slave) is busy. It's busy iff all (master and slave) - * queues are busy - */ -static __inline__ int isdn_net_device_busy(isdn_net_local *lp) -{ - isdn_net_local *nlp; - isdn_net_dev *nd; - unsigned long flags; - - if (!isdn_net_lp_busy(lp)) - return 0; - - if (lp->master) - nd = ISDN_MASTER_PRIV(lp)->netdev; - else - nd = lp->netdev; - - spin_lock_irqsave(&nd->queue_lock, flags); - nlp = lp->next; - while (nlp != lp) { - if (!isdn_net_lp_busy(nlp)) { - spin_unlock_irqrestore(&nd->queue_lock, flags); - return 0; - } - nlp = nlp->next; - } - spin_unlock_irqrestore(&nd->queue_lock, flags); - return 1; -} - -static __inline__ void isdn_net_inc_frame_cnt(isdn_net_local *lp) -{ - atomic_inc(&lp->frame_cnt); - if (isdn_net_device_busy(lp)) - isdn_net_device_stop_queue(lp); -} - -static __inline__ void isdn_net_dec_frame_cnt(isdn_net_local *lp) -{ - atomic_dec(&lp->frame_cnt); - - if (!(isdn_net_device_busy(lp))) { - if (!skb_queue_empty(&lp->super_tx_queue)) { - schedule_work(&lp->tqueue); - } else { - isdn_net_device_wake_queue(lp); - } - } -} - -static __inline__ void isdn_net_zero_frame_cnt(isdn_net_local *lp) -{ - atomic_set(&lp->frame_cnt, 0); -} - -/* For 2.2.x we leave the transmitter busy timeout at 2 secs, just - * to be safe. - * For 2.3.x we push it up to 20 secs, because call establishment - * (in particular callback) may take such a long time, and we - * don't want confusing messages in the log. However, there is a slight - * possibility that this large timeout will break other things like MPPP, - * which might rely on the tx timeout. If so, we'll find out this way... - */ - -#define ISDN_NET_TX_TIMEOUT (20 * HZ) - -/* Prototypes */ - -static int isdn_net_force_dial_lp(isdn_net_local *); -static netdev_tx_t isdn_net_start_xmit(struct sk_buff *, - struct net_device *); - -static void isdn_net_ciscohdlck_connected(isdn_net_local *lp); -static void isdn_net_ciscohdlck_disconnected(isdn_net_local *lp); - -char *isdn_net_revision = "$Revision: 1.1.2.2 $"; - -/* - * Code for raw-networking over ISDN - */ - -static void -isdn_net_unreachable(struct net_device *dev, struct sk_buff *skb, char *reason) -{ - if (skb) { - - u_short proto = ntohs(skb->protocol); - - printk(KERN_DEBUG "isdn_net: %s: %s, signalling dst_link_failure %s\n", - dev->name, - (reason != NULL) ? reason : "unknown", - (proto != ETH_P_IP) ? "Protocol != ETH_P_IP" : ""); - - dst_link_failure(skb); - } - else { /* dial not triggered by rawIP packet */ - printk(KERN_DEBUG "isdn_net: %s: %s\n", - dev->name, - (reason != NULL) ? reason : "reason unknown"); - } -} - -static void -isdn_net_reset(struct net_device *dev) -{ -#ifdef CONFIG_ISDN_X25 - struct concap_device_ops *dops = - ((isdn_net_local *)netdev_priv(dev))->dops; - struct concap_proto *cprot = - ((isdn_net_local *)netdev_priv(dev))->netdev->cprot; -#endif -#ifdef CONFIG_ISDN_X25 - if (cprot && cprot->pops && dops) - cprot->pops->restart(cprot, dev, dops); -#endif -} - -/* Open/initialize the board. */ -static int -isdn_net_open(struct net_device *dev) -{ - int i; - struct net_device *p; - struct in_device *in_dev; - - /* moved here from isdn_net_reset, because only the master has an - interface associated which is supposed to be started. BTW: - we need to call netif_start_queue, not netif_wake_queue here */ - netif_start_queue(dev); - - isdn_net_reset(dev); - /* Fill in the MAC-level header (not needed, but for compatibility... */ - for (i = 0; i < ETH_ALEN - sizeof(u32); i++) - dev->dev_addr[i] = 0xfc; - if ((in_dev = dev->ip_ptr) != NULL) { - /* - * Any address will do - we take the first - */ - struct in_ifaddr *ifa = in_dev->ifa_list; - if (ifa != NULL) - memcpy(dev->dev_addr + 2, &ifa->ifa_local, 4); - } - - /* If this interface has slaves, start them also */ - p = MASTER_TO_SLAVE(dev); - if (p) { - while (p) { - isdn_net_reset(p); - p = MASTER_TO_SLAVE(p); - } - } - isdn_lock_drivers(); - return 0; -} - -/* - * Assign an ISDN-channel to a net-interface - */ -static void -isdn_net_bind_channel(isdn_net_local *lp, int idx) -{ - lp->flags |= ISDN_NET_CONNECTED; - lp->isdn_device = dev->drvmap[idx]; - lp->isdn_channel = dev->chanmap[idx]; - dev->rx_netdev[idx] = lp->netdev; - dev->st_netdev[idx] = lp->netdev; -} - -/* - * unbind a net-interface (resets interface after an error) - */ -static void -isdn_net_unbind_channel(isdn_net_local *lp) -{ - skb_queue_purge(&lp->super_tx_queue); - - if (!lp->master) { /* reset only master device */ - /* Moral equivalent of dev_purge_queues(): - BEWARE! This chunk of code cannot be called from hardware - interrupt handler. I hope it is true. --ANK - */ - qdisc_reset_all_tx(lp->netdev->dev); - } - lp->dialstate = 0; - dev->rx_netdev[isdn_dc2minor(lp->isdn_device, lp->isdn_channel)] = NULL; - dev->st_netdev[isdn_dc2minor(lp->isdn_device, lp->isdn_channel)] = NULL; - if (lp->isdn_device != -1 && lp->isdn_channel != -1) - isdn_free_channel(lp->isdn_device, lp->isdn_channel, - ISDN_USAGE_NET); - lp->flags &= ~ISDN_NET_CONNECTED; - lp->isdn_device = -1; - lp->isdn_channel = -1; -} - -/* - * Perform auto-hangup and cps-calculation for net-interfaces. - * - * auto-hangup: - * Increment idle-counter (this counter is reset on any incoming or - * outgoing packet), if counter exceeds configured limit either do a - * hangup immediately or - if configured - wait until just before the next - * charge-info. - * - * cps-calculation (needed for dynamic channel-bundling): - * Since this function is called every second, simply reset the - * byte-counter of the interface after copying it to the cps-variable. - */ -static unsigned long last_jiffies = -HZ; - -void -isdn_net_autohup(void) -{ - isdn_net_dev *p = dev->netdev; - int anymore; - - anymore = 0; - while (p) { - isdn_net_local *l = p->local; - if (jiffies == last_jiffies) - l->cps = l->transcount; - else - l->cps = (l->transcount * HZ) / (jiffies - last_jiffies); - l->transcount = 0; - if (dev->net_verbose > 3) - printk(KERN_DEBUG "%s: %d bogocps\n", p->dev->name, l->cps); - if ((l->flags & ISDN_NET_CONNECTED) && (!l->dialstate)) { - anymore = 1; - l->huptimer++; - /* - * if there is some dialmode where timeout-hangup - * should _not_ be done, check for that here - */ - if ((l->onhtime) && - (l->huptimer > l->onhtime)) - { - if (l->hupflags & ISDN_MANCHARGE && - l->hupflags & ISDN_CHARGEHUP) { - while (time_after(jiffies, l->chargetime + l->chargeint)) - l->chargetime += l->chargeint; - if (time_after(jiffies, l->chargetime + l->chargeint - 2 * HZ)) - if (l->outgoing || l->hupflags & ISDN_INHUP) - isdn_net_hangup(p->dev); - } else if (l->outgoing) { - if (l->hupflags & ISDN_CHARGEHUP) { - if (l->hupflags & ISDN_WAITCHARGE) { - printk(KERN_DEBUG "isdn_net: Hupflags of %s are %X\n", - p->dev->name, l->hupflags); - isdn_net_hangup(p->dev); - } else if (time_after(jiffies, l->chargetime + l->chargeint)) { - printk(KERN_DEBUG - "isdn_net: %s: chtime = %lu, chint = %d\n", - p->dev->name, l->chargetime, l->chargeint); - isdn_net_hangup(p->dev); - } - } else - isdn_net_hangup(p->dev); - } else if (l->hupflags & ISDN_INHUP) - isdn_net_hangup(p->dev); - } - - if (dev->global_flags & ISDN_GLOBAL_STOPPED || (ISDN_NET_DIALMODE(*l) == ISDN_NET_DM_OFF)) { - isdn_net_hangup(p->dev); - break; - } - } - p = (isdn_net_dev *) p->next; - } - last_jiffies = jiffies; - isdn_timer_ctrl(ISDN_TIMER_NETHANGUP, anymore); -} - -static void isdn_net_lp_disconnected(isdn_net_local *lp) -{ - isdn_net_rm_from_bundle(lp); -} - -/* - * Handle status-messages from ISDN-interfacecard. - * This function is called from within the main-status-dispatcher - * isdn_status_callback, which itself is called from the low-level driver. - * Return: 1 = Event handled, 0 = not for us or unknown Event. - */ -int -isdn_net_stat_callback(int idx, isdn_ctrl *c) -{ - isdn_net_dev *p = dev->st_netdev[idx]; - int cmd = c->command; - - if (p) { - isdn_net_local *lp = p->local; -#ifdef CONFIG_ISDN_X25 - struct concap_proto *cprot = lp->netdev->cprot; - struct concap_proto_ops *pops = cprot ? cprot->pops : NULL; -#endif - switch (cmd) { - case ISDN_STAT_BSENT: - /* A packet has successfully been sent out */ - if ((lp->flags & ISDN_NET_CONNECTED) && - (!lp->dialstate)) { - isdn_net_dec_frame_cnt(lp); - lp->stats.tx_packets++; - lp->stats.tx_bytes += c->parm.length; - } - return 1; - case ISDN_STAT_DCONN: - /* D-Channel is up */ - switch (lp->dialstate) { - case 4: - case 7: - case 8: - lp->dialstate++; - return 1; - case 12: - lp->dialstate = 5; - return 1; - } - break; - case ISDN_STAT_DHUP: - /* Either D-Channel-hangup or error during dialout */ -#ifdef CONFIG_ISDN_X25 - /* If we are not connencted then dialing had - failed. If there are generic encap protocol - receiver routines signal the closure of - the link*/ - - if (!(lp->flags & ISDN_NET_CONNECTED) - && pops && pops->disconn_ind) - pops->disconn_ind(cprot); -#endif /* CONFIG_ISDN_X25 */ - if ((!lp->dialstate) && (lp->flags & ISDN_NET_CONNECTED)) { - if (lp->p_encap == ISDN_NET_ENCAP_CISCOHDLCK) - isdn_net_ciscohdlck_disconnected(lp); -#ifdef CONFIG_ISDN_PPP - if (lp->p_encap == ISDN_NET_ENCAP_SYNCPPP) - isdn_ppp_free(lp); -#endif - isdn_net_lp_disconnected(lp); - isdn_all_eaz(lp->isdn_device, lp->isdn_channel); - printk(KERN_INFO "%s: remote hangup\n", p->dev->name); - printk(KERN_INFO "%s: Chargesum is %d\n", p->dev->name, - lp->charge); - isdn_net_unbind_channel(lp); - return 1; - } - break; -#ifdef CONFIG_ISDN_X25 - case ISDN_STAT_BHUP: - /* B-Channel-hangup */ - /* try if there are generic encap protocol - receiver routines and signal the closure of - the link */ - if (pops && pops->disconn_ind) { - pops->disconn_ind(cprot); - return 1; - } - break; -#endif /* CONFIG_ISDN_X25 */ - case ISDN_STAT_BCONN: - /* B-Channel is up */ - isdn_net_zero_frame_cnt(lp); - switch (lp->dialstate) { - case 5: - case 6: - case 7: - case 8: - case 9: - case 10: - case 12: - if (lp->dialstate <= 6) { - dev->usage[idx] |= ISDN_USAGE_OUTGOING; - isdn_info_update(); - } else - dev->rx_netdev[idx] = p; - lp->dialstate = 0; - isdn_timer_ctrl(ISDN_TIMER_NETHANGUP, 1); - if (lp->p_encap == ISDN_NET_ENCAP_CISCOHDLCK) - isdn_net_ciscohdlck_connected(lp); - if (lp->p_encap != ISDN_NET_ENCAP_SYNCPPP) { - if (lp->master) { /* is lp a slave? */ - isdn_net_dev *nd = ISDN_MASTER_PRIV(lp)->netdev; - isdn_net_add_to_bundle(nd, lp); - } - } - printk(KERN_INFO "isdn_net: %s connected\n", p->dev->name); - /* If first Chargeinfo comes before B-Channel connect, - * we correct the timestamp here. - */ - lp->chargetime = jiffies; - - /* reset dial-timeout */ - lp->dialstarted = 0; - lp->dialwait_timer = 0; - -#ifdef CONFIG_ISDN_PPP - if (lp->p_encap == ISDN_NET_ENCAP_SYNCPPP) - isdn_ppp_wakeup_daemon(lp); -#endif -#ifdef CONFIG_ISDN_X25 - /* try if there are generic concap receiver routines */ - if (pops) - if (pops->connect_ind) - pops->connect_ind(cprot); -#endif /* CONFIG_ISDN_X25 */ - /* ppp needs to do negotiations first */ - if (lp->p_encap != ISDN_NET_ENCAP_SYNCPPP) - isdn_net_device_wake_queue(lp); - return 1; - } - break; - case ISDN_STAT_NODCH: - /* No D-Channel avail. */ - if (lp->dialstate == 4) { - lp->dialstate--; - return 1; - } - break; - case ISDN_STAT_CINF: - /* Charge-info from TelCo. Calculate interval between - * charge-infos and set timestamp for last info for - * usage by isdn_net_autohup() - */ - lp->charge++; - if (lp->hupflags & ISDN_HAVECHARGE) { - lp->hupflags &= ~ISDN_WAITCHARGE; - lp->chargeint = jiffies - lp->chargetime - (2 * HZ); - } - if (lp->hupflags & ISDN_WAITCHARGE) - lp->hupflags |= ISDN_HAVECHARGE; - lp->chargetime = jiffies; - printk(KERN_DEBUG "isdn_net: Got CINF chargetime of %s now %lu\n", - p->dev->name, lp->chargetime); - return 1; - } - } - return 0; -} - -/* - * Perform dialout for net-interfaces and timeout-handling for - * D-Channel-up and B-Channel-up Messages. - * This function is initially called from within isdn_net_start_xmit() or - * or isdn_net_find_icall() after initializing the dialstate for an - * interface. If further calls are needed, the function schedules itself - * for a timer-callback via isdn_timer_function(). - * The dialstate is also affected by incoming status-messages from - * the ISDN-Channel which are handled in isdn_net_stat_callback() above. - */ -void -isdn_net_dial(void) -{ - isdn_net_dev *p = dev->netdev; - int anymore = 0; - int i; - isdn_ctrl cmd; - u_char *phone_number; - - while (p) { - isdn_net_local *lp = p->local; - -#ifdef ISDN_DEBUG_NET_DIAL - if (lp->dialstate) - printk(KERN_DEBUG "%s: dialstate=%d\n", p->dev->name, lp->dialstate); -#endif - switch (lp->dialstate) { - case 0: - /* Nothing to do for this interface */ - break; - case 1: - /* Initiate dialout. Set phone-number-pointer to first number - * of interface. - */ - lp->dial = lp->phone[1]; - if (!lp->dial) { - printk(KERN_WARNING "%s: phone number deleted?\n", - p->dev->name); - isdn_net_hangup(p->dev); - break; - } - anymore = 1; - - if (lp->dialtimeout > 0) - if (lp->dialstarted == 0 || time_after(jiffies, lp->dialstarted + lp->dialtimeout + lp->dialwait)) { - lp->dialstarted = jiffies; - lp->dialwait_timer = 0; - } - - lp->dialstate++; - /* Fall through */ - case 2: - /* Prepare dialing. Clear EAZ, then set EAZ. */ - cmd.driver = lp->isdn_device; - cmd.arg = lp->isdn_channel; - cmd.command = ISDN_CMD_CLREAZ; - isdn_command(&cmd); - sprintf(cmd.parm.num, "%s", isdn_map_eaz2msn(lp->msn, cmd.driver)); - cmd.command = ISDN_CMD_SETEAZ; - isdn_command(&cmd); - lp->dialretry = 0; - anymore = 1; - lp->dialstate++; - /* Fall through */ - case 3: - /* Setup interface, dial current phone-number, switch to next number. - * If list of phone-numbers is exhausted, increment - * retry-counter. - */ - if (dev->global_flags & ISDN_GLOBAL_STOPPED || (ISDN_NET_DIALMODE(*lp) == ISDN_NET_DM_OFF)) { - char *s; - if (dev->global_flags & ISDN_GLOBAL_STOPPED) - s = "dial suppressed: isdn system stopped"; - else - s = "dial suppressed: dialmode `off'"; - isdn_net_unreachable(p->dev, NULL, s); - isdn_net_hangup(p->dev); - break; - } - cmd.driver = lp->isdn_device; - cmd.command = ISDN_CMD_SETL2; - cmd.arg = lp->isdn_channel + (lp->l2_proto << 8); - isdn_command(&cmd); - cmd.driver = lp->isdn_device; - cmd.command = ISDN_CMD_SETL3; - cmd.arg = lp->isdn_channel + (lp->l3_proto << 8); - isdn_command(&cmd); - cmd.driver = lp->isdn_device; - cmd.arg = lp->isdn_channel; - if (!lp->dial) { - printk(KERN_WARNING "%s: phone number deleted?\n", - p->dev->name); - isdn_net_hangup(p->dev); - break; - } - if (!strncmp(lp->dial->num, "LEASED", strlen("LEASED"))) { - lp->dialstate = 4; - printk(KERN_INFO "%s: Open leased line ...\n", p->dev->name); - } else { - if (lp->dialtimeout > 0) - if (time_after(jiffies, lp->dialstarted + lp->dialtimeout)) { - lp->dialwait_timer = jiffies + lp->dialwait; - lp->dialstarted = 0; - isdn_net_unreachable(p->dev, NULL, "dial: timed out"); - isdn_net_hangup(p->dev); - break; - } - - cmd.driver = lp->isdn_device; - cmd.command = ISDN_CMD_DIAL; - cmd.parm.setup.si2 = 0; - - /* check for DOV */ - phone_number = lp->dial->num; - if ((*phone_number == 'v') || - (*phone_number == 'V')) { /* DOV call */ - cmd.parm.setup.si1 = 1; - } else { /* DATA call */ - cmd.parm.setup.si1 = 7; - } - - strcpy(cmd.parm.setup.phone, phone_number); - /* - * Switch to next number or back to start if at end of list. - */ - if (!(lp->dial = (isdn_net_phone *) lp->dial->next)) { - lp->dial = lp->phone[1]; - lp->dialretry++; - - if (lp->dialretry > lp->dialmax) { - if (lp->dialtimeout == 0) { - lp->dialwait_timer = jiffies + lp->dialwait; - lp->dialstarted = 0; - isdn_net_unreachable(p->dev, NULL, "dial: tried all numbers dialmax times"); - } - isdn_net_hangup(p->dev); - break; - } - } - sprintf(cmd.parm.setup.eazmsn, "%s", - isdn_map_eaz2msn(lp->msn, cmd.driver)); - i = isdn_dc2minor(lp->isdn_device, lp->isdn_channel); - if (i >= 0) { - strcpy(dev->num[i], cmd.parm.setup.phone); - dev->usage[i] |= ISDN_USAGE_OUTGOING; - isdn_info_update(); - } - printk(KERN_INFO "%s: dialing %d %s... %s\n", p->dev->name, - lp->dialretry, cmd.parm.setup.phone, - (cmd.parm.setup.si1 == 1) ? "DOV" : ""); - lp->dtimer = 0; -#ifdef ISDN_DEBUG_NET_DIAL - printk(KERN_DEBUG "dial: d=%d c=%d\n", lp->isdn_device, - lp->isdn_channel); -#endif - isdn_command(&cmd); - } - lp->huptimer = 0; - lp->outgoing = 1; - if (lp->chargeint) { - lp->hupflags |= ISDN_HAVECHARGE; - lp->hupflags &= ~ISDN_WAITCHARGE; - } else { - lp->hupflags |= ISDN_WAITCHARGE; - lp->hupflags &= ~ISDN_HAVECHARGE; - } - anymore = 1; - lp->dialstate = - (lp->cbdelay && - (lp->flags & ISDN_NET_CBOUT)) ? 12 : 4; - break; - case 4: - /* Wait for D-Channel-connect. - * If timeout, switch back to state 3. - * Dialmax-handling moved to state 3. - */ - if (lp->dtimer++ > ISDN_TIMER_DTIMEOUT10) - lp->dialstate = 3; - anymore = 1; - break; - case 5: - /* Got D-Channel-Connect, send B-Channel-request */ - cmd.driver = lp->isdn_device; - cmd.arg = lp->isdn_channel; - cmd.command = ISDN_CMD_ACCEPTB; - anymore = 1; - lp->dtimer = 0; - lp->dialstate++; - isdn_command(&cmd); - break; - case 6: - /* Wait for B- or D-Channel-connect. If timeout, - * switch back to state 3. - */ -#ifdef ISDN_DEBUG_NET_DIAL - printk(KERN_DEBUG "dialtimer2: %d\n", lp->dtimer); -#endif - if (lp->dtimer++ > ISDN_TIMER_DTIMEOUT10) - lp->dialstate = 3; - anymore = 1; - break; - case 7: - /* Got incoming Call, setup L2 and L3 protocols, - * then wait for D-Channel-connect - */ -#ifdef ISDN_DEBUG_NET_DIAL - printk(KERN_DEBUG "dialtimer4: %d\n", lp->dtimer); -#endif - cmd.driver = lp->isdn_device; - cmd.command = ISDN_CMD_SETL2; - cmd.arg = lp->isdn_channel + (lp->l2_proto << 8); - isdn_command(&cmd); - cmd.driver = lp->isdn_device; - cmd.command = ISDN_CMD_SETL3; - cmd.arg = lp->isdn_channel + (lp->l3_proto << 8); - isdn_command(&cmd); - if (lp->dtimer++ > ISDN_TIMER_DTIMEOUT15) - isdn_net_hangup(p->dev); - else { - anymore = 1; - lp->dialstate++; - } - break; - case 9: - /* Got incoming D-Channel-Connect, send B-Channel-request */ - cmd.driver = lp->isdn_device; - cmd.arg = lp->isdn_channel; - cmd.command = ISDN_CMD_ACCEPTB; - isdn_command(&cmd); - anymore = 1; - lp->dtimer = 0; - lp->dialstate++; - break; - case 8: - case 10: - /* Wait for B- or D-channel-connect */ -#ifdef ISDN_DEBUG_NET_DIAL - printk(KERN_DEBUG "dialtimer4: %d\n", lp->dtimer); -#endif - if (lp->dtimer++ > ISDN_TIMER_DTIMEOUT10) - isdn_net_hangup(p->dev); - else - anymore = 1; - break; - case 11: - /* Callback Delay */ - if (lp->dtimer++ > lp->cbdelay) - lp->dialstate = 1; - anymore = 1; - break; - case 12: - /* Remote does callback. Hangup after cbdelay, then wait for incoming - * call (in state 4). - */ - if (lp->dtimer++ > lp->cbdelay) - { - printk(KERN_INFO "%s: hangup waiting for callback ...\n", p->dev->name); - lp->dtimer = 0; - lp->dialstate = 4; - cmd.driver = lp->isdn_device; - cmd.command = ISDN_CMD_HANGUP; - cmd.arg = lp->isdn_channel; - isdn_command(&cmd); - isdn_all_eaz(lp->isdn_device, lp->isdn_channel); - } - anymore = 1; - break; - default: - printk(KERN_WARNING "isdn_net: Illegal dialstate %d for device %s\n", - lp->dialstate, p->dev->name); - } - p = (isdn_net_dev *) p->next; - } - isdn_timer_ctrl(ISDN_TIMER_NETDIAL, anymore); -} - -/* - * Perform hangup for a net-interface. - */ -void -isdn_net_hangup(struct net_device *d) -{ - isdn_net_local *lp = netdev_priv(d); - isdn_ctrl cmd; -#ifdef CONFIG_ISDN_X25 - struct concap_proto *cprot = lp->netdev->cprot; - struct concap_proto_ops *pops = cprot ? cprot->pops : NULL; -#endif - - if (lp->flags & ISDN_NET_CONNECTED) { - if (lp->slave != NULL) { - isdn_net_local *slp = ISDN_SLAVE_PRIV(lp); - if (slp->flags & ISDN_NET_CONNECTED) { - printk(KERN_INFO - "isdn_net: hang up slave %s before %s\n", - lp->slave->name, d->name); - isdn_net_hangup(lp->slave); - } - } - printk(KERN_INFO "isdn_net: local hangup %s\n", d->name); -#ifdef CONFIG_ISDN_PPP - if (lp->p_encap == ISDN_NET_ENCAP_SYNCPPP) - isdn_ppp_free(lp); -#endif - isdn_net_lp_disconnected(lp); -#ifdef CONFIG_ISDN_X25 - /* try if there are generic encap protocol - receiver routines and signal the closure of - the link */ - if (pops && pops->disconn_ind) - pops->disconn_ind(cprot); -#endif /* CONFIG_ISDN_X25 */ - - cmd.driver = lp->isdn_device; - cmd.command = ISDN_CMD_HANGUP; - cmd.arg = lp->isdn_channel; - isdn_command(&cmd); - printk(KERN_INFO "%s: Chargesum is %d\n", d->name, lp->charge); - isdn_all_eaz(lp->isdn_device, lp->isdn_channel); - } - isdn_net_unbind_channel(lp); -} - -typedef struct { - __be16 source; - __be16 dest; -} ip_ports; - -static void -isdn_net_log_skb(struct sk_buff *skb, isdn_net_local *lp) -{ - /* hopefully, this was set correctly */ - const u_char *p = skb_network_header(skb); - unsigned short proto = ntohs(skb->protocol); - int data_ofs; - ip_ports *ipp; - char addinfo[100]; - - addinfo[0] = '\0'; - /* This check stolen from 2.1.72 dev_queue_xmit_nit() */ - if (p < skb->data || skb_network_header(skb) >= skb_tail_pointer(skb)) { - /* fall back to old isdn_net_log_packet method() */ - char *buf = skb->data; - - printk(KERN_DEBUG "isdn_net: protocol %04x is buggy, dev %s\n", skb->protocol, lp->netdev->dev->name); - p = buf; - proto = ETH_P_IP; - switch (lp->p_encap) { - case ISDN_NET_ENCAP_IPTYP: - proto = ntohs(*(__be16 *)&buf[0]); - p = &buf[2]; - break; - case ISDN_NET_ENCAP_ETHER: - proto = ntohs(*(__be16 *)&buf[12]); - p = &buf[14]; - break; - case ISDN_NET_ENCAP_CISCOHDLC: - proto = ntohs(*(__be16 *)&buf[2]); - p = &buf[4]; - break; -#ifdef CONFIG_ISDN_PPP - case ISDN_NET_ENCAP_SYNCPPP: - proto = ntohs(skb->protocol); - p = &buf[IPPP_MAX_HEADER]; - break; -#endif - } - } - data_ofs = ((p[0] & 15) * 4); - switch (proto) { - case ETH_P_IP: - switch (p[9]) { - case 1: - strcpy(addinfo, " ICMP"); - break; - case 2: - strcpy(addinfo, " IGMP"); - break; - case 4: - strcpy(addinfo, " IPIP"); - break; - case 6: - ipp = (ip_ports *) (&p[data_ofs]); - sprintf(addinfo, " TCP, port: %d -> %d", ntohs(ipp->source), - ntohs(ipp->dest)); - break; - case 8: - strcpy(addinfo, " EGP"); - break; - case 12: - strcpy(addinfo, " PUP"); - break; - case 17: - ipp = (ip_ports *) (&p[data_ofs]); - sprintf(addinfo, " UDP, port: %d -> %d", ntohs(ipp->source), - ntohs(ipp->dest)); - break; - case 22: - strcpy(addinfo, " IDP"); - break; - } - printk(KERN_INFO "OPEN: %pI4 -> %pI4%s\n", - p + 12, p + 16, addinfo); - break; - case ETH_P_ARP: - printk(KERN_INFO "OPEN: ARP %pI4 -> *.*.*.* ?%pI4\n", - p + 14, p + 24); - break; - } -} - -/* - * this function is used to send supervisory data, i.e. data which was - * not received from the network layer, but e.g. frames from ipppd, CCP - * reset frames etc. - */ -void isdn_net_write_super(isdn_net_local *lp, struct sk_buff *skb) -{ - if (in_irq()) { - // we can't grab the lock from irq context, - // so we just queue the packet - skb_queue_tail(&lp->super_tx_queue, skb); - schedule_work(&lp->tqueue); - return; - } - - spin_lock_bh(&lp->xmit_lock); - if (!isdn_net_lp_busy(lp)) { - isdn_net_writebuf_skb(lp, skb); - } else { - skb_queue_tail(&lp->super_tx_queue, skb); - } - spin_unlock_bh(&lp->xmit_lock); -} - -/* - * called from tq_immediate - */ -static void isdn_net_softint(struct work_struct *work) -{ - isdn_net_local *lp = container_of(work, isdn_net_local, tqueue); - struct sk_buff *skb; - - spin_lock_bh(&lp->xmit_lock); - while (!isdn_net_lp_busy(lp)) { - skb = skb_dequeue(&lp->super_tx_queue); - if (!skb) - break; - isdn_net_writebuf_skb(lp, skb); - } - spin_unlock_bh(&lp->xmit_lock); -} - -/* - * all frames sent from the (net) LL to a HL driver should go via this function - * it's serialized by the caller holding the lp->xmit_lock spinlock - */ -void isdn_net_writebuf_skb(isdn_net_local *lp, struct sk_buff *skb) -{ - int ret; - int len = skb->len; /* save len */ - - /* before obtaining the lock the caller should have checked that - the lp isn't busy */ - if (isdn_net_lp_busy(lp)) { - printk("isdn BUG at %s:%d!\n", __FILE__, __LINE__); - goto error; - } - - if (!(lp->flags & ISDN_NET_CONNECTED)) { - printk("isdn BUG at %s:%d!\n", __FILE__, __LINE__); - goto error; - } - ret = isdn_writebuf_skb_stub(lp->isdn_device, lp->isdn_channel, 1, skb); - if (ret != len) { - /* we should never get here */ - printk(KERN_WARNING "%s: HL driver queue full\n", lp->netdev->dev->name); - goto error; - } - - lp->transcount += len; - isdn_net_inc_frame_cnt(lp); - return; - -error: - dev_kfree_skb(skb); - lp->stats.tx_errors++; - -} - - -/* - * Helper function for isdn_net_start_xmit. - * When called, the connection is already established. - * Based on cps-calculation, check if device is overloaded. - * If so, and if a slave exists, trigger dialing for it. - * If any slave is online, deliver packets using a simple round robin - * scheme. - * - * Return: 0 on success, !0 on failure. - */ - -static int -isdn_net_xmit(struct net_device *ndev, struct sk_buff *skb) -{ - isdn_net_dev *nd; - isdn_net_local *slp; - isdn_net_local *lp = netdev_priv(ndev); - int retv = NETDEV_TX_OK; - - if (((isdn_net_local *) netdev_priv(ndev))->master) { - printk("isdn BUG at %s:%d!\n", __FILE__, __LINE__); - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } - - /* For the other encaps the header has already been built */ -#ifdef CONFIG_ISDN_PPP - if (lp->p_encap == ISDN_NET_ENCAP_SYNCPPP) { - return isdn_ppp_xmit(skb, ndev); - } -#endif - nd = ((isdn_net_local *) netdev_priv(ndev))->netdev; - lp = isdn_net_get_locked_lp(nd); - if (!lp) { - printk(KERN_WARNING "%s: all channels busy - requeuing!\n", ndev->name); - return NETDEV_TX_BUSY; - } - /* we have our lp locked from now on */ - - /* Reset hangup-timeout */ - lp->huptimer = 0; // FIXME? - isdn_net_writebuf_skb(lp, skb); - spin_unlock_bh(&lp->xmit_lock); - - /* the following stuff is here for backwards compatibility. - * in future, start-up and hangup of slaves (based on current load) - * should move to userspace and get based on an overall cps - * calculation - */ - if (lp->cps > lp->triggercps) { - if (lp->slave) { - if (!lp->sqfull) { - /* First time overload: set timestamp only */ - lp->sqfull = 1; - lp->sqfull_stamp = jiffies; - } else { - /* subsequent overload: if slavedelay exceeded, start dialing */ - if (time_after(jiffies, lp->sqfull_stamp + lp->slavedelay)) { - slp = ISDN_SLAVE_PRIV(lp); - if (!(slp->flags & ISDN_NET_CONNECTED)) { - isdn_net_force_dial_lp(ISDN_SLAVE_PRIV(lp)); - } - } - } - } - } else { - if (lp->sqfull && time_after(jiffies, lp->sqfull_stamp + lp->slavedelay + (10 * HZ))) { - lp->sqfull = 0; - } - /* this is a hack to allow auto-hangup for slaves on moderate loads */ - nd->queue = nd->local; - } - - return retv; - -} - -static void -isdn_net_adjust_hdr(struct sk_buff *skb, struct net_device *dev) -{ - isdn_net_local *lp = netdev_priv(dev); - if (!skb) - return; - if (lp->p_encap == ISDN_NET_ENCAP_ETHER) { - const int pullsize = skb_network_offset(skb) - ETH_HLEN; - if (pullsize > 0) { - printk(KERN_DEBUG "isdn_net: Pull junk %d\n", pullsize); - skb_pull(skb, pullsize); - } - } -} - - -static void isdn_net_tx_timeout(struct net_device *ndev) -{ - isdn_net_local *lp = netdev_priv(ndev); - - printk(KERN_WARNING "isdn_tx_timeout dev %s dialstate %d\n", ndev->name, lp->dialstate); - if (!lp->dialstate) { - lp->stats.tx_errors++; - /* - * There is a certain probability that this currently - * works at all because if we always wake up the interface, - * then upper layer will try to send the next packet - * immediately. And then, the old clean_up logic in the - * driver will hopefully continue to work as it used to do. - * - * This is rather primitive right know, we better should - * clean internal queues here, in particular for multilink and - * ppp, and reset HL driver's channel, too. --HE - * - * actually, this may not matter at all, because ISDN hardware - * should not see transmitter hangs at all IMO - * changed KERN_DEBUG to KERN_WARNING to find out if this is - * ever called --KG - */ - } - netif_trans_update(ndev); - netif_wake_queue(ndev); -} - -/* - * Try sending a packet. - * If this interface isn't connected to a ISDN-Channel, find a free channel, - * and start dialing. - */ -static netdev_tx_t -isdn_net_start_xmit(struct sk_buff *skb, struct net_device *ndev) -{ - isdn_net_local *lp = netdev_priv(ndev); -#ifdef CONFIG_ISDN_X25 - struct concap_proto *cprot = lp->netdev->cprot; -/* At this point hard_start_xmit() passes control to the encapsulation - protocol (if present). - For X.25 auto-dialing is completly bypassed because: - - It does not conform with the semantics of a reliable datalink - service as needed by X.25 PLP. - - I don't want that the interface starts dialing when the network layer - sends a message which requests to disconnect the lapb link (or if it - sends any other message not resulting in data transmission). - Instead, dialing will be initiated by the encapsulation protocol entity - when a dl_establish request is received from the upper layer. -*/ - if (cprot && cprot->pops) { - int ret = cprot->pops->encap_and_xmit(cprot, skb); - - if (ret) - netif_stop_queue(ndev); - return ret; - } else -#endif - /* auto-dialing xmit function */ - { -#ifdef ISDN_DEBUG_NET_DUMP - u_char *buf; -#endif - isdn_net_adjust_hdr(skb, ndev); -#ifdef ISDN_DEBUG_NET_DUMP - buf = skb->data; - isdn_dumppkt("S:", buf, skb->len, 40); -#endif - - if (!(lp->flags & ISDN_NET_CONNECTED)) { - int chi; - /* only do autodial if allowed by config */ - if (!(ISDN_NET_DIALMODE(*lp) == ISDN_NET_DM_AUTO)) { - isdn_net_unreachable(ndev, skb, "dial rejected: interface not in dialmode `auto'"); - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } - if (lp->phone[1]) { - ulong flags; - - if (lp->dialwait_timer <= 0) - if (lp->dialstarted > 0 && lp->dialtimeout > 0 && time_before(jiffies, lp->dialstarted + lp->dialtimeout + lp->dialwait)) - lp->dialwait_timer = lp->dialstarted + lp->dialtimeout + lp->dialwait; - - if (lp->dialwait_timer > 0) { - if (time_before(jiffies, lp->dialwait_timer)) { - isdn_net_unreachable(ndev, skb, "dial rejected: retry-time not reached"); - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } else - lp->dialwait_timer = 0; - } - /* Grab a free ISDN-Channel */ - spin_lock_irqsave(&dev->lock, flags); - if (((chi = - isdn_get_free_channel( - ISDN_USAGE_NET, - lp->l2_proto, - lp->l3_proto, - lp->pre_device, - lp->pre_channel, - lp->msn) - ) < 0) && - ((chi = - isdn_get_free_channel( - ISDN_USAGE_NET, - lp->l2_proto, - lp->l3_proto, - lp->pre_device, - lp->pre_channel^1, - lp->msn) - ) < 0)) { - spin_unlock_irqrestore(&dev->lock, flags); - isdn_net_unreachable(ndev, skb, - "No channel"); - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } - /* Log packet, which triggered dialing */ - if (dev->net_verbose) - isdn_net_log_skb(skb, lp); - lp->dialstate = 1; - /* Connect interface with channel */ - isdn_net_bind_channel(lp, chi); -#ifdef CONFIG_ISDN_PPP - if (lp->p_encap == ISDN_NET_ENCAP_SYNCPPP) { - /* no 'first_skb' handling for syncPPP */ - if (isdn_ppp_bind(lp) < 0) { - dev_kfree_skb(skb); - isdn_net_unbind_channel(lp); - spin_unlock_irqrestore(&dev->lock, flags); - return NETDEV_TX_OK; /* STN (skb to nirvana) ;) */ - } -#ifdef CONFIG_IPPP_FILTER - if (isdn_ppp_autodial_filter(skb, lp)) { - isdn_ppp_free(lp); - isdn_net_unbind_channel(lp); - spin_unlock_irqrestore(&dev->lock, flags); - isdn_net_unreachable(ndev, skb, "dial rejected: packet filtered"); - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } -#endif - spin_unlock_irqrestore(&dev->lock, flags); - isdn_net_dial(); /* Initiate dialing */ - netif_stop_queue(ndev); - return NETDEV_TX_BUSY; /* let upper layer requeue skb packet */ - } -#endif - /* Initiate dialing */ - spin_unlock_irqrestore(&dev->lock, flags); - isdn_net_dial(); - isdn_net_device_stop_queue(lp); - return NETDEV_TX_BUSY; - } else { - isdn_net_unreachable(ndev, skb, - "No phone number"); - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } - } else { - /* Device is connected to an ISDN channel */ - netif_trans_update(ndev); - if (!lp->dialstate) { - /* ISDN connection is established, try sending */ - int ret; - ret = (isdn_net_xmit(ndev, skb)); - if (ret) netif_stop_queue(ndev); - return ret; - } else - netif_stop_queue(ndev); - } - } - return NETDEV_TX_BUSY; -} - -/* - * Shutdown a net-interface. - */ -static int -isdn_net_close(struct net_device *dev) -{ - struct net_device *p; -#ifdef CONFIG_ISDN_X25 - struct concap_proto *cprot = - ((isdn_net_local *)netdev_priv(dev))->netdev->cprot; - /* printk(KERN_DEBUG "isdn_net_close %s\n" , dev-> name); */ -#endif - -#ifdef CONFIG_ISDN_X25 - if (cprot && cprot->pops) cprot->pops->close(cprot); -#endif - netif_stop_queue(dev); - p = MASTER_TO_SLAVE(dev); - if (p) { - /* If this interface has slaves, stop them also */ - while (p) { -#ifdef CONFIG_ISDN_X25 - cprot = ((isdn_net_local *)netdev_priv(p)) - ->netdev->cprot; - if (cprot && cprot->pops) - cprot->pops->close(cprot); -#endif - isdn_net_hangup(p); - p = MASTER_TO_SLAVE(p); - } - } - isdn_net_hangup(dev); - isdn_unlock_drivers(); - return 0; -} - -/* - * Get statistics - */ -static struct net_device_stats * -isdn_net_get_stats(struct net_device *dev) -{ - isdn_net_local *lp = netdev_priv(dev); - return &lp->stats; -} - -/* This is simply a copy from std. eth.c EXCEPT we pull ETH_HLEN - * instead of dev->hard_header_len off. This is done because the - * lowlevel-driver has already pulled off its stuff when we get - * here and this routine only gets called with p_encap == ETHER. - * Determine the packet's protocol ID. The rule here is that we - * assume 802.3 if the type field is short enough to be a length. - * This is normal practice and works for any 'now in use' protocol. - */ - -static __be16 -isdn_net_type_trans(struct sk_buff *skb, struct net_device *dev) -{ - struct ethhdr *eth; - unsigned char *rawp; - - skb_reset_mac_header(skb); - skb_pull(skb, ETH_HLEN); - eth = eth_hdr(skb); - - if (*eth->h_dest & 1) { - if (ether_addr_equal(eth->h_dest, dev->broadcast)) - skb->pkt_type = PACKET_BROADCAST; - else - skb->pkt_type = PACKET_MULTICAST; - } - /* - * This ALLMULTI check should be redundant by 1.4 - * so don't forget to remove it. - */ - - else if (dev->flags & (IFF_PROMISC /*| IFF_ALLMULTI*/)) { - if (!ether_addr_equal(eth->h_dest, dev->dev_addr)) - skb->pkt_type = PACKET_OTHERHOST; - } - if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) - return eth->h_proto; - - rawp = skb->data; - - /* - * This is a magic hack to spot IPX packets. Older Novell breaks - * the protocol design and runs IPX over 802.3 without an 802.2 LLC - * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This - * won't work for fault tolerant netware but does for the rest. - */ - if (*(unsigned short *) rawp == 0xFFFF) - return htons(ETH_P_802_3); - /* - * Real 802.2 LLC - */ - return htons(ETH_P_802_2); -} - - -/* - * CISCO HDLC keepalive specific stuff - */ -static struct sk_buff* -isdn_net_ciscohdlck_alloc_skb(isdn_net_local *lp, int len) -{ - unsigned short hl = dev->drv[lp->isdn_device]->interface->hl_hdrlen; - struct sk_buff *skb; - - skb = alloc_skb(hl + len, GFP_ATOMIC); - if (skb) - skb_reserve(skb, hl); - else - printk("isdn out of mem at %s:%d!\n", __FILE__, __LINE__); - return skb; -} - -/* cisco hdlck device private ioctls */ -static int -isdn_ciscohdlck_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - isdn_net_local *lp = netdev_priv(dev); - unsigned long len = 0; - unsigned long expires = 0; - int tmp = 0; - int period = lp->cisco_keepalive_period; - s8 debserint = lp->cisco_debserint; - int rc = 0; - - if (lp->p_encap != ISDN_NET_ENCAP_CISCOHDLCK) - return -EINVAL; - - switch (cmd) { - /* get/set keepalive period */ - case SIOCGKEEPPERIOD: - len = (unsigned long)sizeof(lp->cisco_keepalive_period); - if (copy_to_user(ifr->ifr_data, - &lp->cisco_keepalive_period, len)) - rc = -EFAULT; - break; - case SIOCSKEEPPERIOD: - tmp = lp->cisco_keepalive_period; - len = (unsigned long)sizeof(lp->cisco_keepalive_period); - if (copy_from_user(&period, ifr->ifr_data, len)) - rc = -EFAULT; - if ((period > 0) && (period <= 32767)) - lp->cisco_keepalive_period = period; - else - rc = -EINVAL; - if (!rc && (tmp != lp->cisco_keepalive_period)) { - expires = (unsigned long)(jiffies + - lp->cisco_keepalive_period * HZ); - mod_timer(&lp->cisco_timer, expires); - printk(KERN_INFO "%s: Keepalive period set " - "to %d seconds.\n", - dev->name, lp->cisco_keepalive_period); - } - break; - - /* get/set debugging */ - case SIOCGDEBSERINT: - len = (unsigned long)sizeof(lp->cisco_debserint); - if (copy_to_user(ifr->ifr_data, - &lp->cisco_debserint, len)) - rc = -EFAULT; - break; - case SIOCSDEBSERINT: - len = (unsigned long)sizeof(lp->cisco_debserint); - if (copy_from_user(&debserint, - ifr->ifr_data, len)) - rc = -EFAULT; - if ((debserint >= 0) && (debserint <= 64)) - lp->cisco_debserint = debserint; - else - rc = -EINVAL; - break; - - default: - rc = -EINVAL; - break; - } - return (rc); -} - - -static int isdn_net_ioctl(struct net_device *dev, - struct ifreq *ifr, int cmd) -{ - isdn_net_local *lp = netdev_priv(dev); - - switch (lp->p_encap) { -#ifdef CONFIG_ISDN_PPP - case ISDN_NET_ENCAP_SYNCPPP: - return isdn_ppp_dev_ioctl(dev, ifr, cmd); -#endif - case ISDN_NET_ENCAP_CISCOHDLCK: - return isdn_ciscohdlck_dev_ioctl(dev, ifr, cmd); - default: - return -EINVAL; - } -} - -/* called via cisco_timer.function */ -static void -isdn_net_ciscohdlck_slarp_send_keepalive(struct timer_list *t) -{ - isdn_net_local *lp = from_timer(lp, t, cisco_timer); - struct sk_buff *skb; - unsigned char *p; - unsigned long last_cisco_myseq = lp->cisco_myseq; - int myseq_diff = 0; - - if (!(lp->flags & ISDN_NET_CONNECTED) || lp->dialstate) { - printk("isdn BUG at %s:%d!\n", __FILE__, __LINE__); - return; - } - lp->cisco_myseq++; - - myseq_diff = (lp->cisco_myseq - lp->cisco_mineseen); - if ((lp->cisco_line_state) && ((myseq_diff >= 3) || (myseq_diff <= -3))) { - /* line up -> down */ - lp->cisco_line_state = 0; - printk(KERN_WARNING - "UPDOWN: Line protocol on Interface %s," - " changed state to down\n", lp->netdev->dev->name); - /* should stop routing higher-level data across */ - } else if ((!lp->cisco_line_state) && - (myseq_diff >= 0) && (myseq_diff <= 2)) { - /* line down -> up */ - lp->cisco_line_state = 1; - printk(KERN_WARNING - "UPDOWN: Line protocol on Interface %s," - " changed state to up\n", lp->netdev->dev->name); - /* restart routing higher-level data across */ - } - - if (lp->cisco_debserint) - printk(KERN_DEBUG "%s: HDLC " - "myseq %lu, mineseen %lu%c, yourseen %lu, %s\n", - lp->netdev->dev->name, last_cisco_myseq, lp->cisco_mineseen, - ((last_cisco_myseq == lp->cisco_mineseen) ? '*' : 040), - lp->cisco_yourseq, - ((lp->cisco_line_state) ? "line up" : "line down")); - - skb = isdn_net_ciscohdlck_alloc_skb(lp, 4 + 14); - if (!skb) - return; - - p = skb_put(skb, 4 + 14); - - /* cisco header */ - *(u8 *)(p + 0) = CISCO_ADDR_UNICAST; - *(u8 *)(p + 1) = CISCO_CTRL; - *(__be16 *)(p + 2) = cpu_to_be16(CISCO_TYPE_SLARP); - - /* slarp keepalive */ - *(__be32 *)(p + 4) = cpu_to_be32(CISCO_SLARP_KEEPALIVE); - *(__be32 *)(p + 8) = cpu_to_be32(lp->cisco_myseq); - *(__be32 *)(p + 12) = cpu_to_be32(lp->cisco_yourseq); - *(__be16 *)(p + 16) = cpu_to_be16(0xffff); // reliability, always 0xffff - p += 18; - - isdn_net_write_super(lp, skb); - - lp->cisco_timer.expires = jiffies + lp->cisco_keepalive_period * HZ; - - add_timer(&lp->cisco_timer); -} - -static void -isdn_net_ciscohdlck_slarp_send_request(isdn_net_local *lp) -{ - struct sk_buff *skb; - unsigned char *p; - - skb = isdn_net_ciscohdlck_alloc_skb(lp, 4 + 14); - if (!skb) - return; - - p = skb_put(skb, 4 + 14); - - /* cisco header */ - *(u8 *)(p + 0) = CISCO_ADDR_UNICAST; - *(u8 *)(p + 1) = CISCO_CTRL; - *(__be16 *)(p + 2) = cpu_to_be16(CISCO_TYPE_SLARP); - - /* slarp request */ - *(__be32 *)(p + 4) = cpu_to_be32(CISCO_SLARP_REQUEST); - *(__be32 *)(p + 8) = cpu_to_be32(0); // address - *(__be32 *)(p + 12) = cpu_to_be32(0); // netmask - *(__be16 *)(p + 16) = cpu_to_be16(0); // unused - p += 18; - - isdn_net_write_super(lp, skb); -} - -static void -isdn_net_ciscohdlck_connected(isdn_net_local *lp) -{ - lp->cisco_myseq = 0; - lp->cisco_mineseen = 0; - lp->cisco_yourseq = 0; - lp->cisco_keepalive_period = ISDN_TIMER_KEEPINT; - lp->cisco_last_slarp_in = 0; - lp->cisco_line_state = 0; - lp->cisco_debserint = 0; - - /* send slarp request because interface/seq.no.s reset */ - isdn_net_ciscohdlck_slarp_send_request(lp); - - timer_setup(&lp->cisco_timer, - isdn_net_ciscohdlck_slarp_send_keepalive, 0); - lp->cisco_timer.expires = jiffies + lp->cisco_keepalive_period * HZ; - add_timer(&lp->cisco_timer); -} - -static void -isdn_net_ciscohdlck_disconnected(isdn_net_local *lp) -{ - del_timer(&lp->cisco_timer); -} - -static void -isdn_net_ciscohdlck_slarp_send_reply(isdn_net_local *lp) -{ - struct sk_buff *skb; - unsigned char *p; - struct in_device *in_dev = NULL; - __be32 addr = 0; /* local ipv4 address */ - __be32 mask = 0; /* local netmask */ - - if ((in_dev = lp->netdev->dev->ip_ptr) != NULL) { - /* take primary(first) address of interface */ - struct in_ifaddr *ifa = in_dev->ifa_list; - if (ifa != NULL) { - addr = ifa->ifa_local; - mask = ifa->ifa_mask; - } - } - - skb = isdn_net_ciscohdlck_alloc_skb(lp, 4 + 14); - if (!skb) - return; - - p = skb_put(skb, 4 + 14); - - /* cisco header */ - *(u8 *)(p + 0) = CISCO_ADDR_UNICAST; - *(u8 *)(p + 1) = CISCO_CTRL; - *(__be16 *)(p + 2) = cpu_to_be16(CISCO_TYPE_SLARP); - - /* slarp reply, send own ip/netmask; if values are nonsense remote - * should think we are unable to provide it with an address via SLARP */ - *(__be32 *)(p + 4) = cpu_to_be32(CISCO_SLARP_REPLY); - *(__be32 *)(p + 8) = addr; // address - *(__be32 *)(p + 12) = mask; // netmask - *(__be16 *)(p + 16) = cpu_to_be16(0); // unused - p += 18; - - isdn_net_write_super(lp, skb); -} - -static void -isdn_net_ciscohdlck_slarp_in(isdn_net_local *lp, struct sk_buff *skb) -{ - unsigned char *p; - int period; - u32 code; - u32 my_seq; - u32 your_seq; - __be32 local; - __be32 *addr, *mask; - - if (skb->len < 14) - return; - - p = skb->data; - code = be32_to_cpup((__be32 *)p); - p += 4; - - switch (code) { - case CISCO_SLARP_REQUEST: - lp->cisco_yourseq = 0; - isdn_net_ciscohdlck_slarp_send_reply(lp); - break; - case CISCO_SLARP_REPLY: - addr = (__be32 *)p; - mask = (__be32 *)(p + 4); - if (*mask != cpu_to_be32(0xfffffffc)) - goto slarp_reply_out; - if ((*addr & cpu_to_be32(3)) == cpu_to_be32(0) || - (*addr & cpu_to_be32(3)) == cpu_to_be32(3)) - goto slarp_reply_out; - local = *addr ^ cpu_to_be32(3); - printk(KERN_INFO "%s: got slarp reply: remote ip: %pI4, local ip: %pI4 mask: %pI4\n", - lp->netdev->dev->name, addr, &local, mask); - break; - slarp_reply_out: - printk(KERN_INFO "%s: got invalid slarp reply (%pI4/%pI4) - ignored\n", - lp->netdev->dev->name, addr, mask); - break; - case CISCO_SLARP_KEEPALIVE: - period = (int)((jiffies - lp->cisco_last_slarp_in - + HZ / 2 - 1) / HZ); - if (lp->cisco_debserint && - (period != lp->cisco_keepalive_period) && - lp->cisco_last_slarp_in) { - printk(KERN_DEBUG "%s: Keepalive period mismatch - " - "is %d but should be %d.\n", - lp->netdev->dev->name, period, - lp->cisco_keepalive_period); - } - lp->cisco_last_slarp_in = jiffies; - my_seq = be32_to_cpup((__be32 *)(p + 0)); - your_seq = be32_to_cpup((__be32 *)(p + 4)); - p += 10; - lp->cisco_yourseq = my_seq; - lp->cisco_mineseen = your_seq; - break; - } -} - -static void -isdn_net_ciscohdlck_receive(isdn_net_local *lp, struct sk_buff *skb) -{ - unsigned char *p; - u8 addr; - u8 ctrl; - u16 type; - - if (skb->len < 4) - goto out_free; - - p = skb->data; - addr = *(u8 *)(p + 0); - ctrl = *(u8 *)(p + 1); - type = be16_to_cpup((__be16 *)(p + 2)); - p += 4; - skb_pull(skb, 4); - - if (addr != CISCO_ADDR_UNICAST && addr != CISCO_ADDR_BROADCAST) { - printk(KERN_WARNING "%s: Unknown Cisco addr 0x%02x\n", - lp->netdev->dev->name, addr); - goto out_free; - } - if (ctrl != CISCO_CTRL) { - printk(KERN_WARNING "%s: Unknown Cisco ctrl 0x%02x\n", - lp->netdev->dev->name, ctrl); - goto out_free; - } - - switch (type) { - case CISCO_TYPE_SLARP: - isdn_net_ciscohdlck_slarp_in(lp, skb); - goto out_free; - case CISCO_TYPE_CDP: - if (lp->cisco_debserint) - printk(KERN_DEBUG "%s: Received CDP packet. use " - "\"no cdp enable\" on cisco.\n", - lp->netdev->dev->name); - goto out_free; - default: - /* no special cisco protocol */ - skb->protocol = htons(type); - netif_rx(skb); - return; - } - -out_free: - kfree_skb(skb); -} - -/* - * Got a packet from ISDN-Channel. - */ -static void -isdn_net_receive(struct net_device *ndev, struct sk_buff *skb) -{ - isdn_net_local *lp = netdev_priv(ndev); - isdn_net_local *olp = lp; /* original 'lp' */ -#ifdef CONFIG_ISDN_X25 - struct concap_proto *cprot = lp->netdev->cprot; -#endif - lp->transcount += skb->len; - - lp->stats.rx_packets++; - lp->stats.rx_bytes += skb->len; - if (lp->master) { - /* Bundling: If device is a slave-device, deliver to master, also - * handle master's statistics and hangup-timeout - */ - ndev = lp->master; - lp = netdev_priv(ndev); - lp->stats.rx_packets++; - lp->stats.rx_bytes += skb->len; - } - skb->dev = ndev; - skb->pkt_type = PACKET_HOST; - skb_reset_mac_header(skb); -#ifdef ISDN_DEBUG_NET_DUMP - isdn_dumppkt("R:", skb->data, skb->len, 40); -#endif - switch (lp->p_encap) { - case ISDN_NET_ENCAP_ETHER: - /* Ethernet over ISDN */ - olp->huptimer = 0; - lp->huptimer = 0; - skb->protocol = isdn_net_type_trans(skb, ndev); - break; - case ISDN_NET_ENCAP_UIHDLC: - /* HDLC with UI-frame (for ispa with -h1 option) */ - olp->huptimer = 0; - lp->huptimer = 0; - skb_pull(skb, 2); - /* Fall through */ - case ISDN_NET_ENCAP_RAWIP: - /* RAW-IP without MAC-Header */ - olp->huptimer = 0; - lp->huptimer = 0; - skb->protocol = htons(ETH_P_IP); - break; - case ISDN_NET_ENCAP_CISCOHDLCK: - isdn_net_ciscohdlck_receive(lp, skb); - return; - case ISDN_NET_ENCAP_CISCOHDLC: - /* CISCO-HDLC IP with type field and fake I-frame-header */ - skb_pull(skb, 2); - /* Fall through */ - case ISDN_NET_ENCAP_IPTYP: - /* IP with type field */ - olp->huptimer = 0; - lp->huptimer = 0; - skb->protocol = *(__be16 *)&(skb->data[0]); - skb_pull(skb, 2); - if (*(unsigned short *) skb->data == 0xFFFF) - skb->protocol = htons(ETH_P_802_3); - break; -#ifdef CONFIG_ISDN_PPP - case ISDN_NET_ENCAP_SYNCPPP: - /* huptimer is done in isdn_ppp_push_higher */ - isdn_ppp_receive(lp->netdev, olp, skb); - return; -#endif - - default: -#ifdef CONFIG_ISDN_X25 - /* try if there are generic sync_device receiver routines */ - if (cprot) if (cprot->pops) - if (cprot->pops->data_ind) { - cprot->pops->data_ind(cprot, skb); - return; - }; -#endif /* CONFIG_ISDN_X25 */ - printk(KERN_WARNING "%s: unknown encapsulation, dropping\n", - lp->netdev->dev->name); - kfree_skb(skb); - return; - } - - netif_rx(skb); - return; -} - -/* - * A packet arrived via ISDN. Search interface-chain for a corresponding - * interface. If found, deliver packet to receiver-function and return 1, - * else return 0. - */ -int -isdn_net_rcv_skb(int idx, struct sk_buff *skb) -{ - isdn_net_dev *p = dev->rx_netdev[idx]; - - if (p) { - isdn_net_local *lp = p->local; - if ((lp->flags & ISDN_NET_CONNECTED) && - (!lp->dialstate)) { - isdn_net_receive(p->dev, skb); - return 1; - } - } - return 0; -} - -/* - * build an header - * depends on encaps that is being used. - */ - -static int isdn_net_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, - const void *daddr, const void *saddr, unsigned plen) -{ - isdn_net_local *lp = netdev_priv(dev); - unsigned char *p; - int len = 0; - - switch (lp->p_encap) { - case ISDN_NET_ENCAP_ETHER: - len = eth_header(skb, dev, type, daddr, saddr, plen); - break; -#ifdef CONFIG_ISDN_PPP - case ISDN_NET_ENCAP_SYNCPPP: - /* stick on a fake header to keep fragmentation code happy. */ - len = IPPP_MAX_HEADER; - skb_push(skb, len); - break; -#endif - case ISDN_NET_ENCAP_RAWIP: - printk(KERN_WARNING "isdn_net_header called with RAW_IP!\n"); - len = 0; - break; - case ISDN_NET_ENCAP_IPTYP: - /* ethernet type field */ - *((__be16 *)skb_push(skb, 2)) = htons(type); - len = 2; - break; - case ISDN_NET_ENCAP_UIHDLC: - /* HDLC with UI-Frames (for ispa with -h1 option) */ - *((__be16 *)skb_push(skb, 2)) = htons(0x0103); - len = 2; - break; - case ISDN_NET_ENCAP_CISCOHDLC: - case ISDN_NET_ENCAP_CISCOHDLCK: - p = skb_push(skb, 4); - *(u8 *)(p + 0) = CISCO_ADDR_UNICAST; - *(u8 *)(p + 1) = CISCO_CTRL; - *(__be16 *)(p + 2) = cpu_to_be16(type); - p += 4; - len = 4; - break; -#ifdef CONFIG_ISDN_X25 - default: - /* try if there are generic concap protocol routines */ - if (lp->netdev->cprot) { - printk(KERN_WARNING "isdn_net_header called with concap_proto!\n"); - len = 0; - break; - } - break; -#endif /* CONFIG_ISDN_X25 */ - } - return len; -} - -static int isdn_header_cache(const struct neighbour *neigh, struct hh_cache *hh, - __be16 type) -{ - const struct net_device *dev = neigh->dev; - isdn_net_local *lp = netdev_priv(dev); - - if (lp->p_encap == ISDN_NET_ENCAP_ETHER) - return eth_header_cache(neigh, hh, type); - return -1; -} - -static void isdn_header_cache_update(struct hh_cache *hh, - const struct net_device *dev, - const unsigned char *haddr) -{ - isdn_net_local *lp = netdev_priv(dev); - if (lp->p_encap == ISDN_NET_ENCAP_ETHER) - eth_header_cache_update(hh, dev, haddr); -} - -static const struct header_ops isdn_header_ops = { - .create = isdn_net_header, - .cache = isdn_header_cache, - .cache_update = isdn_header_cache_update, -}; - -/* - * Interface-setup. (just after registering a new interface) - */ -static int -isdn_net_init(struct net_device *ndev) -{ - ushort max_hlhdr_len = 0; - int drvidx; - - /* - * up till binding we ask the protocol layer to reserve as much - * as we might need for HL layer - */ - - for (drvidx = 0; drvidx < ISDN_MAX_DRIVERS; drvidx++) - if (dev->drv[drvidx]) - if (max_hlhdr_len < dev->drv[drvidx]->interface->hl_hdrlen) - max_hlhdr_len = dev->drv[drvidx]->interface->hl_hdrlen; - - ndev->hard_header_len = ETH_HLEN + max_hlhdr_len; - return 0; -} - -static void -isdn_net_swapbind(int drvidx) -{ - isdn_net_dev *p; - -#ifdef ISDN_DEBUG_NET_ICALL - printk(KERN_DEBUG "n_fi: swapping ch of %d\n", drvidx); -#endif - p = dev->netdev; - while (p) { - if (p->local->pre_device == drvidx) - switch (p->local->pre_channel) { - case 0: - p->local->pre_channel = 1; - break; - case 1: - p->local->pre_channel = 0; - break; - } - p = (isdn_net_dev *) p->next; - } -} - -static void -isdn_net_swap_usage(int i1, int i2) -{ - int u1 = dev->usage[i1] & ISDN_USAGE_EXCLUSIVE; - int u2 = dev->usage[i2] & ISDN_USAGE_EXCLUSIVE; - -#ifdef ISDN_DEBUG_NET_ICALL - printk(KERN_DEBUG "n_fi: usage of %d and %d\n", i1, i2); -#endif - dev->usage[i1] &= ~ISDN_USAGE_EXCLUSIVE; - dev->usage[i1] |= u2; - dev->usage[i2] &= ~ISDN_USAGE_EXCLUSIVE; - dev->usage[i2] |= u1; - isdn_info_update(); -} - -/* - * An incoming call-request has arrived. - * Search the interface-chain for an appropriate interface. - * If found, connect the interface to the ISDN-channel and initiate - * D- and B-Channel-setup. If secure-flag is set, accept only - * configured phone-numbers. If callback-flag is set, initiate - * callback-dialing. - * - * Return-Value: 0 = No appropriate interface for this call. - * 1 = Call accepted - * 2 = Reject call, wait cbdelay, then call back - * 3 = Reject call - * 4 = Wait cbdelay, then call back - * 5 = No appropriate interface for this call, - * would eventually match if CID was longer. - */ - -int -isdn_net_find_icall(int di, int ch, int idx, setup_parm *setup) -{ - char *eaz; - int si1; - int si2; - int ematch; - int wret; - int swapped; - int sidx = 0; - u_long flags; - isdn_net_dev *p; - isdn_net_phone *n; - char nr[ISDN_MSNLEN]; - char *my_eaz; - - /* Search name in netdev-chain */ - if (!setup->phone[0]) { - nr[0] = '0'; - nr[1] = '\0'; - printk(KERN_INFO "isdn_net: Incoming call without OAD, assuming '0'\n"); - } else - strlcpy(nr, setup->phone, ISDN_MSNLEN); - si1 = (int) setup->si1; - si2 = (int) setup->si2; - if (!setup->eazmsn[0]) { - printk(KERN_WARNING "isdn_net: Incoming call without CPN, assuming '0'\n"); - eaz = "0"; - } else - eaz = setup->eazmsn; - if (dev->net_verbose > 1) - printk(KERN_INFO "isdn_net: call from %s,%d,%d -> %s\n", nr, si1, si2, eaz); - /* Accept DATA and VOICE calls at this stage - * local eaz is checked later for allowed call types - */ - if ((si1 != 7) && (si1 != 1)) { - if (dev->net_verbose > 1) - printk(KERN_INFO "isdn_net: Service-Indicator not 1 or 7, ignored\n"); - return 0; - } - n = (isdn_net_phone *) 0; - p = dev->netdev; - ematch = wret = swapped = 0; -#ifdef ISDN_DEBUG_NET_ICALL - printk(KERN_DEBUG "n_fi: di=%d ch=%d idx=%d usg=%d\n", di, ch, idx, - dev->usage[idx]); -#endif - while (p) { - int matchret; - isdn_net_local *lp = p->local; - - /* If last check has triggered as binding-swap, revert it */ - switch (swapped) { - case 2: - isdn_net_swap_usage(idx, sidx); - /* fall through */ - case 1: - isdn_net_swapbind(di); - break; - } - swapped = 0; - /* check acceptable call types for DOV */ - my_eaz = isdn_map_eaz2msn(lp->msn, di); - if (si1 == 1) { /* it's a DOV call, check if we allow it */ - if (*my_eaz == 'v' || *my_eaz == 'V' || - *my_eaz == 'b' || *my_eaz == 'B') - my_eaz++; /* skip to allow a match */ - else - my_eaz = NULL; /* force non match */ - } else { /* it's a DATA call, check if we allow it */ - if (*my_eaz == 'b' || *my_eaz == 'B') - my_eaz++; /* skip to allow a match */ - } - if (my_eaz) - matchret = isdn_msncmp(eaz, my_eaz); - else - matchret = 1; - if (!matchret) - ematch = 1; - - /* Remember if more numbers eventually can match */ - if (matchret > wret) - wret = matchret; -#ifdef ISDN_DEBUG_NET_ICALL - printk(KERN_DEBUG "n_fi: if='%s', l.msn=%s, l.flags=%d, l.dstate=%d\n", - p->dev->name, lp->msn, lp->flags, lp->dialstate); -#endif - if ((!matchret) && /* EAZ is matching */ - (((!(lp->flags & ISDN_NET_CONNECTED)) && /* but not connected */ - (USG_NONE(dev->usage[idx]))) || /* and ch. unused or */ - ((((lp->dialstate == 4) || (lp->dialstate == 12)) && /* if dialing */ - (!(lp->flags & ISDN_NET_CALLBACK))) /* but no callback */ - ))) - { -#ifdef ISDN_DEBUG_NET_ICALL - printk(KERN_DEBUG "n_fi: match1, pdev=%d pch=%d\n", - lp->pre_device, lp->pre_channel); -#endif - if (dev->usage[idx] & ISDN_USAGE_EXCLUSIVE) { - if ((lp->pre_channel != ch) || - (lp->pre_device != di)) { - /* Here we got a problem: - * If using an ICN-Card, an incoming call is always signaled on - * on the first channel of the card, if both channels are - * down. However this channel may be bound exclusive. If the - * second channel is free, this call should be accepted. - * The solution is horribly but it runs, so what: - * We exchange the exclusive bindings of the two channels, the - * corresponding variables in the interface-structs. - */ - if (ch == 0) { - sidx = isdn_dc2minor(di, 1); -#ifdef ISDN_DEBUG_NET_ICALL - printk(KERN_DEBUG "n_fi: ch is 0\n"); -#endif - if (USG_NONE(dev->usage[sidx])) { - /* Second Channel is free, now see if it is bound - * exclusive too. */ - if (dev->usage[sidx] & ISDN_USAGE_EXCLUSIVE) { -#ifdef ISDN_DEBUG_NET_ICALL - printk(KERN_DEBUG "n_fi: 2nd channel is down and bound\n"); -#endif - /* Yes, swap bindings only, if the original - * binding is bound to channel 1 of this driver */ - if ((lp->pre_device == di) && - (lp->pre_channel == 1)) { - isdn_net_swapbind(di); - swapped = 1; - } else { - /* ... else iterate next device */ - p = (isdn_net_dev *) p->next; - continue; - } - } else { -#ifdef ISDN_DEBUG_NET_ICALL - printk(KERN_DEBUG "n_fi: 2nd channel is down and unbound\n"); -#endif - /* No, swap always and swap excl-usage also */ - isdn_net_swap_usage(idx, sidx); - isdn_net_swapbind(di); - swapped = 2; - } - /* Now check for exclusive binding again */ -#ifdef ISDN_DEBUG_NET_ICALL - printk(KERN_DEBUG "n_fi: final check\n"); -#endif - if ((dev->usage[idx] & ISDN_USAGE_EXCLUSIVE) && - ((lp->pre_channel != ch) || - (lp->pre_device != di))) { -#ifdef ISDN_DEBUG_NET_ICALL - printk(KERN_DEBUG "n_fi: final check failed\n"); -#endif - p = (isdn_net_dev *) p->next; - continue; - } - } - } else { - /* We are already on the second channel, so nothing to do */ -#ifdef ISDN_DEBUG_NET_ICALL - printk(KERN_DEBUG "n_fi: already on 2nd channel\n"); -#endif - } - } - } -#ifdef ISDN_DEBUG_NET_ICALL - printk(KERN_DEBUG "n_fi: match2\n"); -#endif - n = lp->phone[0]; - if (lp->flags & ISDN_NET_SECURE) { - while (n) { - if (!isdn_msncmp(nr, n->num)) - break; - n = (isdn_net_phone *) n->next; - } - } - if (n || (!(lp->flags & ISDN_NET_SECURE))) { -#ifdef ISDN_DEBUG_NET_ICALL - printk(KERN_DEBUG "n_fi: match3\n"); -#endif - /* matching interface found */ - - /* - * Is the state STOPPED? - * If so, no dialin is allowed, - * so reject actively. - * */ - if (ISDN_NET_DIALMODE(*lp) == ISDN_NET_DM_OFF) { - printk(KERN_INFO "incoming call, interface %s `stopped' -> rejected\n", - p->dev->name); - return 3; - } - /* - * Is the interface up? - * If not, reject the call actively. - */ - if (!isdn_net_device_started(p)) { - printk(KERN_INFO "%s: incoming call, interface down -> rejected\n", - p->dev->name); - return 3; - } - /* Interface is up, now see if it's a slave. If so, see if - * it's master and parent slave is online. If not, reject the call. - */ - if (lp->master) { - isdn_net_local *mlp = ISDN_MASTER_PRIV(lp); - printk(KERN_DEBUG "ICALLslv: %s\n", p->dev->name); - printk(KERN_DEBUG "master=%s\n", lp->master->name); - if (mlp->flags & ISDN_NET_CONNECTED) { - printk(KERN_DEBUG "master online\n"); - /* Master is online, find parent-slave (master if first slave) */ - while (mlp->slave) { - if (ISDN_SLAVE_PRIV(mlp) == lp) - break; - mlp = ISDN_SLAVE_PRIV(mlp); - } - } else - printk(KERN_DEBUG "master offline\n"); - /* Found parent, if it's offline iterate next device */ - printk(KERN_DEBUG "mlpf: %d\n", mlp->flags & ISDN_NET_CONNECTED); - if (!(mlp->flags & ISDN_NET_CONNECTED)) { - p = (isdn_net_dev *) p->next; - continue; - } - } - if (lp->flags & ISDN_NET_CALLBACK) { - int chi; - /* - * Is the state MANUAL? - * If so, no callback can be made, - * so reject actively. - * */ - if (ISDN_NET_DIALMODE(*lp) == ISDN_NET_DM_OFF) { - printk(KERN_INFO "incoming call for callback, interface %s `off' -> rejected\n", - p->dev->name); - return 3; - } - printk(KERN_DEBUG "%s: call from %s -> %s, start callback\n", - p->dev->name, nr, eaz); - if (lp->phone[1]) { - /* Grab a free ISDN-Channel */ - spin_lock_irqsave(&dev->lock, flags); - if ((chi = - isdn_get_free_channel( - ISDN_USAGE_NET, - lp->l2_proto, - lp->l3_proto, - lp->pre_device, - lp->pre_channel, - lp->msn) - ) < 0) { - - printk(KERN_WARNING "isdn_net_find_icall: No channel for %s\n", - p->dev->name); - spin_unlock_irqrestore(&dev->lock, flags); - return 0; - } - /* Setup dialstate. */ - lp->dtimer = 0; - lp->dialstate = 11; - /* Connect interface with channel */ - isdn_net_bind_channel(lp, chi); -#ifdef CONFIG_ISDN_PPP - if (lp->p_encap == ISDN_NET_ENCAP_SYNCPPP) - if (isdn_ppp_bind(lp) < 0) { - spin_unlock_irqrestore(&dev->lock, flags); - isdn_net_unbind_channel(lp); - return 0; - } -#endif - spin_unlock_irqrestore(&dev->lock, flags); - /* Initiate dialing by returning 2 or 4 */ - return (lp->flags & ISDN_NET_CBHUP) ? 2 : 4; - } else - printk(KERN_WARNING "isdn_net: %s: No phone number\n", - p->dev->name); - return 0; - } else { - printk(KERN_DEBUG "%s: call from %s -> %s accepted\n", - p->dev->name, nr, eaz); - /* if this interface is dialing, it does it probably on a different - device, so free this device */ - if ((lp->dialstate == 4) || (lp->dialstate == 12)) { -#ifdef CONFIG_ISDN_PPP - if (lp->p_encap == ISDN_NET_ENCAP_SYNCPPP) - isdn_ppp_free(lp); -#endif - isdn_net_lp_disconnected(lp); - isdn_free_channel(lp->isdn_device, lp->isdn_channel, - ISDN_USAGE_NET); - } - spin_lock_irqsave(&dev->lock, flags); - dev->usage[idx] &= ISDN_USAGE_EXCLUSIVE; - dev->usage[idx] |= ISDN_USAGE_NET; - strcpy(dev->num[idx], nr); - isdn_info_update(); - dev->st_netdev[idx] = lp->netdev; - lp->isdn_device = di; - lp->isdn_channel = ch; - lp->ppp_slot = -1; - lp->flags |= ISDN_NET_CONNECTED; - lp->dialstate = 7; - lp->dtimer = 0; - lp->outgoing = 0; - lp->huptimer = 0; - lp->hupflags |= ISDN_WAITCHARGE; - lp->hupflags &= ~ISDN_HAVECHARGE; -#ifdef CONFIG_ISDN_PPP - if (lp->p_encap == ISDN_NET_ENCAP_SYNCPPP) { - if (isdn_ppp_bind(lp) < 0) { - isdn_net_unbind_channel(lp); - spin_unlock_irqrestore(&dev->lock, flags); - return 0; - } - } -#endif - spin_unlock_irqrestore(&dev->lock, flags); - return 1; - } - } - } - p = (isdn_net_dev *) p->next; - } - /* If none of configured EAZ/MSN matched and not verbose, be silent */ - if (!ematch || dev->net_verbose) - printk(KERN_INFO "isdn_net: call from %s -> %d %s ignored\n", nr, di, eaz); - return (wret == 2) ? 5 : 0; -} - -/* - * Search list of net-interfaces for an interface with given name. - */ -isdn_net_dev * -isdn_net_findif(char *name) -{ - isdn_net_dev *p = dev->netdev; - - while (p) { - if (!strcmp(p->dev->name, name)) - return p; - p = (isdn_net_dev *) p->next; - } - return (isdn_net_dev *) NULL; -} - -/* - * Force a net-interface to dial out. - * This is called from the userlevel-routine below or - * from isdn_net_start_xmit(). - */ -static int -isdn_net_force_dial_lp(isdn_net_local *lp) -{ - if ((!(lp->flags & ISDN_NET_CONNECTED)) && !lp->dialstate) { - int chi; - if (lp->phone[1]) { - ulong flags; - - /* Grab a free ISDN-Channel */ - spin_lock_irqsave(&dev->lock, flags); - if ((chi = isdn_get_free_channel( - ISDN_USAGE_NET, - lp->l2_proto, - lp->l3_proto, - lp->pre_device, - lp->pre_channel, - lp->msn)) < 0) { - printk(KERN_WARNING "isdn_net_force_dial: No channel for %s\n", - lp->netdev->dev->name); - spin_unlock_irqrestore(&dev->lock, flags); - return -EAGAIN; - } - lp->dialstate = 1; - /* Connect interface with channel */ - isdn_net_bind_channel(lp, chi); -#ifdef CONFIG_ISDN_PPP - if (lp->p_encap == ISDN_NET_ENCAP_SYNCPPP) - if (isdn_ppp_bind(lp) < 0) { - isdn_net_unbind_channel(lp); - spin_unlock_irqrestore(&dev->lock, flags); - return -EAGAIN; - } -#endif - /* Initiate dialing */ - spin_unlock_irqrestore(&dev->lock, flags); - isdn_net_dial(); - return 0; - } else - return -EINVAL; - } else - return -EBUSY; -} - -/* - * This is called from certain upper protocol layers (multilink ppp - * and x25iface encapsulation module) that want to initiate dialing - * themselves. - */ -int -isdn_net_dial_req(isdn_net_local *lp) -{ - /* is there a better error code? */ - if (!(ISDN_NET_DIALMODE(*lp) == ISDN_NET_DM_AUTO)) return -EBUSY; - - return isdn_net_force_dial_lp(lp); -} - -/* - * Force a net-interface to dial out. - * This is always called from within userspace (ISDN_IOCTL_NET_DIAL). - */ -int -isdn_net_force_dial(char *name) -{ - isdn_net_dev *p = isdn_net_findif(name); - - if (!p) - return -ENODEV; - return (isdn_net_force_dial_lp(p->local)); -} - -/* The ISDN-specific entries in the device structure. */ -static const struct net_device_ops isdn_netdev_ops = { - .ndo_init = isdn_net_init, - .ndo_open = isdn_net_open, - .ndo_stop = isdn_net_close, - .ndo_do_ioctl = isdn_net_ioctl, - - .ndo_start_xmit = isdn_net_start_xmit, - .ndo_get_stats = isdn_net_get_stats, - .ndo_tx_timeout = isdn_net_tx_timeout, -}; - -/* - * Helper for alloc_netdev() - */ -static void _isdn_setup(struct net_device *dev) -{ - isdn_net_local *lp = netdev_priv(dev); - - ether_setup(dev); - - /* Setup the generic properties */ - dev->flags = IFF_NOARP | IFF_POINTOPOINT; - - /* isdn prepends a header in the tx path, can't share skbs */ - dev->priv_flags &= ~IFF_TX_SKB_SHARING; - dev->header_ops = NULL; - dev->netdev_ops = &isdn_netdev_ops; - - /* for clients with MPPP maybe higher values better */ - dev->tx_queue_len = 30; - - lp->p_encap = ISDN_NET_ENCAP_RAWIP; - lp->magic = ISDN_NET_MAGIC; - lp->last = lp; - lp->next = lp; - lp->isdn_device = -1; - lp->isdn_channel = -1; - lp->pre_device = -1; - lp->pre_channel = -1; - lp->exclusive = -1; - lp->ppp_slot = -1; - lp->pppbind = -1; - skb_queue_head_init(&lp->super_tx_queue); - lp->l2_proto = ISDN_PROTO_L2_X75I; - lp->l3_proto = ISDN_PROTO_L3_TRANS; - lp->triggercps = 6000; - lp->slavedelay = 10 * HZ; - lp->hupflags = ISDN_INHUP; /* Do hangup even on incoming calls */ - lp->onhtime = 10; /* Default hangup-time for saving costs */ - lp->dialmax = 1; - /* Hangup before Callback, manual dial */ - lp->flags = ISDN_NET_CBHUP | ISDN_NET_DM_MANUAL; - lp->cbdelay = 25; /* Wait 5 secs before Callback */ - lp->dialtimeout = -1; /* Infinite Dial-Timeout */ - lp->dialwait = 5 * HZ; /* Wait 5 sec. after failed dial */ - lp->dialstarted = 0; /* Jiffies of last dial-start */ - lp->dialwait_timer = 0; /* Jiffies of earliest next dial-start */ -} - -/* - * Allocate a new network-interface and initialize its data structures. - */ -char * -isdn_net_new(char *name, struct net_device *master) -{ - isdn_net_dev *netdev; - - /* Avoid creating an existing interface */ - if (isdn_net_findif(name)) { - printk(KERN_WARNING "isdn_net: interface %s already exists\n", name); - return NULL; - } - if (name == NULL) - return NULL; - if (!(netdev = kzalloc(sizeof(isdn_net_dev), GFP_KERNEL))) { - printk(KERN_WARNING "isdn_net: Could not allocate net-device\n"); - return NULL; - } - netdev->dev = alloc_netdev(sizeof(isdn_net_local), name, - NET_NAME_UNKNOWN, _isdn_setup); - if (!netdev->dev) { - printk(KERN_WARNING "isdn_net: Could not allocate network device\n"); - kfree(netdev); - return NULL; - } - netdev->local = netdev_priv(netdev->dev); - - if (master) { - /* Device shall be a slave */ - struct net_device *p = MASTER_TO_SLAVE(master); - struct net_device *q = master; - - netdev->local->master = master; - /* Put device at end of slave-chain */ - while (p) { - q = p; - p = MASTER_TO_SLAVE(p); - } - MASTER_TO_SLAVE(q) = netdev->dev; - } else { - /* Device shall be a master */ - /* - * Watchdog timer (currently) for master only. - */ - netdev->dev->watchdog_timeo = ISDN_NET_TX_TIMEOUT; - if (register_netdev(netdev->dev) != 0) { - printk(KERN_WARNING "isdn_net: Could not register net-device\n"); - free_netdev(netdev->dev); - kfree(netdev); - return NULL; - } - } - netdev->queue = netdev->local; - spin_lock_init(&netdev->queue_lock); - - netdev->local->netdev = netdev; - - INIT_WORK(&netdev->local->tqueue, isdn_net_softint); - spin_lock_init(&netdev->local->xmit_lock); - - /* Put into to netdev-chain */ - netdev->next = (void *) dev->netdev; - dev->netdev = netdev; - return netdev->dev->name; -} - -char * -isdn_net_newslave(char *parm) -{ - char *p = strchr(parm, ','); - isdn_net_dev *n; - char newname[10]; - - if (p) { - /* Slave-Name MUST not be empty or overflow 'newname' */ - if (strscpy(newname, p + 1, sizeof(newname)) <= 0) - return NULL; - *p = 0; - /* Master must already exist */ - if (!(n = isdn_net_findif(parm))) - return NULL; - /* Master must be a real interface, not a slave */ - if (n->local->master) - return NULL; - /* Master must not be started yet */ - if (isdn_net_device_started(n)) - return NULL; - return (isdn_net_new(newname, n->dev)); - } - return NULL; -} - -/* - * Set interface-parameters. - * Always set all parameters, so the user-level application is responsible - * for not overwriting existing setups. It has to get the current - * setup first, if only selected parameters are to be changed. - */ -int -isdn_net_setcfg(isdn_net_ioctl_cfg *cfg) -{ - isdn_net_dev *p = isdn_net_findif(cfg->name); - ulong features; - int i; - int drvidx; - int chidx; - char drvid[25]; - - if (p) { - isdn_net_local *lp = p->local; - - /* See if any registered driver supports the features we want */ - features = ((1 << cfg->l2_proto) << ISDN_FEATURE_L2_SHIFT) | - ((1 << cfg->l3_proto) << ISDN_FEATURE_L3_SHIFT); - for (i = 0; i < ISDN_MAX_DRIVERS; i++) - if (dev->drv[i]) - if ((dev->drv[i]->interface->features & features) == features) - break; - if (i == ISDN_MAX_DRIVERS) { - printk(KERN_WARNING "isdn_net: No driver with selected features\n"); - return -ENODEV; - } - if (lp->p_encap != cfg->p_encap) { -#ifdef CONFIG_ISDN_X25 - struct concap_proto *cprot = p->cprot; -#endif - if (isdn_net_device_started(p)) { - printk(KERN_WARNING "%s: cannot change encap when if is up\n", - p->dev->name); - return -EBUSY; - } -#ifdef CONFIG_ISDN_X25 - if (cprot && cprot->pops) - cprot->pops->proto_del(cprot); - p->cprot = NULL; - lp->dops = NULL; - /* ... , prepare for configuration of new one ... */ - switch (cfg->p_encap) { - case ISDN_NET_ENCAP_X25IFACE: - lp->dops = &isdn_concap_reliable_dl_dops; - } - /* ... and allocate new one ... */ - p->cprot = isdn_concap_new(cfg->p_encap); - /* p -> cprot == NULL now if p_encap is not supported - by means of the concap_proto mechanism */ - /* the protocol is not configured yet; this will - happen later when isdn_net_reset() is called */ -#endif - } - switch (cfg->p_encap) { - case ISDN_NET_ENCAP_SYNCPPP: -#ifndef CONFIG_ISDN_PPP - printk(KERN_WARNING "%s: SyncPPP support not configured\n", - p->dev->name); - return -EINVAL; -#else - p->dev->type = ARPHRD_PPP; /* change ARP type */ - p->dev->addr_len = 0; -#endif - break; - case ISDN_NET_ENCAP_X25IFACE: -#ifndef CONFIG_ISDN_X25 - printk(KERN_WARNING "%s: isdn-x25 support not configured\n", - p->dev->name); - return -EINVAL; -#else - p->dev->type = ARPHRD_X25; /* change ARP type */ - p->dev->addr_len = 0; -#endif - break; - case ISDN_NET_ENCAP_CISCOHDLCK: - break; - default: - if (cfg->p_encap >= 0 && - cfg->p_encap <= ISDN_NET_ENCAP_MAX_ENCAP) - break; - printk(KERN_WARNING - "%s: encapsulation protocol %d not supported\n", - p->dev->name, cfg->p_encap); - return -EINVAL; - } - if (strlen(cfg->drvid)) { - /* A bind has been requested ... */ - char *c, - *e; - - if (strnlen(cfg->drvid, sizeof(cfg->drvid)) == - sizeof(cfg->drvid)) - return -EINVAL; - drvidx = -1; - chidx = -1; - strcpy(drvid, cfg->drvid); - if ((c = strchr(drvid, ','))) { - /* The channel-number is appended to the driver-Id with a comma */ - chidx = (int) simple_strtoul(c + 1, &e, 10); - if (e == c) - chidx = -1; - *c = '\0'; - } - for (i = 0; i < ISDN_MAX_DRIVERS; i++) - /* Lookup driver-Id in array */ - if (!(strcmp(dev->drvid[i], drvid))) { - drvidx = i; - break; - } - if ((drvidx == -1) || (chidx == -1)) - /* Either driver-Id or channel-number invalid */ - return -ENODEV; - } else { - /* Parameters are valid, so get them */ - drvidx = lp->pre_device; - chidx = lp->pre_channel; - } - if (cfg->exclusive > 0) { - unsigned long flags; - - /* If binding is exclusive, try to grab the channel */ - spin_lock_irqsave(&dev->lock, flags); - if ((i = isdn_get_free_channel(ISDN_USAGE_NET, - lp->l2_proto, lp->l3_proto, drvidx, - chidx, lp->msn)) < 0) { - /* Grab failed, because desired channel is in use */ - lp->exclusive = -1; - spin_unlock_irqrestore(&dev->lock, flags); - return -EBUSY; - } - /* All went ok, so update isdninfo */ - dev->usage[i] = ISDN_USAGE_EXCLUSIVE; - isdn_info_update(); - spin_unlock_irqrestore(&dev->lock, flags); - lp->exclusive = i; - } else { - /* Non-exclusive binding or unbind. */ - lp->exclusive = -1; - if ((lp->pre_device != -1) && (cfg->exclusive == -1)) { - isdn_unexclusive_channel(lp->pre_device, lp->pre_channel); - isdn_free_channel(lp->pre_device, lp->pre_channel, ISDN_USAGE_NET); - drvidx = -1; - chidx = -1; - } - } - strlcpy(lp->msn, cfg->eaz, sizeof(lp->msn)); - lp->pre_device = drvidx; - lp->pre_channel = chidx; - lp->onhtime = cfg->onhtime; - lp->charge = cfg->charge; - lp->l2_proto = cfg->l2_proto; - lp->l3_proto = cfg->l3_proto; - lp->cbdelay = cfg->cbdelay; - lp->dialmax = cfg->dialmax; - lp->triggercps = cfg->triggercps; - lp->slavedelay = cfg->slavedelay * HZ; - lp->pppbind = cfg->pppbind; - lp->dialtimeout = cfg->dialtimeout >= 0 ? cfg->dialtimeout * HZ : -1; - lp->dialwait = cfg->dialwait * HZ; - if (cfg->secure) - lp->flags |= ISDN_NET_SECURE; - else - lp->flags &= ~ISDN_NET_SECURE; - if (cfg->cbhup) - lp->flags |= ISDN_NET_CBHUP; - else - lp->flags &= ~ISDN_NET_CBHUP; - switch (cfg->callback) { - case 0: - lp->flags &= ~(ISDN_NET_CALLBACK | ISDN_NET_CBOUT); - break; - case 1: - lp->flags |= ISDN_NET_CALLBACK; - lp->flags &= ~ISDN_NET_CBOUT; - break; - case 2: - lp->flags |= ISDN_NET_CBOUT; - lp->flags &= ~ISDN_NET_CALLBACK; - break; - } - lp->flags &= ~ISDN_NET_DIALMODE_MASK; /* first all bits off */ - if (cfg->dialmode && !(cfg->dialmode & ISDN_NET_DIALMODE_MASK)) { - /* old isdnctrl version, where only 0 or 1 is given */ - printk(KERN_WARNING - "Old isdnctrl version detected! Please update.\n"); - lp->flags |= ISDN_NET_DM_OFF; /* turn on `off' bit */ - } - else { - lp->flags |= cfg->dialmode; /* turn on selected bits */ - } - if (cfg->chargehup) - lp->hupflags |= ISDN_CHARGEHUP; - else - lp->hupflags &= ~ISDN_CHARGEHUP; - if (cfg->ihup) - lp->hupflags |= ISDN_INHUP; - else - lp->hupflags &= ~ISDN_INHUP; - if (cfg->chargeint > 10) { - lp->hupflags |= ISDN_CHARGEHUP | ISDN_HAVECHARGE | ISDN_MANCHARGE; - lp->chargeint = cfg->chargeint * HZ; - } - if (cfg->p_encap != lp->p_encap) { - if (cfg->p_encap == ISDN_NET_ENCAP_RAWIP) { - p->dev->header_ops = NULL; - p->dev->flags = IFF_NOARP | IFF_POINTOPOINT; - } else { - p->dev->header_ops = &isdn_header_ops; - if (cfg->p_encap == ISDN_NET_ENCAP_ETHER) - p->dev->flags = IFF_BROADCAST | IFF_MULTICAST; - else - p->dev->flags = IFF_NOARP | IFF_POINTOPOINT; - } - } - lp->p_encap = cfg->p_encap; - return 0; - } - return -ENODEV; -} - -/* - * Perform get-interface-parameters.ioctl - */ -int -isdn_net_getcfg(isdn_net_ioctl_cfg *cfg) -{ - isdn_net_dev *p = isdn_net_findif(cfg->name); - - if (p) { - isdn_net_local *lp = p->local; - - strcpy(cfg->eaz, lp->msn); - cfg->exclusive = lp->exclusive; - if (lp->pre_device >= 0) { - sprintf(cfg->drvid, "%s,%d", dev->drvid[lp->pre_device], - lp->pre_channel); - } else - cfg->drvid[0] = '\0'; - cfg->onhtime = lp->onhtime; - cfg->charge = lp->charge; - cfg->l2_proto = lp->l2_proto; - cfg->l3_proto = lp->l3_proto; - cfg->p_encap = lp->p_encap; - cfg->secure = (lp->flags & ISDN_NET_SECURE) ? 1 : 0; - cfg->callback = 0; - if (lp->flags & ISDN_NET_CALLBACK) - cfg->callback = 1; - if (lp->flags & ISDN_NET_CBOUT) - cfg->callback = 2; - cfg->cbhup = (lp->flags & ISDN_NET_CBHUP) ? 1 : 0; - cfg->dialmode = lp->flags & ISDN_NET_DIALMODE_MASK; - cfg->chargehup = (lp->hupflags & ISDN_CHARGEHUP) ? 1 : 0; - cfg->ihup = (lp->hupflags & ISDN_INHUP) ? 1 : 0; - cfg->cbdelay = lp->cbdelay; - cfg->dialmax = lp->dialmax; - cfg->triggercps = lp->triggercps; - cfg->slavedelay = lp->slavedelay / HZ; - cfg->chargeint = (lp->hupflags & ISDN_CHARGEHUP) ? - (lp->chargeint / HZ) : 0; - cfg->pppbind = lp->pppbind; - cfg->dialtimeout = lp->dialtimeout >= 0 ? lp->dialtimeout / HZ : -1; - cfg->dialwait = lp->dialwait / HZ; - if (lp->slave) { - if (strlen(lp->slave->name) >= 10) - strcpy(cfg->slave, "too-long"); - else - strcpy(cfg->slave, lp->slave->name); - } else - cfg->slave[0] = '\0'; - if (lp->master) { - if (strlen(lp->master->name) >= 10) - strcpy(cfg->master, "too-long"); - else - strcpy(cfg->master, lp->master->name); - } else - cfg->master[0] = '\0'; - return 0; - } - return -ENODEV; -} - -/* - * Add a phone-number to an interface. - */ -int -isdn_net_addphone(isdn_net_ioctl_phone *phone) -{ - isdn_net_dev *p = isdn_net_findif(phone->name); - isdn_net_phone *n; - - if (p) { - if (!(n = kmalloc(sizeof(isdn_net_phone), GFP_KERNEL))) - return -ENOMEM; - strlcpy(n->num, phone->phone, sizeof(n->num)); - n->next = p->local->phone[phone->outgoing & 1]; - p->local->phone[phone->outgoing & 1] = n; - return 0; - } - return -ENODEV; -} - -/* - * Copy a string of all phone-numbers of an interface to user space. - * This might sleep and must be called with the isdn semaphore down. - */ -int -isdn_net_getphones(isdn_net_ioctl_phone *phone, char __user *phones) -{ - isdn_net_dev *p = isdn_net_findif(phone->name); - int inout = phone->outgoing & 1; - int more = 0; - int count = 0; - isdn_net_phone *n; - - if (!p) - return -ENODEV; - inout &= 1; - for (n = p->local->phone[inout]; n; n = n->next) { - if (more) { - put_user(' ', phones++); - count++; - } - if (copy_to_user(phones, n->num, strlen(n->num) + 1)) { - return -EFAULT; - } - phones += strlen(n->num); - count += strlen(n->num); - more = 1; - } - put_user(0, phones); - count++; - return count; -} - -/* - * Copy a string containing the peer's phone number of a connected interface - * to user space. - */ -int -isdn_net_getpeer(isdn_net_ioctl_phone *phone, isdn_net_ioctl_phone __user *peer) -{ - isdn_net_dev *p = isdn_net_findif(phone->name); - int ch, dv, idx; - - if (!p) - return -ENODEV; - /* - * Theoretical race: while this executes, the remote number might - * become invalid (hang up) or change (new connection), resulting - * in (partially) wrong number copied to user. This race - * currently ignored. - */ - ch = p->local->isdn_channel; - dv = p->local->isdn_device; - if (ch < 0 && dv < 0) - return -ENOTCONN; - idx = isdn_dc2minor(dv, ch); - if (idx < 0) - return -ENODEV; - /* for pre-bound channels, we need this extra check */ - if (strncmp(dev->num[idx], "???", 3) == 0) - return -ENOTCONN; - strncpy(phone->phone, dev->num[idx], ISDN_MSNLEN); - phone->outgoing = USG_OUTGOING(dev->usage[idx]); - if (copy_to_user(peer, phone, sizeof(*peer))) - return -EFAULT; - return 0; -} -/* - * Delete a phone-number from an interface. - */ -int -isdn_net_delphone(isdn_net_ioctl_phone *phone) -{ - isdn_net_dev *p = isdn_net_findif(phone->name); - int inout = phone->outgoing & 1; - isdn_net_phone *n; - isdn_net_phone *m; - - if (p) { - n = p->local->phone[inout]; - m = NULL; - while (n) { - if (!strcmp(n->num, phone->phone)) { - if (p->local->dial == n) - p->local->dial = n->next; - if (m) - m->next = n->next; - else - p->local->phone[inout] = n->next; - kfree(n); - return 0; - } - m = n; - n = (isdn_net_phone *) n->next; - } - return -EINVAL; - } - return -ENODEV; -} - -/* - * Delete all phone-numbers of an interface. - */ -static int -isdn_net_rmallphone(isdn_net_dev *p) -{ - isdn_net_phone *n; - isdn_net_phone *m; - int i; - - for (i = 0; i < 2; i++) { - n = p->local->phone[i]; - while (n) { - m = n->next; - kfree(n); - n = m; - } - p->local->phone[i] = NULL; - } - p->local->dial = NULL; - return 0; -} - -/* - * Force a hangup of a network-interface. - */ -int -isdn_net_force_hangup(char *name) -{ - isdn_net_dev *p = isdn_net_findif(name); - struct net_device *q; - - if (p) { - if (p->local->isdn_device < 0) - return 1; - q = p->local->slave; - /* If this interface has slaves, do a hangup for them also. */ - while (q) { - isdn_net_hangup(q); - q = MASTER_TO_SLAVE(q); - } - isdn_net_hangup(p->dev); - return 0; - } - return -ENODEV; -} - -/* - * Helper-function for isdn_net_rm: Do the real work. - */ -static int -isdn_net_realrm(isdn_net_dev *p, isdn_net_dev *q) -{ - u_long flags; - - if (isdn_net_device_started(p)) { - return -EBUSY; - } -#ifdef CONFIG_ISDN_X25 - if (p->cprot && p->cprot->pops) - p->cprot->pops->proto_del(p->cprot); -#endif - /* Free all phone-entries */ - isdn_net_rmallphone(p); - /* If interface is bound exclusive, free channel-usage */ - if (p->local->exclusive != -1) - isdn_unexclusive_channel(p->local->pre_device, p->local->pre_channel); - if (p->local->master) { - /* It's a slave-device, so update master's slave-pointer if necessary */ - if (((isdn_net_local *) ISDN_MASTER_PRIV(p->local))->slave == - p->dev) - ((isdn_net_local *)ISDN_MASTER_PRIV(p->local))->slave = - p->local->slave; - } else { - /* Unregister only if it's a master-device */ - unregister_netdev(p->dev); - } - /* Unlink device from chain */ - spin_lock_irqsave(&dev->lock, flags); - if (q) - q->next = p->next; - else - dev->netdev = p->next; - if (p->local->slave) { - /* If this interface has a slave, remove it also */ - char *slavename = p->local->slave->name; - isdn_net_dev *n = dev->netdev; - q = NULL; - while (n) { - if (!strcmp(n->dev->name, slavename)) { - spin_unlock_irqrestore(&dev->lock, flags); - isdn_net_realrm(n, q); - spin_lock_irqsave(&dev->lock, flags); - break; - } - q = n; - n = (isdn_net_dev *)n->next; - } - } - spin_unlock_irqrestore(&dev->lock, flags); - /* If no more net-devices remain, disable auto-hangup timer */ - if (dev->netdev == NULL) - isdn_timer_ctrl(ISDN_TIMER_NETHANGUP, 0); - free_netdev(p->dev); - kfree(p); - - return 0; -} - -/* - * Remove a single network-interface. - */ -int -isdn_net_rm(char *name) -{ - u_long flags; - isdn_net_dev *p; - isdn_net_dev *q; - - /* Search name in netdev-chain */ - spin_lock_irqsave(&dev->lock, flags); - p = dev->netdev; - q = NULL; - while (p) { - if (!strcmp(p->dev->name, name)) { - spin_unlock_irqrestore(&dev->lock, flags); - return (isdn_net_realrm(p, q)); - } - q = p; - p = (isdn_net_dev *) p->next; - } - spin_unlock_irqrestore(&dev->lock, flags); - /* If no more net-devices remain, disable auto-hangup timer */ - if (dev->netdev == NULL) - isdn_timer_ctrl(ISDN_TIMER_NETHANGUP, 0); - return -ENODEV; -} - -/* - * Remove all network-interfaces - */ -int -isdn_net_rmall(void) -{ - u_long flags; - int ret; - - /* Walk through netdev-chain */ - spin_lock_irqsave(&dev->lock, flags); - while (dev->netdev) { - if (!dev->netdev->local->master) { - /* Remove master-devices only, slaves get removed with their master */ - spin_unlock_irqrestore(&dev->lock, flags); - if ((ret = isdn_net_realrm(dev->netdev, NULL))) { - return ret; - } - spin_lock_irqsave(&dev->lock, flags); - } - } - dev->netdev = NULL; - spin_unlock_irqrestore(&dev->lock, flags); - return 0; -} diff --git a/drivers/isdn/i4l/isdn_net.h b/drivers/isdn/i4l/isdn_net.h deleted file mode 100644 index cca6d68da171..000000000000 --- a/drivers/isdn/i4l/isdn_net.h +++ /dev/null @@ -1,151 +0,0 @@ -/* $Id: isdn_net.h,v 1.1.2.2 2004/01/12 22:37:19 keil Exp $ - * - * header for Linux ISDN subsystem, network related functions (linklevel). - * - * Copyright 1994-1999 by Fritz Elfert (fritz@isdn4linux.de) - * Copyright 1995,96 by Thinking Objects Software GmbH Wuerzburg - * Copyright 1995,96 by Michael Hipp (Michael.Hipp@student.uni-tuebingen.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -/* Definitions for hupflags: */ -#define ISDN_WAITCHARGE 1 /* did not get a charge info yet */ -#define ISDN_HAVECHARGE 2 /* We know a charge info */ -#define ISDN_CHARGEHUP 4 /* We want to use the charge mechanism */ -#define ISDN_INHUP 8 /* Even if incoming, close after huptimeout */ -#define ISDN_MANCHARGE 16 /* Charge Interval manually set */ - -/* - * Definitions for Cisco-HDLC header. - */ - -#define CISCO_ADDR_UNICAST 0x0f -#define CISCO_ADDR_BROADCAST 0x8f -#define CISCO_CTRL 0x00 -#define CISCO_TYPE_CDP 0x2000 -#define CISCO_TYPE_SLARP 0x8035 -#define CISCO_SLARP_REQUEST 0 -#define CISCO_SLARP_REPLY 1 -#define CISCO_SLARP_KEEPALIVE 2 - -extern char *isdn_net_new(char *, struct net_device *); -extern char *isdn_net_newslave(char *); -extern int isdn_net_rm(char *); -extern int isdn_net_rmall(void); -extern int isdn_net_stat_callback(int, isdn_ctrl *); -extern int isdn_net_setcfg(isdn_net_ioctl_cfg *); -extern int isdn_net_getcfg(isdn_net_ioctl_cfg *); -extern int isdn_net_addphone(isdn_net_ioctl_phone *); -extern int isdn_net_getphones(isdn_net_ioctl_phone *, char __user *); -extern int isdn_net_getpeer(isdn_net_ioctl_phone *, isdn_net_ioctl_phone __user *); -extern int isdn_net_delphone(isdn_net_ioctl_phone *); -extern int isdn_net_find_icall(int, int, int, setup_parm *); -extern void isdn_net_hangup(struct net_device *); -extern void isdn_net_dial(void); -extern void isdn_net_autohup(void); -extern int isdn_net_force_hangup(char *); -extern int isdn_net_force_dial(char *); -extern isdn_net_dev *isdn_net_findif(char *); -extern int isdn_net_rcv_skb(int, struct sk_buff *); -extern int isdn_net_dial_req(isdn_net_local *); -extern void isdn_net_writebuf_skb(isdn_net_local *lp, struct sk_buff *skb); -extern void isdn_net_write_super(isdn_net_local *lp, struct sk_buff *skb); - -#define ISDN_NET_MAX_QUEUE_LENGTH 2 - -#define ISDN_MASTER_PRIV(lp) ((isdn_net_local *) netdev_priv(lp->master)) -#define ISDN_SLAVE_PRIV(lp) ((isdn_net_local *) netdev_priv(lp->slave)) -#define MASTER_TO_SLAVE(master) \ - (((isdn_net_local *) netdev_priv(master))->slave) - -/* - * is this particular channel busy? - */ -static __inline__ int isdn_net_lp_busy(isdn_net_local *lp) -{ - if (atomic_read(&lp->frame_cnt) < ISDN_NET_MAX_QUEUE_LENGTH) - return 0; - else - return 1; -} - -/* - * For the given net device, this will get a non-busy channel out of the - * corresponding bundle. The returned channel is locked. - */ -static __inline__ isdn_net_local *isdn_net_get_locked_lp(isdn_net_dev *nd) -{ - unsigned long flags; - isdn_net_local *lp; - - spin_lock_irqsave(&nd->queue_lock, flags); - lp = nd->queue; /* get lp on top of queue */ - while (isdn_net_lp_busy(nd->queue)) { - nd->queue = nd->queue->next; - if (nd->queue == lp) { /* not found -- should never happen */ - lp = NULL; - goto errout; - } - } - lp = nd->queue; - nd->queue = nd->queue->next; - spin_unlock_irqrestore(&nd->queue_lock, flags); - spin_lock(&lp->xmit_lock); - local_bh_disable(); - return lp; -errout: - spin_unlock_irqrestore(&nd->queue_lock, flags); - return lp; -} - -/* - * add a channel to a bundle - */ -static __inline__ void isdn_net_add_to_bundle(isdn_net_dev *nd, isdn_net_local *nlp) -{ - isdn_net_local *lp; - unsigned long flags; - - spin_lock_irqsave(&nd->queue_lock, flags); - - lp = nd->queue; -// printk(KERN_DEBUG "%s: lp:%s(%p) nlp:%s(%p) last(%p)\n", -// __func__, lp->name, lp, nlp->name, nlp, lp->last); - nlp->last = lp->last; - lp->last->next = nlp; - lp->last = nlp; - nlp->next = lp; - nd->queue = nlp; - - spin_unlock_irqrestore(&nd->queue_lock, flags); -} -/* - * remove a channel from the bundle it belongs to - */ -static __inline__ void isdn_net_rm_from_bundle(isdn_net_local *lp) -{ - isdn_net_local *master_lp = lp; - unsigned long flags; - - if (lp->master) - master_lp = ISDN_MASTER_PRIV(lp); - -// printk(KERN_DEBUG "%s: lp:%s(%p) mlp:%s(%p) last(%p) next(%p) mndq(%p)\n", -// __func__, lp->name, lp, master_lp->name, master_lp, lp->last, lp->next, master_lp->netdev->queue); - spin_lock_irqsave(&master_lp->netdev->queue_lock, flags); - lp->last->next = lp->next; - lp->next->last = lp->last; - if (master_lp->netdev->queue == lp) { - master_lp->netdev->queue = lp->next; - if (lp->next == lp) { /* last in queue */ - master_lp->netdev->queue = master_lp->netdev->local; - } - } - lp->next = lp->last = lp; /* (re)set own pointers */ -// printk(KERN_DEBUG "%s: mndq(%p)\n", -// __func__, master_lp->netdev->queue); - spin_unlock_irqrestore(&master_lp->netdev->queue_lock, flags); -} diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c deleted file mode 100644 index 7e0f419c14f8..000000000000 --- a/drivers/isdn/i4l/isdn_ppp.c +++ /dev/null @@ -1,3046 +0,0 @@ -/* $Id: isdn_ppp.c,v 1.1.2.3 2004/02/10 01:07:13 keil Exp $ - * - * Linux ISDN subsystem, functions for synchronous PPP (linklevel). - * - * Copyright 1995,96 by Michael Hipp (Michael.Hipp@student.uni-tuebingen.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#include -#include -#include -#include -#ifdef CONFIG_IPPP_FILTER -#include -#endif - -#include "isdn_common.h" -#include "isdn_ppp.h" -#include "isdn_net.h" - -#ifndef PPP_IPX -#define PPP_IPX 0x002b -#endif - -/* Prototypes */ -static int isdn_ppp_fill_rq(unsigned char *buf, int len, int proto, int slot); -static int isdn_ppp_closewait(int slot); -static void isdn_ppp_push_higher(isdn_net_dev *net_dev, isdn_net_local *lp, - struct sk_buff *skb, int proto); -static int isdn_ppp_if_get_unit(char *namebuf); -static int isdn_ppp_set_compressor(struct ippp_struct *is, struct isdn_ppp_comp_data *); -static struct sk_buff *isdn_ppp_decompress(struct sk_buff *, - struct ippp_struct *, struct ippp_struct *, int *proto); -static void isdn_ppp_receive_ccp(isdn_net_dev *net_dev, isdn_net_local *lp, - struct sk_buff *skb, int proto); -static struct sk_buff *isdn_ppp_compress(struct sk_buff *skb_in, int *proto, - struct ippp_struct *is, struct ippp_struct *master, int type); -static void isdn_ppp_send_ccp(isdn_net_dev *net_dev, isdn_net_local *lp, - struct sk_buff *skb); - -/* New CCP stuff */ -static void isdn_ppp_ccp_kickup(struct ippp_struct *is); -static void isdn_ppp_ccp_xmit_reset(struct ippp_struct *is, int proto, - unsigned char code, unsigned char id, - unsigned char *data, int len); -static struct ippp_ccp_reset *isdn_ppp_ccp_reset_alloc(struct ippp_struct *is); -static void isdn_ppp_ccp_reset_free(struct ippp_struct *is); -static void isdn_ppp_ccp_reset_free_state(struct ippp_struct *is, - unsigned char id); -static void isdn_ppp_ccp_timer_callback(struct timer_list *t); -static struct ippp_ccp_reset_state *isdn_ppp_ccp_reset_alloc_state(struct ippp_struct *is, - unsigned char id); -static void isdn_ppp_ccp_reset_trans(struct ippp_struct *is, - struct isdn_ppp_resetparams *rp); -static void isdn_ppp_ccp_reset_ack_rcvd(struct ippp_struct *is, - unsigned char id); - - - -#ifdef CONFIG_ISDN_MPP -static ippp_bundle *isdn_ppp_bundle_arr = NULL; - -static int isdn_ppp_mp_bundle_array_init(void); -static int isdn_ppp_mp_init(isdn_net_local *lp, ippp_bundle *add_to); -static void isdn_ppp_mp_receive(isdn_net_dev *net_dev, isdn_net_local *lp, - struct sk_buff *skb); -static void isdn_ppp_mp_cleanup(isdn_net_local *lp); - -static int isdn_ppp_bundle(struct ippp_struct *, int unit); -#endif /* CONFIG_ISDN_MPP */ - -char *isdn_ppp_revision = "$Revision: 1.1.2.3 $"; - -static struct ippp_struct *ippp_table[ISDN_MAX_CHANNELS]; - -static struct isdn_ppp_compressor *ipc_head = NULL; - -/* - * frame log (debug) - */ -static void -isdn_ppp_frame_log(char *info, char *data, int len, int maxlen, int unit, int slot) -{ - int cnt, - j, - i; - char buf[80]; - - if (len < maxlen) - maxlen = len; - - for (i = 0, cnt = 0; cnt < maxlen; i++) { - for (j = 0; j < 16 && cnt < maxlen; j++, cnt++) - sprintf(buf + j * 3, "%02x ", (unsigned char)data[cnt]); - printk(KERN_DEBUG "[%d/%d].%s[%d]: %s\n", unit, slot, info, i, buf); - } -} - -/* - * unbind isdn_net_local <=> ippp-device - * note: it can happen, that we hangup/free the master before the slaves - * in this case we bind another lp to the master device - */ -int -isdn_ppp_free(isdn_net_local *lp) -{ - struct ippp_struct *is; - - if (lp->ppp_slot < 0 || lp->ppp_slot >= ISDN_MAX_CHANNELS) { - printk(KERN_ERR "%s: ppp_slot(%d) out of range\n", - __func__, lp->ppp_slot); - return 0; - } - -#ifdef CONFIG_ISDN_MPP - spin_lock(&lp->netdev->pb->lock); -#endif - isdn_net_rm_from_bundle(lp); -#ifdef CONFIG_ISDN_MPP - if (lp->netdev->pb->ref_ct == 1) /* last link in queue? */ - isdn_ppp_mp_cleanup(lp); - - lp->netdev->pb->ref_ct--; - spin_unlock(&lp->netdev->pb->lock); -#endif /* CONFIG_ISDN_MPP */ - if (lp->ppp_slot < 0 || lp->ppp_slot >= ISDN_MAX_CHANNELS) { - printk(KERN_ERR "%s: ppp_slot(%d) now invalid\n", - __func__, lp->ppp_slot); - return 0; - } - is = ippp_table[lp->ppp_slot]; - if ((is->state & IPPP_CONNECT)) - isdn_ppp_closewait(lp->ppp_slot); /* force wakeup on ippp device */ - else if (is->state & IPPP_ASSIGNED) - is->state = IPPP_OPEN; /* fallback to 'OPEN but not ASSIGNED' state */ - - if (is->debug & 0x1) - printk(KERN_DEBUG "isdn_ppp_free %d %lx %lx\n", lp->ppp_slot, (long) lp, (long) is->lp); - - is->lp = NULL; /* link is down .. set lp to NULL */ - lp->ppp_slot = -1; /* is this OK ?? */ - - return 0; -} - -/* - * bind isdn_net_local <=> ippp-device - * - * This function is allways called with holding dev->lock so - * no additional lock is needed - */ -int -isdn_ppp_bind(isdn_net_local *lp) -{ - int i; - int unit = 0; - struct ippp_struct *is; - int retval; - - if (lp->pppbind < 0) { /* device bounded to ippp device ? */ - isdn_net_dev *net_dev = dev->netdev; - char exclusive[ISDN_MAX_CHANNELS]; /* exclusive flags */ - memset(exclusive, 0, ISDN_MAX_CHANNELS); - while (net_dev) { /* step through net devices to find exclusive minors */ - isdn_net_local *lp = net_dev->local; - if (lp->pppbind >= 0) - exclusive[lp->pppbind] = 1; - net_dev = net_dev->next; - } - /* - * search a free device / slot - */ - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - if (ippp_table[i]->state == IPPP_OPEN && !exclusive[ippp_table[i]->minor]) { /* OPEN, but not connected! */ - break; - } - } - } else { - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - if (ippp_table[i]->minor == lp->pppbind && - (ippp_table[i]->state & IPPP_OPEN) == IPPP_OPEN) - break; - } - } - - if (i >= ISDN_MAX_CHANNELS) { - printk(KERN_WARNING "isdn_ppp_bind: Can't find a (free) connection to the ipppd daemon.\n"); - retval = -1; - goto out; - } - /* get unit number from interface name .. ugly! */ - unit = isdn_ppp_if_get_unit(lp->netdev->dev->name); - if (unit < 0) { - printk(KERN_ERR "isdn_ppp_bind: illegal interface name %s.\n", - lp->netdev->dev->name); - retval = -1; - goto out; - } - - lp->ppp_slot = i; - is = ippp_table[i]; - is->lp = lp; - is->unit = unit; - is->state = IPPP_OPEN | IPPP_ASSIGNED; /* assigned to a netdevice but not connected */ -#ifdef CONFIG_ISDN_MPP - retval = isdn_ppp_mp_init(lp, NULL); - if (retval < 0) - goto out; -#endif /* CONFIG_ISDN_MPP */ - - retval = lp->ppp_slot; - -out: - return retval; -} - -/* - * kick the ipppd on the device - * (wakes up daemon after B-channel connect) - */ - -void -isdn_ppp_wakeup_daemon(isdn_net_local *lp) -{ - if (lp->ppp_slot < 0 || lp->ppp_slot >= ISDN_MAX_CHANNELS) { - printk(KERN_ERR "%s: ppp_slot(%d) out of range\n", - __func__, lp->ppp_slot); - return; - } - ippp_table[lp->ppp_slot]->state = IPPP_OPEN | IPPP_CONNECT | IPPP_NOBLOCK; - wake_up_interruptible(&ippp_table[lp->ppp_slot]->wq); -} - -/* - * there was a hangup on the netdevice - * force wakeup of the ippp device - * go into 'device waits for release' state - */ -static int -isdn_ppp_closewait(int slot) -{ - struct ippp_struct *is; - - if (slot < 0 || slot >= ISDN_MAX_CHANNELS) { - printk(KERN_ERR "%s: slot(%d) out of range\n", - __func__, slot); - return 0; - } - is = ippp_table[slot]; - if (is->state) - wake_up_interruptible(&is->wq); - is->state = IPPP_CLOSEWAIT; - return 1; -} - -/* - * isdn_ppp_find_slot / isdn_ppp_free_slot - */ - -static int -isdn_ppp_get_slot(void) -{ - int i; - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - if (!ippp_table[i]->state) - return i; - } - return -1; -} - -/* - * isdn_ppp_open - */ - -int -isdn_ppp_open(int min, struct file *file) -{ - int slot; - struct ippp_struct *is; - - if (min < 0 || min >= ISDN_MAX_CHANNELS) - return -ENODEV; - - slot = isdn_ppp_get_slot(); - if (slot < 0) { - return -EBUSY; - } - is = file->private_data = ippp_table[slot]; - - printk(KERN_DEBUG "ippp, open, slot: %d, minor: %d, state: %04x\n", - slot, min, is->state); - - /* compression stuff */ - is->link_compressor = is->compressor = NULL; - is->link_decompressor = is->decompressor = NULL; - is->link_comp_stat = is->comp_stat = NULL; - is->link_decomp_stat = is->decomp_stat = NULL; - is->compflags = 0; - - is->reset = isdn_ppp_ccp_reset_alloc(is); - if (!is->reset) - return -ENOMEM; - - is->lp = NULL; - is->mp_seqno = 0; /* MP sequence number */ - is->pppcfg = 0; /* ppp configuration */ - is->mpppcfg = 0; /* mppp configuration */ - is->last_link_seqno = -1; /* MP: maybe set to Bundle-MIN, when joining a bundle ?? */ - is->unit = -1; /* set, when we have our interface */ - is->mru = 1524; /* MRU, default 1524 */ - is->maxcid = 16; /* VJ: maxcid */ - is->tk = current; - init_waitqueue_head(&is->wq); - is->first = is->rq + NUM_RCV_BUFFS - 1; /* receive queue */ - is->last = is->rq; - is->minor = min; -#ifdef CONFIG_ISDN_PPP_VJ - /* - * VJ header compression init - */ - is->slcomp = slhc_init(16, 16); /* not necessary for 2. link in bundle */ - if (IS_ERR(is->slcomp)) { - isdn_ppp_ccp_reset_free(is); - return PTR_ERR(is->slcomp); - } -#endif -#ifdef CONFIG_IPPP_FILTER - is->pass_filter = NULL; - is->active_filter = NULL; -#endif - is->state = IPPP_OPEN; - - return 0; -} - -/* - * release ippp device - */ -void -isdn_ppp_release(int min, struct file *file) -{ - int i; - struct ippp_struct *is; - - if (min < 0 || min >= ISDN_MAX_CHANNELS) - return; - is = file->private_data; - - if (!is) { - printk(KERN_ERR "%s: no file->private_data\n", __func__); - return; - } - if (is->debug & 0x1) - printk(KERN_DEBUG "ippp: release, minor: %d %lx\n", min, (long) is->lp); - - if (is->lp) { /* a lp address says: this link is still up */ - isdn_net_dev *p = is->lp->netdev; - - if (!p) { - printk(KERN_ERR "%s: no lp->netdev\n", __func__); - return; - } - is->state &= ~IPPP_CONNECT; /* -> effect: no call of wakeup */ - /* - * isdn_net_hangup() calls isdn_ppp_free() - * isdn_ppp_free() sets is->lp to NULL and lp->ppp_slot to -1 - * removing the IPPP_CONNECT flag omits calling of isdn_ppp_wakeup_daemon() - */ - isdn_net_hangup(p->dev); - } - for (i = 0; i < NUM_RCV_BUFFS; i++) { - kfree(is->rq[i].buf); - is->rq[i].buf = NULL; - } - is->first = is->rq + NUM_RCV_BUFFS - 1; /* receive queue */ - is->last = is->rq; - -#ifdef CONFIG_ISDN_PPP_VJ -/* TODO: if this was the previous master: link the slcomp to the new master */ - slhc_free(is->slcomp); - is->slcomp = NULL; -#endif -#ifdef CONFIG_IPPP_FILTER - if (is->pass_filter) { - bpf_prog_destroy(is->pass_filter); - is->pass_filter = NULL; - } - - if (is->active_filter) { - bpf_prog_destroy(is->active_filter); - is->active_filter = NULL; - } -#endif - -/* TODO: if this was the previous master: link the stuff to the new master */ - if (is->comp_stat) - is->compressor->free(is->comp_stat); - if (is->link_comp_stat) - is->link_compressor->free(is->link_comp_stat); - if (is->link_decomp_stat) - is->link_decompressor->free(is->link_decomp_stat); - if (is->decomp_stat) - is->decompressor->free(is->decomp_stat); - is->compressor = is->link_compressor = NULL; - is->decompressor = is->link_decompressor = NULL; - is->comp_stat = is->link_comp_stat = NULL; - is->decomp_stat = is->link_decomp_stat = NULL; - - /* Clean up if necessary */ - if (is->reset) - isdn_ppp_ccp_reset_free(is); - - /* this slot is ready for new connections */ - is->state = 0; -} - -/* - * get_arg .. ioctl helper - */ -static int -get_arg(void __user *b, void *val, int len) -{ - if (len <= 0) - len = sizeof(void *); - if (copy_from_user(val, b, len)) - return -EFAULT; - return 0; -} - -/* - * set arg .. ioctl helper - */ -static int -set_arg(void __user *b, void *val, int len) -{ - if (len <= 0) - len = sizeof(void *); - if (copy_to_user(b, val, len)) - return -EFAULT; - return 0; -} - -#ifdef CONFIG_IPPP_FILTER -static int get_filter(void __user *arg, struct sock_filter **p) -{ - struct sock_fprog uprog; - struct sock_filter *code = NULL; - int len; - - if (copy_from_user(&uprog, arg, sizeof(uprog))) - return -EFAULT; - - if (!uprog.len) { - *p = NULL; - return 0; - } - - /* uprog.len is unsigned short, so no overflow here */ - len = uprog.len * sizeof(struct sock_filter); - code = memdup_user(uprog.filter, len); - if (IS_ERR(code)) - return PTR_ERR(code); - - *p = code; - return uprog.len; -} -#endif /* CONFIG_IPPP_FILTER */ - -/* - * ippp device ioctl - */ -int -isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) -{ - unsigned long val; - int r, i, j; - struct ippp_struct *is; - isdn_net_local *lp; - struct isdn_ppp_comp_data data; - void __user *argp = (void __user *)arg; - - is = file->private_data; - lp = is->lp; - - if (is->debug & 0x1) - printk(KERN_DEBUG "isdn_ppp_ioctl: minor: %d cmd: %x state: %x\n", min, cmd, is->state); - - if (!(is->state & IPPP_OPEN)) - return -EINVAL; - - switch (cmd) { - case PPPIOCBUNDLE: -#ifdef CONFIG_ISDN_MPP - if (!(is->state & IPPP_CONNECT)) - return -EINVAL; - if ((r = get_arg(argp, &val, sizeof(val)))) - return r; - printk(KERN_DEBUG "iPPP-bundle: minor: %d, slave unit: %d, master unit: %d\n", - (int) min, (int) is->unit, (int) val); - return isdn_ppp_bundle(is, val); -#else - return -1; -#endif - break; - case PPPIOCGUNIT: /* get ppp/isdn unit number */ - if ((r = set_arg(argp, &is->unit, sizeof(is->unit)))) - return r; - break; - case PPPIOCGIFNAME: - if (!lp) - return -EINVAL; - if ((r = set_arg(argp, lp->netdev->dev->name, - strlen(lp->netdev->dev->name)))) - return r; - break; - case PPPIOCGMPFLAGS: /* get configuration flags */ - if ((r = set_arg(argp, &is->mpppcfg, sizeof(is->mpppcfg)))) - return r; - break; - case PPPIOCSMPFLAGS: /* set configuration flags */ - if ((r = get_arg(argp, &val, sizeof(val)))) - return r; - is->mpppcfg = val; - break; - case PPPIOCGFLAGS: /* get configuration flags */ - if ((r = set_arg(argp, &is->pppcfg, sizeof(is->pppcfg)))) - return r; - break; - case PPPIOCSFLAGS: /* set configuration flags */ - if ((r = get_arg(argp, &val, sizeof(val)))) { - return r; - } - if (val & SC_ENABLE_IP && !(is->pppcfg & SC_ENABLE_IP) && (is->state & IPPP_CONNECT)) { - if (lp) { - /* OK .. we are ready to send buffers */ - is->pppcfg = val; /* isdn_ppp_xmit test for SC_ENABLE_IP !!! */ - netif_wake_queue(lp->netdev->dev); - break; - } - } - is->pppcfg = val; - break; - case PPPIOCGIDLE: /* get idle time information */ - if (lp) { - struct ppp_idle pidle; - pidle.xmit_idle = pidle.recv_idle = lp->huptimer; - if ((r = set_arg(argp, &pidle, sizeof(struct ppp_idle)))) - return r; - } - break; - case PPPIOCSMRU: /* set receive unit size for PPP */ - if ((r = get_arg(argp, &val, sizeof(val)))) - return r; - is->mru = val; - break; - case PPPIOCSMPMRU: - break; - case PPPIOCSMPMTU: - break; - case PPPIOCSMAXCID: /* set the maximum compression slot id */ - if ((r = get_arg(argp, &val, sizeof(val)))) - return r; - val++; - if (is->maxcid != val) { -#ifdef CONFIG_ISDN_PPP_VJ - struct slcompress *sltmp; -#endif - if (is->debug & 0x1) - printk(KERN_DEBUG "ippp, ioctl: changed MAXCID to %ld\n", val); - is->maxcid = val; -#ifdef CONFIG_ISDN_PPP_VJ - sltmp = slhc_init(16, val); - if (IS_ERR(sltmp)) - return PTR_ERR(sltmp); - if (is->slcomp) - slhc_free(is->slcomp); - is->slcomp = sltmp; -#endif - } - break; - case PPPIOCGDEBUG: - if ((r = set_arg(argp, &is->debug, sizeof(is->debug)))) - return r; - break; - case PPPIOCSDEBUG: - if ((r = get_arg(argp, &val, sizeof(val)))) - return r; - is->debug = val; - break; - case PPPIOCGCOMPRESSORS: - { - unsigned long protos[8] = {0,}; - struct isdn_ppp_compressor *ipc = ipc_head; - while (ipc) { - j = ipc->num / (sizeof(long) * 8); - i = ipc->num % (sizeof(long) * 8); - if (j < 8) - protos[j] |= (1UL << i); - ipc = ipc->next; - } - if ((r = set_arg(argp, protos, 8 * sizeof(long)))) - return r; - } - break; - case PPPIOCSCOMPRESSOR: - if ((r = get_arg(argp, &data, sizeof(struct isdn_ppp_comp_data)))) - return r; - return isdn_ppp_set_compressor(is, &data); - case PPPIOCGCALLINFO: - { - struct pppcallinfo pci; - memset((char *)&pci, 0, sizeof(struct pppcallinfo)); - if (lp) - { - strncpy(pci.local_num, lp->msn, 63); - if (lp->dial) { - strncpy(pci.remote_num, lp->dial->num, 63); - } - pci.charge_units = lp->charge; - if (lp->outgoing) - pci.calltype = CALLTYPE_OUTGOING; - else - pci.calltype = CALLTYPE_INCOMING; - if (lp->flags & ISDN_NET_CALLBACK) - pci.calltype |= CALLTYPE_CALLBACK; - } - return set_arg(argp, &pci, sizeof(struct pppcallinfo)); - } -#ifdef CONFIG_IPPP_FILTER - case PPPIOCSPASS: - { - struct sock_fprog_kern fprog; - struct sock_filter *code; - int err, len = get_filter(argp, &code); - - if (len < 0) - return len; - - fprog.len = len; - fprog.filter = code; - - if (is->pass_filter) { - bpf_prog_destroy(is->pass_filter); - is->pass_filter = NULL; - } - if (fprog.filter != NULL) - err = bpf_prog_create(&is->pass_filter, &fprog); - else - err = 0; - kfree(code); - - return err; - } - case PPPIOCSACTIVE: - { - struct sock_fprog_kern fprog; - struct sock_filter *code; - int err, len = get_filter(argp, &code); - - if (len < 0) - return len; - - fprog.len = len; - fprog.filter = code; - - if (is->active_filter) { - bpf_prog_destroy(is->active_filter); - is->active_filter = NULL; - } - if (fprog.filter != NULL) - err = bpf_prog_create(&is->active_filter, &fprog); - else - err = 0; - kfree(code); - - return err; - } -#endif /* CONFIG_IPPP_FILTER */ - default: - break; - } - return 0; -} - -__poll_t -isdn_ppp_poll(struct file *file, poll_table *wait) -{ - __poll_t mask; - struct ippp_buf_queue *bf, *bl; - u_long flags; - struct ippp_struct *is; - - is = file->private_data; - - if (is->debug & 0x2) - printk(KERN_DEBUG "isdn_ppp_poll: minor: %d\n", - iminor(file_inode(file))); - - /* just registers wait_queue hook. This doesn't really wait. */ - poll_wait(file, &is->wq, wait); - - if (!(is->state & IPPP_OPEN)) { - if (is->state == IPPP_CLOSEWAIT) - return EPOLLHUP; - printk(KERN_DEBUG "isdn_ppp: device not open\n"); - return EPOLLERR; - } - /* we're always ready to send .. */ - mask = EPOLLOUT | EPOLLWRNORM; - - spin_lock_irqsave(&is->buflock, flags); - bl = is->last; - bf = is->first; - /* - * if IPPP_NOBLOCK is set we return even if we have nothing to read - */ - if (bf->next != bl || (is->state & IPPP_NOBLOCK)) { - is->state &= ~IPPP_NOBLOCK; - mask |= EPOLLIN | EPOLLRDNORM; - } - spin_unlock_irqrestore(&is->buflock, flags); - return mask; -} - -/* - * fill up isdn_ppp_read() queue .. - */ - -static int -isdn_ppp_fill_rq(unsigned char *buf, int len, int proto, int slot) -{ - struct ippp_buf_queue *bf, *bl; - u_long flags; - u_char *nbuf; - struct ippp_struct *is; - - if (slot < 0 || slot >= ISDN_MAX_CHANNELS) { - printk(KERN_WARNING "ippp: illegal slot(%d).\n", slot); - return 0; - } - is = ippp_table[slot]; - - if (!(is->state & IPPP_CONNECT)) { - printk(KERN_DEBUG "ippp: device not activated.\n"); - return 0; - } - nbuf = kmalloc(len + 4, GFP_ATOMIC); - if (!nbuf) { - printk(KERN_WARNING "ippp: Can't alloc buf\n"); - return 0; - } - nbuf[0] = PPP_ALLSTATIONS; - nbuf[1] = PPP_UI; - nbuf[2] = proto >> 8; - nbuf[3] = proto & 0xff; - memcpy(nbuf + 4, buf, len); - - spin_lock_irqsave(&is->buflock, flags); - bf = is->first; - bl = is->last; - - if (bf == bl) { - printk(KERN_WARNING "ippp: Queue is full; discarding first buffer\n"); - bf = bf->next; - kfree(bf->buf); - is->first = bf; - } - bl->buf = (char *) nbuf; - bl->len = len + 4; - - is->last = bl->next; - spin_unlock_irqrestore(&is->buflock, flags); - wake_up_interruptible(&is->wq); - return len; -} - -/* - * read() .. non-blocking: ipppd calls it only after select() - * reports, that there is data - */ - -int -isdn_ppp_read(int min, struct file *file, char __user *buf, int count) -{ - struct ippp_struct *is; - struct ippp_buf_queue *b; - u_long flags; - u_char *save_buf; - - is = file->private_data; - - if (!(is->state & IPPP_OPEN)) - return 0; - - spin_lock_irqsave(&is->buflock, flags); - b = is->first->next; - save_buf = b->buf; - if (!save_buf) { - spin_unlock_irqrestore(&is->buflock, flags); - return -EAGAIN; - } - if (b->len < count) - count = b->len; - b->buf = NULL; - is->first = b; - - spin_unlock_irqrestore(&is->buflock, flags); - if (copy_to_user(buf, save_buf, count)) - count = -EFAULT; - kfree(save_buf); - - return count; -} - -/* - * ipppd wanna write a packet to the card .. non-blocking - */ - -int -isdn_ppp_write(int min, struct file *file, const char __user *buf, int count) -{ - isdn_net_local *lp; - struct ippp_struct *is; - int proto; - - is = file->private_data; - - if (!(is->state & IPPP_CONNECT)) - return 0; - - lp = is->lp; - - /* -> push it directly to the lowlevel interface */ - - if (!lp) - printk(KERN_DEBUG "isdn_ppp_write: lp == NULL\n"); - else { - if (lp->isdn_device < 0 || lp->isdn_channel < 0) { - unsigned char protobuf[4]; - /* - * Don't reset huptimer for - * LCP packets. (Echo requests). - */ - if (copy_from_user(protobuf, buf, 4)) - return -EFAULT; - - proto = PPP_PROTOCOL(protobuf); - if (proto != PPP_LCP) - lp->huptimer = 0; - - return 0; - } - - if ((dev->drv[lp->isdn_device]->flags & DRV_FLAG_RUNNING) && - lp->dialstate == 0 && - (lp->flags & ISDN_NET_CONNECTED)) { - unsigned short hl; - struct sk_buff *skb; - unsigned char *cpy_buf; - /* - * we need to reserve enough space in front of - * sk_buff. old call to dev_alloc_skb only reserved - * 16 bytes, now we are looking what the driver want - */ - hl = dev->drv[lp->isdn_device]->interface->hl_hdrlen; - skb = alloc_skb(hl + count, GFP_ATOMIC); - if (!skb) { - printk(KERN_WARNING "isdn_ppp_write: out of memory!\n"); - return count; - } - skb_reserve(skb, hl); - cpy_buf = skb_put(skb, count); - if (copy_from_user(cpy_buf, buf, count)) - { - kfree_skb(skb); - return -EFAULT; - } - - /* - * Don't reset huptimer for - * LCP packets. (Echo requests). - */ - proto = PPP_PROTOCOL(cpy_buf); - if (proto != PPP_LCP) - lp->huptimer = 0; - - if (is->debug & 0x40) { - printk(KERN_DEBUG "ppp xmit: len %d\n", (int) skb->len); - isdn_ppp_frame_log("xmit", skb->data, skb->len, 32, is->unit, lp->ppp_slot); - } - - isdn_ppp_send_ccp(lp->netdev, lp, skb); /* keeps CCP/compression states in sync */ - - isdn_net_write_super(lp, skb); - } - } - return count; -} - -/* - * init memory, structures etc. - */ - -int -isdn_ppp_init(void) -{ - int i, - j; - -#ifdef CONFIG_ISDN_MPP - if (isdn_ppp_mp_bundle_array_init() < 0) - return -ENOMEM; -#endif /* CONFIG_ISDN_MPP */ - - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - if (!(ippp_table[i] = kzalloc(sizeof(struct ippp_struct), GFP_KERNEL))) { - printk(KERN_WARNING "isdn_ppp_init: Could not alloc ippp_table\n"); - for (j = 0; j < i; j++) - kfree(ippp_table[j]); - return -1; - } - spin_lock_init(&ippp_table[i]->buflock); - ippp_table[i]->state = 0; - ippp_table[i]->first = ippp_table[i]->rq + NUM_RCV_BUFFS - 1; - ippp_table[i]->last = ippp_table[i]->rq; - - for (j = 0; j < NUM_RCV_BUFFS; j++) { - ippp_table[i]->rq[j].buf = NULL; - ippp_table[i]->rq[j].last = ippp_table[i]->rq + - (NUM_RCV_BUFFS + j - 1) % NUM_RCV_BUFFS; - ippp_table[i]->rq[j].next = ippp_table[i]->rq + (j + 1) % NUM_RCV_BUFFS; - } - } - return 0; -} - -void -isdn_ppp_cleanup(void) -{ - int i; - - for (i = 0; i < ISDN_MAX_CHANNELS; i++) - kfree(ippp_table[i]); - -#ifdef CONFIG_ISDN_MPP - kfree(isdn_ppp_bundle_arr); -#endif /* CONFIG_ISDN_MPP */ - -} - -/* - * check for address/control field and skip if allowed - * retval != 0 -> discard packet silently - */ -static int isdn_ppp_skip_ac(struct ippp_struct *is, struct sk_buff *skb) -{ - if (skb->len < 1) - return -1; - - if (skb->data[0] == 0xff) { - if (skb->len < 2) - return -1; - - if (skb->data[1] != 0x03) - return -1; - - // skip address/control (AC) field - skb_pull(skb, 2); - } else { - if (is->pppcfg & SC_REJ_COMP_AC) - // if AC compression was not negotiated, but used, discard packet - return -1; - } - return 0; -} - -/* - * get the PPP protocol header and pull skb - * retval < 0 -> discard packet silently - */ -static int isdn_ppp_strip_proto(struct sk_buff *skb) -{ - int proto; - - if (skb->len < 1) - return -1; - - if (skb->data[0] & 0x1) { - // protocol field is compressed - proto = skb->data[0]; - skb_pull(skb, 1); - } else { - if (skb->len < 2) - return -1; - proto = ((int) skb->data[0] << 8) + skb->data[1]; - skb_pull(skb, 2); - } - return proto; -} - - -/* - * handler for incoming packets on a syncPPP interface - */ -void isdn_ppp_receive(isdn_net_dev *net_dev, isdn_net_local *lp, struct sk_buff *skb) -{ - struct ippp_struct *is; - int slot; - int proto; - - BUG_ON(net_dev->local->master); // we're called with the master device always - - slot = lp->ppp_slot; - if (slot < 0 || slot >= ISDN_MAX_CHANNELS) { - printk(KERN_ERR "isdn_ppp_receive: lp->ppp_slot(%d)\n", - lp->ppp_slot); - kfree_skb(skb); - return; - } - is = ippp_table[slot]; - - if (is->debug & 0x4) { - printk(KERN_DEBUG "ippp_receive: is:%08lx lp:%08lx slot:%d unit:%d len:%d\n", - (long)is, (long)lp, lp->ppp_slot, is->unit, (int)skb->len); - isdn_ppp_frame_log("receive", skb->data, skb->len, 32, is->unit, lp->ppp_slot); - } - - if (isdn_ppp_skip_ac(is, skb) < 0) { - kfree_skb(skb); - return; - } - proto = isdn_ppp_strip_proto(skb); - if (proto < 0) { - kfree_skb(skb); - return; - } - -#ifdef CONFIG_ISDN_MPP - if (is->compflags & SC_LINK_DECOMP_ON) { - skb = isdn_ppp_decompress(skb, is, NULL, &proto); - if (!skb) // decompression error - return; - } - - if (!(is->mpppcfg & SC_REJ_MP_PROT)) { // we agreed to receive MPPP - if (proto == PPP_MP) { - isdn_ppp_mp_receive(net_dev, lp, skb); - return; - } - } -#endif - isdn_ppp_push_higher(net_dev, lp, skb, proto); -} - -/* - * we receive a reassembled frame, MPPP has been taken care of before. - * address/control and protocol have been stripped from the skb - * note: net_dev has to be master net_dev - */ -static void -isdn_ppp_push_higher(isdn_net_dev *net_dev, isdn_net_local *lp, struct sk_buff *skb, int proto) -{ - struct net_device *dev = net_dev->dev; - struct ippp_struct *is, *mis; - isdn_net_local *mlp = NULL; - int slot; - - slot = lp->ppp_slot; - if (slot < 0 || slot >= ISDN_MAX_CHANNELS) { - printk(KERN_ERR "isdn_ppp_push_higher: lp->ppp_slot(%d)\n", - lp->ppp_slot); - goto drop_packet; - } - is = ippp_table[slot]; - - if (lp->master) { // FIXME? - mlp = ISDN_MASTER_PRIV(lp); - slot = mlp->ppp_slot; - if (slot < 0 || slot >= ISDN_MAX_CHANNELS) { - printk(KERN_ERR "isdn_ppp_push_higher: master->ppp_slot(%d)\n", - lp->ppp_slot); - goto drop_packet; - } - } - mis = ippp_table[slot]; - - if (is->debug & 0x10) { - printk(KERN_DEBUG "push, skb %d %04x\n", (int) skb->len, proto); - isdn_ppp_frame_log("rpush", skb->data, skb->len, 32, is->unit, lp->ppp_slot); - } - if (mis->compflags & SC_DECOMP_ON) { - skb = isdn_ppp_decompress(skb, is, mis, &proto); - if (!skb) // decompression error - return; - } - switch (proto) { - case PPP_IPX: /* untested */ - if (is->debug & 0x20) - printk(KERN_DEBUG "isdn_ppp: IPX\n"); - skb->protocol = htons(ETH_P_IPX); - break; - case PPP_IP: - if (is->debug & 0x20) - printk(KERN_DEBUG "isdn_ppp: IP\n"); - skb->protocol = htons(ETH_P_IP); - break; - case PPP_COMP: - case PPP_COMPFRAG: - printk(KERN_INFO "isdn_ppp: unexpected compressed frame dropped\n"); - goto drop_packet; -#ifdef CONFIG_ISDN_PPP_VJ - case PPP_VJC_UNCOMP: - if (is->debug & 0x20) - printk(KERN_DEBUG "isdn_ppp: VJC_UNCOMP\n"); - if (net_dev->local->ppp_slot < 0) { - printk(KERN_ERR "%s: net_dev->local->ppp_slot(%d) out of range\n", - __func__, net_dev->local->ppp_slot); - goto drop_packet; - } - if (slhc_remember(ippp_table[net_dev->local->ppp_slot]->slcomp, skb->data, skb->len) <= 0) { - printk(KERN_WARNING "isdn_ppp: received illegal VJC_UNCOMP frame!\n"); - goto drop_packet; - } - skb->protocol = htons(ETH_P_IP); - break; - case PPP_VJC_COMP: - if (is->debug & 0x20) - printk(KERN_DEBUG "isdn_ppp: VJC_COMP\n"); - { - struct sk_buff *skb_old = skb; - int pkt_len; - skb = dev_alloc_skb(skb_old->len + 128); - - if (!skb) { - printk(KERN_WARNING "%s: Memory squeeze, dropping packet.\n", dev->name); - skb = skb_old; - goto drop_packet; - } - skb_put(skb, skb_old->len + 128); - skb_copy_from_linear_data(skb_old, skb->data, - skb_old->len); - if (net_dev->local->ppp_slot < 0) { - printk(KERN_ERR "%s: net_dev->local->ppp_slot(%d) out of range\n", - __func__, net_dev->local->ppp_slot); - goto drop_packet; - } - pkt_len = slhc_uncompress(ippp_table[net_dev->local->ppp_slot]->slcomp, - skb->data, skb_old->len); - kfree_skb(skb_old); - if (pkt_len < 0) - goto drop_packet; - - skb_trim(skb, pkt_len); - skb->protocol = htons(ETH_P_IP); - } - break; -#endif - case PPP_CCP: - case PPP_CCPFRAG: - isdn_ppp_receive_ccp(net_dev, lp, skb, proto); - /* Dont pop up ResetReq/Ack stuff to the daemon any - longer - the job is done already */ - if (skb->data[0] == CCP_RESETREQ || - skb->data[0] == CCP_RESETACK) - break; - /* fall through */ - default: - isdn_ppp_fill_rq(skb->data, skb->len, proto, lp->ppp_slot); /* push data to pppd device */ - kfree_skb(skb); - return; - } - -#ifdef CONFIG_IPPP_FILTER - /* check if the packet passes the pass and active filters - * the filter instructions are constructed assuming - * a four-byte PPP header on each packet (which is still present) */ - skb_push(skb, 4); - - { - u_int16_t *p = (u_int16_t *) skb->data; - - *p = 0; /* indicate inbound */ - } - - if (is->pass_filter - && BPF_PROG_RUN(is->pass_filter, skb) == 0) { - if (is->debug & 0x2) - printk(KERN_DEBUG "IPPP: inbound frame filtered.\n"); - kfree_skb(skb); - return; - } - if (!(is->active_filter - && BPF_PROG_RUN(is->active_filter, skb) == 0)) { - if (is->debug & 0x2) - printk(KERN_DEBUG "IPPP: link-active filter: resetting huptimer.\n"); - lp->huptimer = 0; - if (mlp) - mlp->huptimer = 0; - } - skb_pull(skb, 4); -#else /* CONFIG_IPPP_FILTER */ - lp->huptimer = 0; - if (mlp) - mlp->huptimer = 0; -#endif /* CONFIG_IPPP_FILTER */ - skb->dev = dev; - skb_reset_mac_header(skb); - netif_rx(skb); - /* net_dev->local->stats.rx_packets++; done in isdn_net.c */ - return; - -drop_packet: - net_dev->local->stats.rx_dropped++; - kfree_skb(skb); -} - -/* - * isdn_ppp_skb_push .. - * checks whether we have enough space at the beginning of the skb - * and allocs a new SKB if necessary - */ -static unsigned char *isdn_ppp_skb_push(struct sk_buff **skb_p, int len) -{ - struct sk_buff *skb = *skb_p; - - if (skb_headroom(skb) < len) { - struct sk_buff *nskb = skb_realloc_headroom(skb, len); - - if (!nskb) { - printk(KERN_ERR "isdn_ppp_skb_push: can't realloc headroom!\n"); - dev_kfree_skb(skb); - return NULL; - } - printk(KERN_DEBUG "isdn_ppp_skb_push:under %d %d\n", skb_headroom(skb), len); - dev_kfree_skb(skb); - *skb_p = nskb; - return skb_push(nskb, len); - } - return skb_push(skb, len); -} - -/* - * send ppp frame .. we expect a PIDCOMPressable proto -- - * (here: currently always PPP_IP,PPP_VJC_COMP,PPP_VJC_UNCOMP) - * - * VJ compression may change skb pointer!!! .. requeue with old - * skb isn't allowed!! - */ - -int -isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev) -{ - isdn_net_local *lp, *mlp; - isdn_net_dev *nd; - unsigned int proto = PPP_IP; /* 0x21 */ - struct ippp_struct *ipt, *ipts; - int slot, retval = NETDEV_TX_OK; - - mlp = netdev_priv(netdev); - nd = mlp->netdev; /* get master lp */ - - slot = mlp->ppp_slot; - if (slot < 0 || slot >= ISDN_MAX_CHANNELS) { - printk(KERN_ERR "isdn_ppp_xmit: lp->ppp_slot(%d)\n", - mlp->ppp_slot); - kfree_skb(skb); - goto out; - } - ipts = ippp_table[slot]; - - if (!(ipts->pppcfg & SC_ENABLE_IP)) { /* PPP connected ? */ - if (ipts->debug & 0x1) - printk(KERN_INFO "%s: IP frame delayed.\n", netdev->name); - retval = NETDEV_TX_BUSY; - goto out; - } - - switch (ntohs(skb->protocol)) { - case ETH_P_IP: - proto = PPP_IP; - break; - case ETH_P_IPX: - proto = PPP_IPX; /* untested */ - break; - default: - printk(KERN_ERR "isdn_ppp: skipped unsupported protocol: %#x.\n", - skb->protocol); - dev_kfree_skb(skb); - goto out; - } - - lp = isdn_net_get_locked_lp(nd); - if (!lp) { - printk(KERN_WARNING "%s: all channels busy - requeuing!\n", netdev->name); - retval = NETDEV_TX_BUSY; - goto out; - } - /* we have our lp locked from now on */ - - slot = lp->ppp_slot; - if (slot < 0 || slot >= ISDN_MAX_CHANNELS) { - printk(KERN_ERR "isdn_ppp_xmit: lp->ppp_slot(%d)\n", - lp->ppp_slot); - kfree_skb(skb); - goto unlock; - } - ipt = ippp_table[slot]; - - /* - * after this line .. requeueing in the device queue is no longer allowed!!! - */ - - /* Pull off the fake header we stuck on earlier to keep - * the fragmentation code happy. - */ - skb_pull(skb, IPPP_MAX_HEADER); - -#ifdef CONFIG_IPPP_FILTER - /* check if we should pass this packet - * the filter instructions are constructed assuming - * a four-byte PPP header on each packet */ - *(u8 *)skb_push(skb, 4) = 1; /* indicate outbound */ - - { - __be16 *p = (__be16 *)skb->data; - - p++; - *p = htons(proto); - } - - if (ipt->pass_filter - && BPF_PROG_RUN(ipt->pass_filter, skb) == 0) { - if (ipt->debug & 0x4) - printk(KERN_DEBUG "IPPP: outbound frame filtered.\n"); - kfree_skb(skb); - goto unlock; - } - if (!(ipt->active_filter - && BPF_PROG_RUN(ipt->active_filter, skb) == 0)) { - if (ipt->debug & 0x4) - printk(KERN_DEBUG "IPPP: link-active filter: resetting huptimer.\n"); - lp->huptimer = 0; - } - skb_pull(skb, 4); -#else /* CONFIG_IPPP_FILTER */ - lp->huptimer = 0; -#endif /* CONFIG_IPPP_FILTER */ - - if (ipt->debug & 0x4) - printk(KERN_DEBUG "xmit skb, len %d\n", (int) skb->len); - if (ipts->debug & 0x40) - isdn_ppp_frame_log("xmit0", skb->data, skb->len, 32, ipts->unit, lp->ppp_slot); - -#ifdef CONFIG_ISDN_PPP_VJ - if (proto == PPP_IP && ipts->pppcfg & SC_COMP_TCP) { /* ipts here? probably yes, but check this again */ - struct sk_buff *new_skb; - unsigned short hl; - /* - * we need to reserve enough space in front of - * sk_buff. old call to dev_alloc_skb only reserved - * 16 bytes, now we are looking what the driver want. - */ - hl = dev->drv[lp->isdn_device]->interface->hl_hdrlen + IPPP_MAX_HEADER; - /* - * Note: hl might still be insufficient because the method - * above does not account for a possibible MPPP slave channel - * which had larger HL header space requirements than the - * master. - */ - new_skb = alloc_skb(hl + skb->len, GFP_ATOMIC); - if (new_skb) { - u_char *buf; - int pktlen; - - skb_reserve(new_skb, hl); - new_skb->dev = skb->dev; - skb_put(new_skb, skb->len); - buf = skb->data; - - pktlen = slhc_compress(ipts->slcomp, skb->data, skb->len, new_skb->data, - &buf, !(ipts->pppcfg & SC_NO_TCP_CCID)); - - if (buf != skb->data) { - if (new_skb->data != buf) - printk(KERN_ERR "isdn_ppp: FATAL error after slhc_compress!!\n"); - dev_kfree_skb(skb); - skb = new_skb; - } else { - dev_kfree_skb(new_skb); - } - - skb_trim(skb, pktlen); - if (skb->data[0] & SL_TYPE_COMPRESSED_TCP) { /* cslip? style -> PPP */ - proto = PPP_VJC_COMP; - skb->data[0] ^= SL_TYPE_COMPRESSED_TCP; - } else { - if (skb->data[0] >= SL_TYPE_UNCOMPRESSED_TCP) - proto = PPP_VJC_UNCOMP; - skb->data[0] = (skb->data[0] & 0x0f) | 0x40; - } - } - } -#endif - - /* - * normal (single link) or bundle compression - */ - if (ipts->compflags & SC_COMP_ON) { - /* We send compressed only if both down- und upstream - compression is negotiated, that means, CCP is up */ - if (ipts->compflags & SC_DECOMP_ON) { - skb = isdn_ppp_compress(skb, &proto, ipt, ipts, 0); - } else { - printk(KERN_DEBUG "isdn_ppp: CCP not yet up - sending as-is\n"); - } - } - - if (ipt->debug & 0x24) - printk(KERN_DEBUG "xmit2 skb, len %d, proto %04x\n", (int) skb->len, proto); - -#ifdef CONFIG_ISDN_MPP - if (ipt->mpppcfg & SC_MP_PROT) { - /* we get mp_seqno from static isdn_net_local */ - long mp_seqno = ipts->mp_seqno; - ipts->mp_seqno++; - if (ipt->mpppcfg & SC_OUT_SHORT_SEQ) { - unsigned char *data = isdn_ppp_skb_push(&skb, 3); - if (!data) - goto unlock; - mp_seqno &= 0xfff; - data[0] = MP_BEGIN_FRAG | MP_END_FRAG | ((mp_seqno >> 8) & 0xf); /* (B)egin & (E)ndbit .. */ - data[1] = mp_seqno & 0xff; - data[2] = proto; /* PID compression */ - } else { - unsigned char *data = isdn_ppp_skb_push(&skb, 5); - if (!data) - goto unlock; - data[0] = MP_BEGIN_FRAG | MP_END_FRAG; /* (B)egin & (E)ndbit .. */ - data[1] = (mp_seqno >> 16) & 0xff; /* sequence number: 24bit */ - data[2] = (mp_seqno >> 8) & 0xff; - data[3] = (mp_seqno >> 0) & 0xff; - data[4] = proto; /* PID compression */ - } - proto = PPP_MP; /* MP Protocol, 0x003d */ - } -#endif - - /* - * 'link in bundle' compression ... - */ - if (ipt->compflags & SC_LINK_COMP_ON) - skb = isdn_ppp_compress(skb, &proto, ipt, ipts, 1); - - if ((ipt->pppcfg & SC_COMP_PROT) && (proto <= 0xff)) { - unsigned char *data = isdn_ppp_skb_push(&skb, 1); - if (!data) - goto unlock; - data[0] = proto & 0xff; - } - else { - unsigned char *data = isdn_ppp_skb_push(&skb, 2); - if (!data) - goto unlock; - data[0] = (proto >> 8) & 0xff; - data[1] = proto & 0xff; - } - if (!(ipt->pppcfg & SC_COMP_AC)) { - unsigned char *data = isdn_ppp_skb_push(&skb, 2); - if (!data) - goto unlock; - data[0] = 0xff; /* All Stations */ - data[1] = 0x03; /* Unnumbered information */ - } - - /* tx-stats are now updated via BSENT-callback */ - - if (ipts->debug & 0x40) { - printk(KERN_DEBUG "skb xmit: len: %d\n", (int) skb->len); - isdn_ppp_frame_log("xmit", skb->data, skb->len, 32, ipt->unit, lp->ppp_slot); - } - - isdn_net_writebuf_skb(lp, skb); - -unlock: - spin_unlock_bh(&lp->xmit_lock); -out: - return retval; -} - -#ifdef CONFIG_IPPP_FILTER -/* - * check if this packet may trigger auto-dial. - */ - -int isdn_ppp_autodial_filter(struct sk_buff *skb, isdn_net_local *lp) -{ - struct ippp_struct *is = ippp_table[lp->ppp_slot]; - u_int16_t proto; - int drop = 0; - - switch (ntohs(skb->protocol)) { - case ETH_P_IP: - proto = PPP_IP; - break; - case ETH_P_IPX: - proto = PPP_IPX; - break; - default: - printk(KERN_ERR "isdn_ppp_autodial_filter: unsupported protocol 0x%x.\n", - skb->protocol); - return 1; - } - - /* the filter instructions are constructed assuming - * a four-byte PPP header on each packet. we have to - * temporarily remove part of the fake header stuck on - * earlier. - */ - *(u8 *)skb_pull(skb, IPPP_MAX_HEADER - 4) = 1; /* indicate outbound */ - - { - __be16 *p = (__be16 *)skb->data; - - p++; - *p = htons(proto); - } - - drop |= is->pass_filter - && BPF_PROG_RUN(is->pass_filter, skb) == 0; - drop |= is->active_filter - && BPF_PROG_RUN(is->active_filter, skb) == 0; - - skb_push(skb, IPPP_MAX_HEADER - 4); - return drop; -} -#endif -#ifdef CONFIG_ISDN_MPP - -/* this is _not_ rfc1990 header, but something we convert both short and long - * headers to for convinience's sake: - * byte 0 is flags as in rfc1990 - * bytes 1...4 is 24-bit seqence number converted to host byte order - */ -#define MP_HEADER_LEN 5 - -#define MP_LONGSEQ_MASK 0x00ffffff -#define MP_SHORTSEQ_MASK 0x00000fff -#define MP_LONGSEQ_MAX MP_LONGSEQ_MASK -#define MP_SHORTSEQ_MAX MP_SHORTSEQ_MASK -#define MP_LONGSEQ_MAXBIT ((MP_LONGSEQ_MASK + 1) >> 1) -#define MP_SHORTSEQ_MAXBIT ((MP_SHORTSEQ_MASK + 1) >> 1) - -/* sequence-wrap safe comparisons (for long sequence)*/ -#define MP_LT(a, b) ((a - b) & MP_LONGSEQ_MAXBIT) -#define MP_LE(a, b) !((b - a) & MP_LONGSEQ_MAXBIT) -#define MP_GT(a, b) ((b - a) & MP_LONGSEQ_MAXBIT) -#define MP_GE(a, b) !((a - b) & MP_LONGSEQ_MAXBIT) - -#define MP_SEQ(f) ((*(u32 *)(f->data + 1))) -#define MP_FLAGS(f) (f->data[0]) - -static int isdn_ppp_mp_bundle_array_init(void) -{ - int i; - int sz = ISDN_MAX_CHANNELS * sizeof(ippp_bundle); - if ((isdn_ppp_bundle_arr = kzalloc(sz, GFP_KERNEL)) == NULL) - return -ENOMEM; - for (i = 0; i < ISDN_MAX_CHANNELS; i++) - spin_lock_init(&isdn_ppp_bundle_arr[i].lock); - return 0; -} - -static ippp_bundle *isdn_ppp_mp_bundle_alloc(void) -{ - int i; - for (i = 0; i < ISDN_MAX_CHANNELS; i++) - if (isdn_ppp_bundle_arr[i].ref_ct <= 0) - return (isdn_ppp_bundle_arr + i); - return NULL; -} - -static int isdn_ppp_mp_init(isdn_net_local *lp, ippp_bundle *add_to) -{ - struct ippp_struct *is; - - if (lp->ppp_slot < 0) { - printk(KERN_ERR "%s: lp->ppp_slot(%d) out of range\n", - __func__, lp->ppp_slot); - return (-EINVAL); - } - - is = ippp_table[lp->ppp_slot]; - if (add_to) { - if (lp->netdev->pb) - lp->netdev->pb->ref_ct--; - lp->netdev->pb = add_to; - } else { /* first link in a bundle */ - is->mp_seqno = 0; - if ((lp->netdev->pb = isdn_ppp_mp_bundle_alloc()) == NULL) - return -ENOMEM; - lp->next = lp->last = lp; /* nobody else in a queue */ - lp->netdev->pb->frags = NULL; - lp->netdev->pb->frames = 0; - lp->netdev->pb->seq = UINT_MAX; - } - lp->netdev->pb->ref_ct++; - - is->last_link_seqno = 0; - return 0; -} - -static u32 isdn_ppp_mp_get_seq(int short_seq, - struct sk_buff *skb, u32 last_seq); -static struct sk_buff *isdn_ppp_mp_discard(ippp_bundle *mp, - struct sk_buff *from, struct sk_buff *to); -static void isdn_ppp_mp_reassembly(isdn_net_dev *net_dev, isdn_net_local *lp, - struct sk_buff *from, struct sk_buff *to); -static void isdn_ppp_mp_free_skb(ippp_bundle *mp, struct sk_buff *skb); -static void isdn_ppp_mp_print_recv_pkt(int slot, struct sk_buff *skb); - -static void isdn_ppp_mp_receive(isdn_net_dev *net_dev, isdn_net_local *lp, - struct sk_buff *skb) -{ - struct ippp_struct *is; - isdn_net_local *lpq; - ippp_bundle *mp; - isdn_mppp_stats *stats; - struct sk_buff *newfrag, *frag, *start, *nextf; - u32 newseq, minseq, thisseq; - unsigned long flags; - int slot; - - spin_lock_irqsave(&net_dev->pb->lock, flags); - mp = net_dev->pb; - stats = &mp->stats; - slot = lp->ppp_slot; - if (slot < 0 || slot >= ISDN_MAX_CHANNELS) { - printk(KERN_ERR "%s: lp->ppp_slot(%d)\n", - __func__, lp->ppp_slot); - stats->frame_drops++; - dev_kfree_skb(skb); - spin_unlock_irqrestore(&mp->lock, flags); - return; - } - is = ippp_table[slot]; - if (++mp->frames > stats->max_queue_len) - stats->max_queue_len = mp->frames; - - if (is->debug & 0x8) - isdn_ppp_mp_print_recv_pkt(lp->ppp_slot, skb); - - newseq = isdn_ppp_mp_get_seq(is->mpppcfg & SC_IN_SHORT_SEQ, - skb, is->last_link_seqno); - - - /* if this packet seq # is less than last already processed one, - * toss it right away, but check for sequence start case first - */ - if (mp->seq > MP_LONGSEQ_MAX && (newseq & MP_LONGSEQ_MAXBIT)) { - mp->seq = newseq; /* the first packet: required for - * rfc1990 non-compliant clients -- - * prevents constant packet toss */ - } else if (MP_LT(newseq, mp->seq)) { - stats->frame_drops++; - isdn_ppp_mp_free_skb(mp, skb); - spin_unlock_irqrestore(&mp->lock, flags); - return; - } - - /* find the minimum received sequence number over all links */ - is->last_link_seqno = minseq = newseq; - for (lpq = net_dev->queue;;) { - slot = lpq->ppp_slot; - if (slot < 0 || slot >= ISDN_MAX_CHANNELS) { - printk(KERN_ERR "%s: lpq->ppp_slot(%d)\n", - __func__, lpq->ppp_slot); - } else { - u32 lls = ippp_table[slot]->last_link_seqno; - if (MP_LT(lls, minseq)) - minseq = lls; - } - if ((lpq = lpq->next) == net_dev->queue) - break; - } - if (MP_LT(minseq, mp->seq)) - minseq = mp->seq; /* can't go beyond already processed - * packets */ - newfrag = skb; - - /* if this new fragment is before the first one, then enqueue it now. */ - if ((frag = mp->frags) == NULL || MP_LT(newseq, MP_SEQ(frag))) { - newfrag->next = frag; - mp->frags = frag = newfrag; - newfrag = NULL; - } - - start = MP_FLAGS(frag) & MP_BEGIN_FRAG && - MP_SEQ(frag) == mp->seq ? frag : NULL; - - /* - * main fragment traversing loop - * - * try to accomplish several tasks: - * - insert new fragment into the proper sequence slot (once that's done - * newfrag will be set to NULL) - * - reassemble any complete fragment sequence (non-null 'start' - * indicates there is a contiguous sequence present) - * - discard any incomplete sequences that are below minseq -- due - * to the fact that sender always increment sequence number, if there - * is an incomplete sequence below minseq, no new fragments would - * come to complete such sequence and it should be discarded - * - * loop completes when we accomplished the following tasks: - * - new fragment is inserted in the proper sequence ('newfrag' is - * set to NULL) - * - we hit a gap in the sequence, so no reassembly/processing is - * possible ('start' would be set to NULL) - * - * algorithm for this code is derived from code in the book - * 'PPP Design And Debugging' by James Carlson (Addison-Wesley) - */ - while (start != NULL || newfrag != NULL) { - - thisseq = MP_SEQ(frag); - nextf = frag->next; - - /* drop any duplicate fragments */ - if (newfrag != NULL && thisseq == newseq) { - isdn_ppp_mp_free_skb(mp, newfrag); - newfrag = NULL; - } - - /* insert new fragment before next element if possible. */ - if (newfrag != NULL && (nextf == NULL || - MP_LT(newseq, MP_SEQ(nextf)))) { - newfrag->next = nextf; - frag->next = nextf = newfrag; - newfrag = NULL; - } - - if (start != NULL) { - /* check for misplaced start */ - if (start != frag && (MP_FLAGS(frag) & MP_BEGIN_FRAG)) { - printk(KERN_WARNING"isdn_mppp(seq %d): new " - "BEGIN flag with no prior END", thisseq); - stats->seqerrs++; - stats->frame_drops++; - start = isdn_ppp_mp_discard(mp, start, frag); - nextf = frag->next; - } - } else if (MP_LE(thisseq, minseq)) { - if (MP_FLAGS(frag) & MP_BEGIN_FRAG) - start = frag; - else { - if (MP_FLAGS(frag) & MP_END_FRAG) - stats->frame_drops++; - if (mp->frags == frag) - mp->frags = nextf; - isdn_ppp_mp_free_skb(mp, frag); - frag = nextf; - continue; - } - } - - /* if start is non-null and we have end fragment, then - * we have full reassembly sequence -- reassemble - * and process packet now - */ - if (start != NULL && (MP_FLAGS(frag) & MP_END_FRAG)) { - minseq = mp->seq = (thisseq + 1) & MP_LONGSEQ_MASK; - /* Reassemble the packet then dispatch it */ - isdn_ppp_mp_reassembly(net_dev, lp, start, nextf); - - start = NULL; - frag = NULL; - - mp->frags = nextf; - } - - /* check if need to update start pointer: if we just - * reassembled the packet and sequence is contiguous - * then next fragment should be the start of new reassembly - * if sequence is contiguous, but we haven't reassembled yet, - * keep going. - * if sequence is not contiguous, either clear everything - * below low watermark and set start to the next frag or - * clear start ptr. - */ - if (nextf != NULL && - ((thisseq + 1) & MP_LONGSEQ_MASK) == MP_SEQ(nextf)) { - /* if we just reassembled and the next one is here, - * then start another reassembly. */ - - if (frag == NULL) { - if (MP_FLAGS(nextf) & MP_BEGIN_FRAG) - start = nextf; - else - { - printk(KERN_WARNING"isdn_mppp(seq %d):" - " END flag with no following " - "BEGIN", thisseq); - stats->seqerrs++; - } - } - - } else { - if (nextf != NULL && frag != NULL && - MP_LT(thisseq, minseq)) { - /* we've got a break in the sequence - * and we not at the end yet - * and we did not just reassembled - *(if we did, there wouldn't be anything before) - * and we below the low watermark - * discard all the frames below low watermark - * and start over */ - stats->frame_drops++; - mp->frags = isdn_ppp_mp_discard(mp, start, nextf); - } - /* break in the sequence, no reassembly */ - start = NULL; - } - - frag = nextf; - } /* while -- main loop */ - - if (mp->frags == NULL) - mp->frags = frag; - - /* rather straighforward way to deal with (not very) possible - * queue overflow */ - if (mp->frames > MP_MAX_QUEUE_LEN) { - stats->overflows++; - while (mp->frames > MP_MAX_QUEUE_LEN) { - frag = mp->frags->next; - isdn_ppp_mp_free_skb(mp, mp->frags); - mp->frags = frag; - } - } - spin_unlock_irqrestore(&mp->lock, flags); -} - -static void isdn_ppp_mp_cleanup(isdn_net_local *lp) -{ - struct sk_buff *frag = lp->netdev->pb->frags; - struct sk_buff *nextfrag; - while (frag) { - nextfrag = frag->next; - isdn_ppp_mp_free_skb(lp->netdev->pb, frag); - frag = nextfrag; - } - lp->netdev->pb->frags = NULL; -} - -static u32 isdn_ppp_mp_get_seq(int short_seq, - struct sk_buff *skb, u32 last_seq) -{ - u32 seq; - int flags = skb->data[0] & (MP_BEGIN_FRAG | MP_END_FRAG); - - if (!short_seq) - { - seq = ntohl(*(__be32 *)skb->data) & MP_LONGSEQ_MASK; - skb_push(skb, 1); - } - else - { - /* convert 12-bit short seq number to 24-bit long one - */ - seq = ntohs(*(__be16 *)skb->data) & MP_SHORTSEQ_MASK; - - /* check for seqence wrap */ - if (!(seq & MP_SHORTSEQ_MAXBIT) && - (last_seq & MP_SHORTSEQ_MAXBIT) && - (unsigned long)last_seq <= MP_LONGSEQ_MAX) - seq |= (last_seq + MP_SHORTSEQ_MAX + 1) & - (~MP_SHORTSEQ_MASK & MP_LONGSEQ_MASK); - else - seq |= last_seq & (~MP_SHORTSEQ_MASK & MP_LONGSEQ_MASK); - - skb_push(skb, 3); /* put converted seqence back in skb */ - } - *(u32 *)(skb->data + 1) = seq; /* put seqence back in _host_ byte - * order */ - skb->data[0] = flags; /* restore flags */ - return seq; -} - -static struct sk_buff *isdn_ppp_mp_discard(ippp_bundle *mp, - struct sk_buff *from, - struct sk_buff *to) -{ - if (from) - while (from != to) { - struct sk_buff *next = from->next; - isdn_ppp_mp_free_skb(mp, from); - from = next; - } - return from; -} - -static void isdn_ppp_mp_reassembly(isdn_net_dev *net_dev, isdn_net_local *lp, - struct sk_buff *from, struct sk_buff *to) -{ - ippp_bundle *mp = net_dev->pb; - int proto; - struct sk_buff *skb; - unsigned int tot_len; - - if (lp->ppp_slot < 0 || lp->ppp_slot >= ISDN_MAX_CHANNELS) { - printk(KERN_ERR "%s: lp->ppp_slot(%d) out of range\n", - __func__, lp->ppp_slot); - return; - } - if (MP_FLAGS(from) == (MP_BEGIN_FRAG | MP_END_FRAG)) { - if (ippp_table[lp->ppp_slot]->debug & 0x40) - printk(KERN_DEBUG "isdn_mppp: reassembly: frame %d, " - "len %d\n", MP_SEQ(from), from->len); - skb = from; - skb_pull(skb, MP_HEADER_LEN); - mp->frames--; - } else { - struct sk_buff *frag; - int n; - - for (tot_len = n = 0, frag = from; frag != to; frag = frag->next, n++) - tot_len += frag->len - MP_HEADER_LEN; - - if (ippp_table[lp->ppp_slot]->debug & 0x40) - printk(KERN_DEBUG"isdn_mppp: reassembling frames %d " - "to %d, len %d\n", MP_SEQ(from), - (MP_SEQ(from) + n - 1) & MP_LONGSEQ_MASK, tot_len); - if ((skb = dev_alloc_skb(tot_len)) == NULL) { - printk(KERN_ERR "isdn_mppp: cannot allocate sk buff " - "of size %d\n", tot_len); - isdn_ppp_mp_discard(mp, from, to); - return; - } - - while (from != to) { - unsigned int len = from->len - MP_HEADER_LEN; - - skb_copy_from_linear_data_offset(from, MP_HEADER_LEN, - skb_put(skb, len), - len); - frag = from->next; - isdn_ppp_mp_free_skb(mp, from); - from = frag; - } - } - proto = isdn_ppp_strip_proto(skb); - isdn_ppp_push_higher(net_dev, lp, skb, proto); -} - -static void isdn_ppp_mp_free_skb(ippp_bundle *mp, struct sk_buff *skb) -{ - dev_kfree_skb(skb); - mp->frames--; -} - -static void isdn_ppp_mp_print_recv_pkt(int slot, struct sk_buff *skb) -{ - printk(KERN_DEBUG "mp_recv: %d/%d -> %02x %02x %02x %02x %02x %02x\n", - slot, (int) skb->len, - (int) skb->data[0], (int) skb->data[1], (int) skb->data[2], - (int) skb->data[3], (int) skb->data[4], (int) skb->data[5]); -} - -static int -isdn_ppp_bundle(struct ippp_struct *is, int unit) -{ - char ifn[IFNAMSIZ + 1]; - isdn_net_dev *p; - isdn_net_local *lp, *nlp; - int rc; - unsigned long flags; - - sprintf(ifn, "ippp%d", unit); - p = isdn_net_findif(ifn); - if (!p) { - printk(KERN_ERR "ippp_bundle: cannot find %s\n", ifn); - return -EINVAL; - } - - spin_lock_irqsave(&p->pb->lock, flags); - - nlp = is->lp; - lp = p->queue; - if (nlp->ppp_slot < 0 || nlp->ppp_slot >= ISDN_MAX_CHANNELS || - lp->ppp_slot < 0 || lp->ppp_slot >= ISDN_MAX_CHANNELS) { - printk(KERN_ERR "ippp_bundle: binding to invalid slot %d\n", - nlp->ppp_slot < 0 || nlp->ppp_slot >= ISDN_MAX_CHANNELS ? - nlp->ppp_slot : lp->ppp_slot); - rc = -EINVAL; - goto out; - } - - isdn_net_add_to_bundle(p, nlp); - - ippp_table[nlp->ppp_slot]->unit = ippp_table[lp->ppp_slot]->unit; - - /* maybe also SC_CCP stuff */ - ippp_table[nlp->ppp_slot]->pppcfg |= ippp_table[lp->ppp_slot]->pppcfg & - (SC_ENABLE_IP | SC_NO_TCP_CCID | SC_REJ_COMP_TCP); - ippp_table[nlp->ppp_slot]->mpppcfg |= ippp_table[lp->ppp_slot]->mpppcfg & - (SC_MP_PROT | SC_REJ_MP_PROT | SC_OUT_SHORT_SEQ | SC_IN_SHORT_SEQ); - rc = isdn_ppp_mp_init(nlp, p->pb); -out: - spin_unlock_irqrestore(&p->pb->lock, flags); - return rc; -} - -#endif /* CONFIG_ISDN_MPP */ - -/* - * network device ioctl handlers - */ - -static int -isdn_ppp_dev_ioctl_stats(int slot, struct ifreq *ifr, struct net_device *dev) -{ - struct ppp_stats __user *res = ifr->ifr_data; - struct ppp_stats t; - isdn_net_local *lp = netdev_priv(dev); - - /* build a temporary stat struct and copy it to user space */ - - memset(&t, 0, sizeof(struct ppp_stats)); - if (dev->flags & IFF_UP) { - t.p.ppp_ipackets = lp->stats.rx_packets; - t.p.ppp_ibytes = lp->stats.rx_bytes; - t.p.ppp_ierrors = lp->stats.rx_errors; - t.p.ppp_opackets = lp->stats.tx_packets; - t.p.ppp_obytes = lp->stats.tx_bytes; - t.p.ppp_oerrors = lp->stats.tx_errors; -#ifdef CONFIG_ISDN_PPP_VJ - if (slot >= 0 && ippp_table[slot]->slcomp) { - struct slcompress *slcomp = ippp_table[slot]->slcomp; - t.vj.vjs_packets = slcomp->sls_o_compressed + slcomp->sls_o_uncompressed; - t.vj.vjs_compressed = slcomp->sls_o_compressed; - t.vj.vjs_searches = slcomp->sls_o_searches; - t.vj.vjs_misses = slcomp->sls_o_misses; - t.vj.vjs_errorin = slcomp->sls_i_error; - t.vj.vjs_tossed = slcomp->sls_i_tossed; - t.vj.vjs_uncompressedin = slcomp->sls_i_uncompressed; - t.vj.vjs_compressedin = slcomp->sls_i_compressed; - } -#endif - } - if (copy_to_user(res, &t, sizeof(struct ppp_stats))) - return -EFAULT; - return 0; -} - -int -isdn_ppp_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - int error = 0; - int len; - isdn_net_local *lp = netdev_priv(dev); - - - if (lp->p_encap != ISDN_NET_ENCAP_SYNCPPP) - return -EINVAL; - - switch (cmd) { -#define PPP_VERSION "2.3.7" - case SIOCGPPPVER: - len = strlen(PPP_VERSION) + 1; - if (copy_to_user(ifr->ifr_data, PPP_VERSION, len)) - error = -EFAULT; - break; - - case SIOCGPPPSTATS: - error = isdn_ppp_dev_ioctl_stats(lp->ppp_slot, ifr, dev); - break; - default: - error = -EINVAL; - break; - } - return error; -} - -static int -isdn_ppp_if_get_unit(char *name) -{ - int len, - i, - unit = 0, - deci; - - len = strlen(name); - - if (strncmp("ippp", name, 4) || len > 8) - return -1; - - for (i = 0, deci = 1; i < len; i++, deci *= 10) { - char a = name[len - i - 1]; - if (a >= '0' && a <= '9') - unit += (a - '0') * deci; - else - break; - } - if (!i || len - i != 4) - unit = -1; - - return unit; -} - - -int -isdn_ppp_dial_slave(char *name) -{ -#ifdef CONFIG_ISDN_MPP - isdn_net_dev *ndev; - isdn_net_local *lp; - struct net_device *sdev; - - if (!(ndev = isdn_net_findif(name))) - return 1; - lp = ndev->local; - if (!(lp->flags & ISDN_NET_CONNECTED)) - return 5; - - sdev = lp->slave; - while (sdev) { - isdn_net_local *mlp = netdev_priv(sdev); - if (!(mlp->flags & ISDN_NET_CONNECTED)) - break; - sdev = mlp->slave; - } - if (!sdev) - return 2; - - isdn_net_dial_req(netdev_priv(sdev)); - return 0; -#else - return -1; -#endif -} - -int -isdn_ppp_hangup_slave(char *name) -{ -#ifdef CONFIG_ISDN_MPP - isdn_net_dev *ndev; - isdn_net_local *lp; - struct net_device *sdev; - - if (!(ndev = isdn_net_findif(name))) - return 1; - lp = ndev->local; - if (!(lp->flags & ISDN_NET_CONNECTED)) - return 5; - - sdev = lp->slave; - while (sdev) { - isdn_net_local *mlp = netdev_priv(sdev); - - if (mlp->slave) { /* find last connected link in chain */ - isdn_net_local *nlp = ISDN_SLAVE_PRIV(mlp); - - if (!(nlp->flags & ISDN_NET_CONNECTED)) - break; - } else if (mlp->flags & ISDN_NET_CONNECTED) - break; - - sdev = mlp->slave; - } - if (!sdev) - return 2; - - isdn_net_hangup(sdev); - return 0; -#else - return -1; -#endif -} - -/* - * PPP compression stuff - */ - - -/* Push an empty CCP Data Frame up to the daemon to wake it up and let it - generate a CCP Reset-Request or tear down CCP altogether */ - -static void isdn_ppp_ccp_kickup(struct ippp_struct *is) -{ - isdn_ppp_fill_rq(NULL, 0, PPP_COMP, is->lp->ppp_slot); -} - -/* In-kernel handling of CCP Reset-Request and Reset-Ack is necessary, - but absolutely nontrivial. The most abstruse problem we are facing is - that the generation, reception and all the handling of timeouts and - resends including proper request id management should be entirely left - to the (de)compressor, but indeed is not covered by the current API to - the (de)compressor. The API is a prototype version from PPP where only - some (de)compressors have yet been implemented and all of them are - rather simple in their reset handling. Especially, their is only one - outstanding ResetAck at a time with all of them and ResetReq/-Acks do - not have parameters. For this very special case it was sufficient to - just return an error code from the decompressor and have a single - reset() entry to communicate all the necessary information between - the framework and the (de)compressor. Bad enough, LZS is different - (and any other compressor may be different, too). It has multiple - histories (eventually) and needs to Reset each of them independently - and thus uses multiple outstanding Acks and history numbers as an - additional parameter to Reqs/Acks. - All that makes it harder to port the reset state engine into the - kernel because it is not just the same simple one as in (i)pppd but - it must be able to pass additional parameters and have multiple out- - standing Acks. We are trying to achieve the impossible by handling - reset transactions independent by their id. The id MUST change when - the data portion changes, thus any (de)compressor who uses more than - one resettable state must provide and recognize individual ids for - each individual reset transaction. The framework itself does _only_ - differentiate them by id, because it has no other semantics like the - (de)compressor might. - This looks like a major redesign of the interface would be nice, - but I don't have an idea how to do it better. */ - -/* Send a CCP Reset-Request or Reset-Ack directly from the kernel. This is - getting that lengthy because there is no simple "send-this-frame-out" - function above but every wrapper does a bit different. Hope I guess - correct in this hack... */ - -static void isdn_ppp_ccp_xmit_reset(struct ippp_struct *is, int proto, - unsigned char code, unsigned char id, - unsigned char *data, int len) -{ - struct sk_buff *skb; - unsigned char *p; - int hl; - int cnt = 0; - isdn_net_local *lp = is->lp; - - /* Alloc large enough skb */ - hl = dev->drv[lp->isdn_device]->interface->hl_hdrlen; - skb = alloc_skb(len + hl + 16, GFP_ATOMIC); - if (!skb) { - printk(KERN_WARNING - "ippp: CCP cannot send reset - out of memory\n"); - return; - } - skb_reserve(skb, hl); - - /* We may need to stuff an address and control field first */ - if (!(is->pppcfg & SC_COMP_AC)) { - p = skb_put(skb, 2); - *p++ = 0xff; - *p++ = 0x03; - } - - /* Stuff proto, code, id and length */ - p = skb_put(skb, 6); - *p++ = (proto >> 8); - *p++ = (proto & 0xff); - *p++ = code; - *p++ = id; - cnt = 4 + len; - *p++ = (cnt >> 8); - *p++ = (cnt & 0xff); - - /* Now stuff remaining bytes */ - if (len) { - skb_put_data(skb, data, len); - } - - /* skb is now ready for xmit */ - printk(KERN_DEBUG "Sending CCP Frame:\n"); - isdn_ppp_frame_log("ccp-xmit", skb->data, skb->len, 32, is->unit, lp->ppp_slot); - - isdn_net_write_super(lp, skb); -} - -/* Allocate the reset state vector */ -static struct ippp_ccp_reset *isdn_ppp_ccp_reset_alloc(struct ippp_struct *is) -{ - struct ippp_ccp_reset *r; - r = kzalloc(sizeof(struct ippp_ccp_reset), GFP_KERNEL); - if (!r) { - printk(KERN_ERR "ippp_ccp: failed to allocate reset data" - " structure - no mem\n"); - return NULL; - } - printk(KERN_DEBUG "ippp_ccp: allocated reset data structure %p\n", r); - is->reset = r; - return r; -} - -/* Destroy the reset state vector. Kill all pending timers first. */ -static void isdn_ppp_ccp_reset_free(struct ippp_struct *is) -{ - unsigned int id; - - printk(KERN_DEBUG "ippp_ccp: freeing reset data structure %p\n", - is->reset); - for (id = 0; id < 256; id++) { - if (is->reset->rs[id]) { - isdn_ppp_ccp_reset_free_state(is, (unsigned char)id); - } - } - kfree(is->reset); - is->reset = NULL; -} - -/* Free a given state and clear everything up for later reallocation */ -static void isdn_ppp_ccp_reset_free_state(struct ippp_struct *is, - unsigned char id) -{ - struct ippp_ccp_reset_state *rs; - - if (is->reset->rs[id]) { - printk(KERN_DEBUG "ippp_ccp: freeing state for id %d\n", id); - rs = is->reset->rs[id]; - /* Make sure the kernel will not call back later */ - if (rs->ta) - del_timer(&rs->timer); - is->reset->rs[id] = NULL; - kfree(rs); - } else { - printk(KERN_WARNING "ippp_ccp: id %d is not allocated\n", id); - } -} - -/* The timer callback function which is called when a ResetReq has timed out, - aka has never been answered by a ResetAck */ -static void isdn_ppp_ccp_timer_callback(struct timer_list *t) -{ - struct ippp_ccp_reset_state *rs = - from_timer(rs, t, timer); - - if (!rs) { - printk(KERN_ERR "ippp_ccp: timer cb with zero closure.\n"); - return; - } - if (rs->ta && rs->state == CCPResetSentReq) { - /* We are correct here */ - if (!rs->expra) { - /* Hmm, there is no Ack really expected. We can clean - up the state now, it will be reallocated if the - decompressor insists on another reset */ - rs->ta = 0; - isdn_ppp_ccp_reset_free_state(rs->is, rs->id); - return; - } - printk(KERN_DEBUG "ippp_ccp: CCP Reset timed out for id %d\n", - rs->id); - /* Push it again */ - isdn_ppp_ccp_xmit_reset(rs->is, PPP_CCP, CCP_RESETREQ, rs->id, - rs->data, rs->dlen); - /* Restart timer */ - rs->timer.expires = jiffies + HZ * 5; - add_timer(&rs->timer); - } else { - printk(KERN_WARNING "ippp_ccp: timer cb in wrong state %d\n", - rs->state); - } -} - -/* Allocate a new reset transaction state */ -static struct ippp_ccp_reset_state *isdn_ppp_ccp_reset_alloc_state(struct ippp_struct *is, - unsigned char id) -{ - struct ippp_ccp_reset_state *rs; - if (is->reset->rs[id]) { - printk(KERN_WARNING "ippp_ccp: old state exists for id %d\n", - id); - return NULL; - } else { - rs = kzalloc(sizeof(struct ippp_ccp_reset_state), GFP_ATOMIC); - if (!rs) - return NULL; - rs->state = CCPResetIdle; - rs->is = is; - rs->id = id; - timer_setup(&rs->timer, isdn_ppp_ccp_timer_callback, 0); - is->reset->rs[id] = rs; - } - return rs; -} - - -/* A decompressor wants a reset with a set of parameters - do what is - necessary to fulfill it */ -static void isdn_ppp_ccp_reset_trans(struct ippp_struct *is, - struct isdn_ppp_resetparams *rp) -{ - struct ippp_ccp_reset_state *rs; - - if (rp->valid) { - /* The decompressor defines parameters by itself */ - if (rp->rsend) { - /* And he wants us to send a request */ - if (!(rp->idval)) { - printk(KERN_ERR "ippp_ccp: decompressor must" - " specify reset id\n"); - return; - } - if (is->reset->rs[rp->id]) { - /* There is already a transaction in existence - for this id. May be still waiting for a - Ack or may be wrong. */ - rs = is->reset->rs[rp->id]; - if (rs->state == CCPResetSentReq && rs->ta) { - printk(KERN_DEBUG "ippp_ccp: reset" - " trans still in progress" - " for id %d\n", rp->id); - } else { - printk(KERN_WARNING "ippp_ccp: reset" - " trans in wrong state %d for" - " id %d\n", rs->state, rp->id); - } - } else { - /* Ok, this is a new transaction */ - printk(KERN_DEBUG "ippp_ccp: new trans for id" - " %d to be started\n", rp->id); - rs = isdn_ppp_ccp_reset_alloc_state(is, rp->id); - if (!rs) { - printk(KERN_ERR "ippp_ccp: out of mem" - " allocing ccp trans\n"); - return; - } - rs->state = CCPResetSentReq; - rs->expra = rp->expra; - if (rp->dtval) { - rs->dlen = rp->dlen; - memcpy(rs->data, rp->data, rp->dlen); - } - /* HACK TODO - add link comp here */ - isdn_ppp_ccp_xmit_reset(is, PPP_CCP, - CCP_RESETREQ, rs->id, - rs->data, rs->dlen); - /* Start the timer */ - rs->timer.expires = jiffies + 5 * HZ; - add_timer(&rs->timer); - rs->ta = 1; - } - } else { - printk(KERN_DEBUG "ippp_ccp: no reset sent\n"); - } - } else { - /* The reset params are invalid. The decompressor does not - care about them, so we just send the minimal requests - and increase ids only when an Ack is received for a - given id */ - if (is->reset->rs[is->reset->lastid]) { - /* There is already a transaction in existence - for this id. May be still waiting for a - Ack or may be wrong. */ - rs = is->reset->rs[is->reset->lastid]; - if (rs->state == CCPResetSentReq && rs->ta) { - printk(KERN_DEBUG "ippp_ccp: reset" - " trans still in progress" - " for id %d\n", rp->id); - } else { - printk(KERN_WARNING "ippp_ccp: reset" - " trans in wrong state %d for" - " id %d\n", rs->state, rp->id); - } - } else { - printk(KERN_DEBUG "ippp_ccp: new trans for id" - " %d to be started\n", is->reset->lastid); - rs = isdn_ppp_ccp_reset_alloc_state(is, - is->reset->lastid); - if (!rs) { - printk(KERN_ERR "ippp_ccp: out of mem" - " allocing ccp trans\n"); - return; - } - rs->state = CCPResetSentReq; - /* We always expect an Ack if the decompressor doesn't - know better */ - rs->expra = 1; - rs->dlen = 0; - /* HACK TODO - add link comp here */ - isdn_ppp_ccp_xmit_reset(is, PPP_CCP, CCP_RESETREQ, - rs->id, NULL, 0); - /* Start the timer */ - rs->timer.expires = jiffies + 5 * HZ; - add_timer(&rs->timer); - rs->ta = 1; - } - } -} - -/* An Ack was received for this id. This means we stop the timer and clean - up the state prior to calling the decompressors reset routine. */ -static void isdn_ppp_ccp_reset_ack_rcvd(struct ippp_struct *is, - unsigned char id) -{ - struct ippp_ccp_reset_state *rs = is->reset->rs[id]; - - if (rs) { - if (rs->ta && rs->state == CCPResetSentReq) { - /* Great, we are correct */ - if (!rs->expra) - printk(KERN_DEBUG "ippp_ccp: ResetAck received" - " for id %d but not expected\n", id); - } else { - printk(KERN_INFO "ippp_ccp: ResetAck received out of" - "sync for id %d\n", id); - } - if (rs->ta) { - rs->ta = 0; - del_timer(&rs->timer); - } - isdn_ppp_ccp_reset_free_state(is, id); - } else { - printk(KERN_INFO "ippp_ccp: ResetAck received for unknown id" - " %d\n", id); - } - /* Make sure the simple reset stuff uses a new id next time */ - is->reset->lastid++; -} - -/* - * decompress packet - * - * if master = 0, we're trying to uncompress an per-link compressed packet, - * as opposed to an compressed reconstructed-from-MPPP packet. - * proto is updated to protocol field of uncompressed packet. - * - * retval: decompressed packet, - * same packet if uncompressed, - * NULL if decompression error - */ - -static struct sk_buff *isdn_ppp_decompress(struct sk_buff *skb, struct ippp_struct *is, struct ippp_struct *master, - int *proto) -{ - void *stat = NULL; - struct isdn_ppp_compressor *ipc = NULL; - struct sk_buff *skb_out; - int len; - struct ippp_struct *ri; - struct isdn_ppp_resetparams rsparm; - unsigned char rsdata[IPPP_RESET_MAXDATABYTES]; - - if (!master) { - // per-link decompression - stat = is->link_decomp_stat; - ipc = is->link_decompressor; - ri = is; - } else { - stat = master->decomp_stat; - ipc = master->decompressor; - ri = master; - } - - if (!ipc) { - // no decompressor -> we can't decompress. - printk(KERN_DEBUG "ippp: no decompressor defined!\n"); - return skb; - } - BUG_ON(!stat); // if we have a compressor, stat has been set as well - - if ((master && *proto == PPP_COMP) || (!master && *proto == PPP_COMPFRAG)) { - // compressed packets are compressed by their protocol type - - // Set up reset params for the decompressor - memset(&rsparm, 0, sizeof(rsparm)); - rsparm.data = rsdata; - rsparm.maxdlen = IPPP_RESET_MAXDATABYTES; - - skb_out = dev_alloc_skb(is->mru + PPP_HDRLEN); - if (!skb_out) { - kfree_skb(skb); - printk(KERN_ERR "ippp: decomp memory allocation failure\n"); - return NULL; - } - len = ipc->decompress(stat, skb, skb_out, &rsparm); - kfree_skb(skb); - if (len <= 0) { - switch (len) { - case DECOMP_ERROR: - printk(KERN_INFO "ippp: decomp wants reset %s params\n", - rsparm.valid ? "with" : "without"); - - isdn_ppp_ccp_reset_trans(ri, &rsparm); - break; - case DECOMP_FATALERROR: - ri->pppcfg |= SC_DC_FERROR; - /* Kick ipppd to recognize the error */ - isdn_ppp_ccp_kickup(ri); - break; - } - kfree_skb(skb_out); - return NULL; - } - *proto = isdn_ppp_strip_proto(skb_out); - if (*proto < 0) { - kfree_skb(skb_out); - return NULL; - } - return skb_out; - } else { - // uncompressed packets are fed through the decompressor to - // update the decompressor state - ipc->incomp(stat, skb, *proto); - return skb; - } -} - -/* - * compress a frame - * type=0: normal/bundle compression - * =1: link compression - * returns original skb if we haven't compressed the frame - * and a new skb pointer if we've done it - */ -static struct sk_buff *isdn_ppp_compress(struct sk_buff *skb_in, int *proto, - struct ippp_struct *is, struct ippp_struct *master, int type) -{ - int ret; - int new_proto; - struct isdn_ppp_compressor *compressor; - void *stat; - struct sk_buff *skb_out; - - /* we do not compress control protocols */ - if (*proto < 0 || *proto > 0x3fff) { - return skb_in; - } - - if (type) { /* type=1 => Link compression */ - return skb_in; - } - else { - if (!master) { - compressor = is->compressor; - stat = is->comp_stat; - } - else { - compressor = master->compressor; - stat = master->comp_stat; - } - new_proto = PPP_COMP; - } - - if (!compressor) { - printk(KERN_ERR "isdn_ppp: No compressor set!\n"); - return skb_in; - } - if (!stat) { - printk(KERN_ERR "isdn_ppp: Compressor not initialized?\n"); - return skb_in; - } - - /* Allow for at least 150 % expansion (for now) */ - skb_out = alloc_skb(skb_in->len + skb_in->len / 2 + 32 + - skb_headroom(skb_in), GFP_ATOMIC); - if (!skb_out) - return skb_in; - skb_reserve(skb_out, skb_headroom(skb_in)); - - ret = (compressor->compress)(stat, skb_in, skb_out, *proto); - if (!ret) { - dev_kfree_skb(skb_out); - return skb_in; - } - - dev_kfree_skb(skb_in); - *proto = new_proto; - return skb_out; -} - -/* - * we received a CCP frame .. - * not a clean solution, but we MUST handle a few cases in the kernel - */ -static void isdn_ppp_receive_ccp(isdn_net_dev *net_dev, isdn_net_local *lp, - struct sk_buff *skb, int proto) -{ - struct ippp_struct *is; - struct ippp_struct *mis; - int len; - struct isdn_ppp_resetparams rsparm; - unsigned char rsdata[IPPP_RESET_MAXDATABYTES]; - - printk(KERN_DEBUG "Received CCP frame from peer slot(%d)\n", - lp->ppp_slot); - if (lp->ppp_slot < 0 || lp->ppp_slot >= ISDN_MAX_CHANNELS) { - printk(KERN_ERR "%s: lp->ppp_slot(%d) out of range\n", - __func__, lp->ppp_slot); - return; - } - is = ippp_table[lp->ppp_slot]; - isdn_ppp_frame_log("ccp-rcv", skb->data, skb->len, 32, is->unit, lp->ppp_slot); - - if (lp->master) { - int slot = ISDN_MASTER_PRIV(lp)->ppp_slot; - if (slot < 0 || slot >= ISDN_MAX_CHANNELS) { - printk(KERN_ERR "%s: slot(%d) out of range\n", - __func__, slot); - return; - } - mis = ippp_table[slot]; - } else - mis = is; - - switch (skb->data[0]) { - case CCP_CONFREQ: - if (is->debug & 0x10) - printk(KERN_DEBUG "Disable compression here!\n"); - if (proto == PPP_CCP) - mis->compflags &= ~SC_COMP_ON; - else - is->compflags &= ~SC_LINK_COMP_ON; - break; - case CCP_TERMREQ: - case CCP_TERMACK: - if (is->debug & 0x10) - printk(KERN_DEBUG "Disable (de)compression here!\n"); - if (proto == PPP_CCP) - mis->compflags &= ~(SC_DECOMP_ON | SC_COMP_ON); - else - is->compflags &= ~(SC_LINK_DECOMP_ON | SC_LINK_COMP_ON); - break; - case CCP_CONFACK: - /* if we RECEIVE an ackowledge we enable the decompressor */ - if (is->debug & 0x10) - printk(KERN_DEBUG "Enable decompression here!\n"); - if (proto == PPP_CCP) { - if (!mis->decompressor) - break; - mis->compflags |= SC_DECOMP_ON; - } else { - if (!is->decompressor) - break; - is->compflags |= SC_LINK_DECOMP_ON; - } - break; - - case CCP_RESETACK: - printk(KERN_DEBUG "Received ResetAck from peer\n"); - len = (skb->data[2] << 8) | skb->data[3]; - len -= 4; - - if (proto == PPP_CCP) { - /* If a reset Ack was outstanding for this id, then - clean up the state engine */ - isdn_ppp_ccp_reset_ack_rcvd(mis, skb->data[1]); - if (mis->decompressor && mis->decomp_stat) - mis->decompressor-> - reset(mis->decomp_stat, - skb->data[0], - skb->data[1], - len ? &skb->data[4] : NULL, - len, NULL); - /* TODO: This is not easy to decide here */ - mis->compflags &= ~SC_DECOMP_DISCARD; - } - else { - isdn_ppp_ccp_reset_ack_rcvd(is, skb->data[1]); - if (is->link_decompressor && is->link_decomp_stat) - is->link_decompressor-> - reset(is->link_decomp_stat, - skb->data[0], - skb->data[1], - len ? &skb->data[4] : NULL, - len, NULL); - /* TODO: neither here */ - is->compflags &= ~SC_LINK_DECOMP_DISCARD; - } - break; - - case CCP_RESETREQ: - printk(KERN_DEBUG "Received ResetReq from peer\n"); - /* Receiving a ResetReq means we must reset our compressor */ - /* Set up reset params for the reset entry */ - memset(&rsparm, 0, sizeof(rsparm)); - rsparm.data = rsdata; - rsparm.maxdlen = IPPP_RESET_MAXDATABYTES; - /* Isolate data length */ - len = (skb->data[2] << 8) | skb->data[3]; - len -= 4; - if (proto == PPP_CCP) { - if (mis->compressor && mis->comp_stat) - mis->compressor-> - reset(mis->comp_stat, - skb->data[0], - skb->data[1], - len ? &skb->data[4] : NULL, - len, &rsparm); - } - else { - if (is->link_compressor && is->link_comp_stat) - is->link_compressor-> - reset(is->link_comp_stat, - skb->data[0], - skb->data[1], - len ? &skb->data[4] : NULL, - len, &rsparm); - } - /* Ack the Req as specified by rsparm */ - if (rsparm.valid) { - /* Compressor reset handler decided how to answer */ - if (rsparm.rsend) { - /* We should send a Frame */ - isdn_ppp_ccp_xmit_reset(is, proto, CCP_RESETACK, - rsparm.idval ? rsparm.id - : skb->data[1], - rsparm.dtval ? - rsparm.data : NULL, - rsparm.dtval ? - rsparm.dlen : 0); - } else { - printk(KERN_DEBUG "ResetAck suppressed\n"); - } - } else { - /* We answer with a straight reflected Ack */ - isdn_ppp_ccp_xmit_reset(is, proto, CCP_RESETACK, - skb->data[1], - len ? &skb->data[4] : NULL, - len); - } - break; - } -} - - -/* - * Daemon sends a CCP frame ... - */ - -/* TODO: Clean this up with new Reset semantics */ - -/* I believe the CCP handling as-is is done wrong. Compressed frames - * should only be sent/received after CCP reaches UP state, which means - * both sides have sent CONF_ACK. Currently, we handle both directions - * independently, which means we may accept compressed frames too early - * (supposedly not a problem), but may also mean we send compressed frames - * too early, which may turn out to be a problem. - * This part of state machine should actually be handled by (i)pppd, but - * that's too big of a change now. --kai - */ - -/* Actually, we might turn this into an advantage: deal with the RFC in - * the old tradition of beeing generous on what we accept, but beeing - * strict on what we send. Thus we should just - * - accept compressed frames as soon as decompression is negotiated - * - send compressed frames only when decomp *and* comp are negotiated - * - drop rx compressed frames if we cannot decomp (instead of pushing them - * up to ipppd) - * and I tried to modify this file according to that. --abp - */ - -static void isdn_ppp_send_ccp(isdn_net_dev *net_dev, isdn_net_local *lp, struct sk_buff *skb) -{ - struct ippp_struct *mis, *is; - int proto, slot = lp->ppp_slot; - unsigned char *data; - - if (!skb || skb->len < 3) - return; - if (slot < 0 || slot >= ISDN_MAX_CHANNELS) { - printk(KERN_ERR "%s: lp->ppp_slot(%d) out of range\n", - __func__, slot); - return; - } - is = ippp_table[slot]; - /* Daemon may send with or without address and control field comp */ - data = skb->data; - if (!(is->pppcfg & SC_COMP_AC) && data[0] == 0xff && data[1] == 0x03) { - data += 2; - if (skb->len < 5) - return; - } - - proto = ((int)data[0]<<8) + data[1]; - if (proto != PPP_CCP && proto != PPP_CCPFRAG) - return; - - printk(KERN_DEBUG "Received CCP frame from daemon:\n"); - isdn_ppp_frame_log("ccp-xmit", skb->data, skb->len, 32, is->unit, lp->ppp_slot); - - if (lp->master) { - slot = ISDN_MASTER_PRIV(lp)->ppp_slot; - if (slot < 0 || slot >= ISDN_MAX_CHANNELS) { - printk(KERN_ERR "%s: slot(%d) out of range\n", - __func__, slot); - return; - } - mis = ippp_table[slot]; - } else - mis = is; - if (mis != is) - printk(KERN_DEBUG "isdn_ppp: Ouch! Master CCP sends on slave slot!\n"); - - switch (data[2]) { - case CCP_CONFREQ: - if (is->debug & 0x10) - printk(KERN_DEBUG "Disable decompression here!\n"); - if (proto == PPP_CCP) - is->compflags &= ~SC_DECOMP_ON; - else - is->compflags &= ~SC_LINK_DECOMP_ON; - break; - case CCP_TERMREQ: - case CCP_TERMACK: - if (is->debug & 0x10) - printk(KERN_DEBUG "Disable (de)compression here!\n"); - if (proto == PPP_CCP) - is->compflags &= ~(SC_DECOMP_ON | SC_COMP_ON); - else - is->compflags &= ~(SC_LINK_DECOMP_ON | SC_LINK_COMP_ON); - break; - case CCP_CONFACK: - /* if we SEND an ackowledge we can/must enable the compressor */ - if (is->debug & 0x10) - printk(KERN_DEBUG "Enable compression here!\n"); - if (proto == PPP_CCP) { - if (!is->compressor) - break; - is->compflags |= SC_COMP_ON; - } else { - if (!is->compressor) - break; - is->compflags |= SC_LINK_COMP_ON; - } - break; - case CCP_RESETACK: - /* If we send a ACK we should reset our compressor */ - if (is->debug & 0x10) - printk(KERN_DEBUG "Reset decompression state here!\n"); - printk(KERN_DEBUG "ResetAck from daemon passed by\n"); - if (proto == PPP_CCP) { - /* link to master? */ - if (is->compressor && is->comp_stat) - is->compressor->reset(is->comp_stat, 0, 0, - NULL, 0, NULL); - is->compflags &= ~SC_COMP_DISCARD; - } - else { - if (is->link_compressor && is->link_comp_stat) - is->link_compressor->reset(is->link_comp_stat, - 0, 0, NULL, 0, NULL); - is->compflags &= ~SC_LINK_COMP_DISCARD; - } - break; - case CCP_RESETREQ: - /* Just let it pass by */ - printk(KERN_DEBUG "ResetReq from daemon passed by\n"); - break; - } -} - -int isdn_ppp_register_compressor(struct isdn_ppp_compressor *ipc) -{ - ipc->next = ipc_head; - ipc->prev = NULL; - if (ipc_head) { - ipc_head->prev = ipc; - } - ipc_head = ipc; - return 0; -} - -int isdn_ppp_unregister_compressor(struct isdn_ppp_compressor *ipc) -{ - if (ipc->prev) - ipc->prev->next = ipc->next; - else - ipc_head = ipc->next; - if (ipc->next) - ipc->next->prev = ipc->prev; - ipc->prev = ipc->next = NULL; - return 0; -} - -static int isdn_ppp_set_compressor(struct ippp_struct *is, struct isdn_ppp_comp_data *data) -{ - struct isdn_ppp_compressor *ipc = ipc_head; - int ret; - void *stat; - int num = data->num; - - if (is->debug & 0x10) - printk(KERN_DEBUG "[%d] Set %s type %d\n", is->unit, - (data->flags & IPPP_COMP_FLAG_XMIT) ? "compressor" : "decompressor", num); - - /* If is has no valid reset state vector, we cannot allocate a - decompressor. The decompressor would cause reset transactions - sooner or later, and they need that vector. */ - - if (!(data->flags & IPPP_COMP_FLAG_XMIT) && !is->reset) { - printk(KERN_ERR "ippp_ccp: no reset data structure - can't" - " allow decompression.\n"); - return -ENOMEM; - } - - while (ipc) { - if (ipc->num == num) { - stat = ipc->alloc(data); - if (stat) { - ret = ipc->init(stat, data, is->unit, 0); - if (!ret) { - printk(KERN_ERR "Can't init (de)compression!\n"); - ipc->free(stat); - stat = NULL; - break; - } - } - else { - printk(KERN_ERR "Can't alloc (de)compression!\n"); - break; - } - - if (data->flags & IPPP_COMP_FLAG_XMIT) { - if (data->flags & IPPP_COMP_FLAG_LINK) { - if (is->link_comp_stat) - is->link_compressor->free(is->link_comp_stat); - is->link_comp_stat = stat; - is->link_compressor = ipc; - } - else { - if (is->comp_stat) - is->compressor->free(is->comp_stat); - is->comp_stat = stat; - is->compressor = ipc; - } - } - else { - if (data->flags & IPPP_COMP_FLAG_LINK) { - if (is->link_decomp_stat) - is->link_decompressor->free(is->link_decomp_stat); - is->link_decomp_stat = stat; - is->link_decompressor = ipc; - } - else { - if (is->decomp_stat) - is->decompressor->free(is->decomp_stat); - is->decomp_stat = stat; - is->decompressor = ipc; - } - } - return 0; - } - ipc = ipc->next; - } - return -EINVAL; -} diff --git a/drivers/isdn/i4l/isdn_ppp.h b/drivers/isdn/i4l/isdn_ppp.h deleted file mode 100644 index 34b8a2ce84f3..000000000000 --- a/drivers/isdn/i4l/isdn_ppp.h +++ /dev/null @@ -1,41 +0,0 @@ -/* $Id: isdn_ppp.h,v 1.1.2.2 2004/01/12 22:37:19 keil Exp $ - * - * header for Linux ISDN subsystem, functions for synchronous PPP (linklevel). - * - * Copyright 1995,96 by Michael Hipp (Michael.Hipp@student.uni-tuebingen.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#include /* for PPP_PROTOCOL */ -#include /* for isdn_ppp info */ - -extern int isdn_ppp_read(int, struct file *, char __user *, int); -extern int isdn_ppp_write(int, struct file *, const char __user *, int); -extern int isdn_ppp_open(int, struct file *); -extern int isdn_ppp_init(void); -extern void isdn_ppp_cleanup(void); -extern int isdn_ppp_free(isdn_net_local *); -extern int isdn_ppp_bind(isdn_net_local *); -extern int isdn_ppp_autodial_filter(struct sk_buff *, isdn_net_local *); -extern int isdn_ppp_xmit(struct sk_buff *, struct net_device *); -extern void isdn_ppp_receive(isdn_net_dev *, isdn_net_local *, struct sk_buff *); -extern int isdn_ppp_dev_ioctl(struct net_device *, struct ifreq *, int); -extern __poll_t isdn_ppp_poll(struct file *, struct poll_table_struct *); -extern int isdn_ppp_ioctl(int, struct file *, unsigned int, unsigned long); -extern void isdn_ppp_release(int, struct file *); -extern int isdn_ppp_dial_slave(char *); -extern void isdn_ppp_wakeup_daemon(isdn_net_local *); - -extern int isdn_ppp_register_compressor(struct isdn_ppp_compressor *ipc); -extern int isdn_ppp_unregister_compressor(struct isdn_ppp_compressor *ipc); - -#define IPPP_OPEN 0x01 -#define IPPP_CONNECT 0x02 -#define IPPP_CLOSEWAIT 0x04 -#define IPPP_NOBLOCK 0x08 -#define IPPP_ASSIGNED 0x10 - -#define IPPP_MAX_HEADER 10 diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c deleted file mode 100644 index 43700fc19a31..000000000000 --- a/drivers/isdn/i4l/isdn_tty.c +++ /dev/null @@ -1,3756 +0,0 @@ -/* - * Linux ISDN subsystem, tty functions and AT-command emulator (linklevel). - * - * Copyright 1994-1999 by Fritz Elfert (fritz@isdn4linux.de) - * Copyright 1995,96 by Thinking Objects Software GmbH Wuerzburg - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ -#undef ISDN_TTY_STAT_DEBUG - -#include -#include /* ASYNC_* flags */ -#include -#include -#include -#include -#include "isdn_common.h" -#include "isdn_tty.h" -#ifdef CONFIG_ISDN_AUDIO -#include "isdn_audio.h" -#define VBUF 0x3e0 -#define VBUFX (VBUF/16) -#endif - -#define FIX_FILE_TRANSFER -#define DUMMY_HAYES_AT - -/* Prototypes */ - -static DEFINE_MUTEX(modem_info_mutex); -static int isdn_tty_edit_at(const char *, int, modem_info *); -static void isdn_tty_check_esc(const u_char *, u_char, int, int *, u_long *); -static void isdn_tty_modem_reset_regs(modem_info *, int); -static void isdn_tty_cmd_ATA(modem_info *); -static void isdn_tty_flush_buffer(struct tty_struct *); -static void isdn_tty_modem_result(int, modem_info *); -#ifdef CONFIG_ISDN_AUDIO -static int isdn_tty_countDLE(unsigned char *, int); -#endif - -/* Leave this unchanged unless you know what you do! */ -#define MODEM_PARANOIA_CHECK -#define MODEM_DO_RESTART - -static int bit2si[8] = -{1, 5, 7, 7, 7, 7, 7, 7}; -static int si2bit[8] = -{4, 1, 4, 4, 4, 4, 4, 4}; - -/* isdn_tty_try_read() is called from within isdn_tty_rcv_skb() - * to stuff incoming data directly into a tty's flip-buffer. This - * is done to speed up tty-receiving if the receive-queue is empty. - * This routine MUST be called with interrupts off. - * Return: - * 1 = Success - * 0 = Failure, data has to be buffered and later processed by - * isdn_tty_readmodem(). - */ -static int -isdn_tty_try_read(modem_info *info, struct sk_buff *skb) -{ - struct tty_port *port = &info->port; - int c; - int len; - char last; - - if (!info->online) - return 0; - - if (!(info->mcr & UART_MCR_RTS)) - return 0; - - len = skb->len -#ifdef CONFIG_ISDN_AUDIO - + ISDN_AUDIO_SKB_DLECOUNT(skb) -#endif - ; - - c = tty_buffer_request_room(port, len); - if (c < len) - return 0; - -#ifdef CONFIG_ISDN_AUDIO - if (ISDN_AUDIO_SKB_DLECOUNT(skb)) { - int l = skb->len; - unsigned char *dp = skb->data; - while (--l) { - if (*dp == DLE) - tty_insert_flip_char(port, DLE, 0); - tty_insert_flip_char(port, *dp++, 0); - } - if (*dp == DLE) - tty_insert_flip_char(port, DLE, 0); - last = *dp; - } else { -#endif - if (len > 1) - tty_insert_flip_string(port, skb->data, len - 1); - last = skb->data[len - 1]; -#ifdef CONFIG_ISDN_AUDIO - } -#endif - if (info->emu.mdmreg[REG_CPPP] & BIT_CPPP) - tty_insert_flip_char(port, last, 0xFF); - else - tty_insert_flip_char(port, last, TTY_NORMAL); - tty_flip_buffer_push(port); - kfree_skb(skb); - - return 1; -} - -/* isdn_tty_readmodem() is called periodically from within timer-interrupt. - * It tries getting received data from the receive queue an stuff it into - * the tty's flip-buffer. - */ -void -isdn_tty_readmodem(void) -{ - int resched = 0; - int midx; - int i; - int r; - modem_info *info; - - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - midx = dev->m_idx[i]; - if (midx < 0) - continue; - - info = &dev->mdm.info[midx]; - if (!info->online) - continue; - - r = 0; -#ifdef CONFIG_ISDN_AUDIO - isdn_audio_eval_dtmf(info); - if ((info->vonline & 1) && (info->emu.vpar[1])) - isdn_audio_eval_silence(info); -#endif - if (info->mcr & UART_MCR_RTS) { - /* CISCO AsyncPPP Hack */ - if (!(info->emu.mdmreg[REG_CPPP] & BIT_CPPP)) - r = isdn_readbchan_tty(info->isdn_driver, - info->isdn_channel, - &info->port, 0); - else - r = isdn_readbchan_tty(info->isdn_driver, - info->isdn_channel, - &info->port, 1); - if (r) - tty_flip_buffer_push(&info->port); - } else - r = 1; - - if (r) { - info->rcvsched = 0; - resched = 1; - } else - info->rcvsched = 1; - } - if (!resched) - isdn_timer_ctrl(ISDN_TIMER_MODEMREAD, 0); -} - -int -isdn_tty_rcv_skb(int i, int di, int channel, struct sk_buff *skb) -{ - ulong flags; - int midx; -#ifdef CONFIG_ISDN_AUDIO - int ifmt; -#endif - modem_info *info; - - if ((midx = dev->m_idx[i]) < 0) { - /* if midx is invalid, packet is not for tty */ - return 0; - } - info = &dev->mdm.info[midx]; -#ifdef CONFIG_ISDN_AUDIO - ifmt = 1; - - if ((info->vonline) && (!info->emu.vpar[4])) - isdn_audio_calc_dtmf(info, skb->data, skb->len, ifmt); - if ((info->vonline & 1) && (info->emu.vpar[1])) - isdn_audio_calc_silence(info, skb->data, skb->len, ifmt); -#endif - if ((info->online < 2) -#ifdef CONFIG_ISDN_AUDIO - && (!(info->vonline & 1)) -#endif - ) { - /* If Modem not listening, drop data */ - kfree_skb(skb); - return 1; - } - if (info->emu.mdmreg[REG_T70] & BIT_T70) { - if (info->emu.mdmreg[REG_T70] & BIT_T70_EXT) { - /* T.70 decoding: throw away the T.70 header (2 or 4 bytes) */ - if (skb->data[0] == 3) /* pure data packet -> 4 byte headers */ - skb_pull(skb, 4); - else - if (skb->data[0] == 1) /* keepalive packet -> 2 byte hdr */ - skb_pull(skb, 2); - } else - /* T.70 decoding: Simply throw away the T.70 header (4 bytes) */ - if ((skb->data[0] == 1) && ((skb->data[1] == 0) || (skb->data[1] == 1))) - skb_pull(skb, 4); - } -#ifdef CONFIG_ISDN_AUDIO - ISDN_AUDIO_SKB_DLECOUNT(skb) = 0; - ISDN_AUDIO_SKB_LOCK(skb) = 0; - if (info->vonline & 1) { - /* voice conversion/compression */ - switch (info->emu.vpar[3]) { - case 2: - case 3: - case 4: - /* adpcm - * Since compressed data takes less - * space, we can overwrite the buffer. - */ - skb_trim(skb, isdn_audio_xlaw2adpcm(info->adpcmr, - ifmt, - skb->data, - skb->data, - skb->len)); - break; - case 5: - /* a-law */ - if (!ifmt) - isdn_audio_ulaw2alaw(skb->data, skb->len); - break; - case 6: - /* u-law */ - if (ifmt) - isdn_audio_alaw2ulaw(skb->data, skb->len); - break; - } - ISDN_AUDIO_SKB_DLECOUNT(skb) = - isdn_tty_countDLE(skb->data, skb->len); - } -#ifdef CONFIG_ISDN_TTY_FAX - else { - if (info->faxonline & 2) { - isdn_tty_fax_bitorder(info, skb); - ISDN_AUDIO_SKB_DLECOUNT(skb) = - isdn_tty_countDLE(skb->data, skb->len); - } - } -#endif -#endif - /* Try to deliver directly via tty-buf if queue is empty */ - spin_lock_irqsave(&info->readlock, flags); - if (skb_queue_empty(&dev->drv[di]->rpqueue[channel])) - if (isdn_tty_try_read(info, skb)) { - spin_unlock_irqrestore(&info->readlock, flags); - return 1; - } - /* Direct deliver failed or queue wasn't empty. - * Queue up for later dequeueing via timer-irq. - */ - __skb_queue_tail(&dev->drv[di]->rpqueue[channel], skb); - dev->drv[di]->rcvcount[channel] += - (skb->len -#ifdef CONFIG_ISDN_AUDIO - + ISDN_AUDIO_SKB_DLECOUNT(skb) -#endif - ); - spin_unlock_irqrestore(&info->readlock, flags); - /* Schedule dequeuing */ - if ((dev->modempoll) && (info->rcvsched)) - isdn_timer_ctrl(ISDN_TIMER_MODEMREAD, 1); - return 1; -} - -static void -isdn_tty_cleanup_xmit(modem_info *info) -{ - skb_queue_purge(&info->xmit_queue); -#ifdef CONFIG_ISDN_AUDIO - skb_queue_purge(&info->dtmf_queue); -#endif -} - -static void -isdn_tty_tint(modem_info *info) -{ - struct sk_buff *skb = skb_dequeue(&info->xmit_queue); - int len, slen; - - if (!skb) - return; - len = skb->len; - if ((slen = isdn_writebuf_skb_stub(info->isdn_driver, - info->isdn_channel, 1, skb)) == len) { - struct tty_struct *tty = info->port.tty; - info->send_outstanding++; - info->msr &= ~UART_MSR_CTS; - info->lsr &= ~UART_LSR_TEMT; - tty_wakeup(tty); - return; - } - if (slen < 0) { - /* Error: no channel, already shutdown, or wrong parameter */ - dev_kfree_skb(skb); - return; - } - skb_queue_head(&info->xmit_queue, skb); -} - -#ifdef CONFIG_ISDN_AUDIO -static int -isdn_tty_countDLE(unsigned char *buf, int len) -{ - int count = 0; - - while (len--) - if (*buf++ == DLE) - count++; - return count; -} - -/* This routine is called from within isdn_tty_write() to perform - * DLE-decoding when sending audio-data. - */ -static int -isdn_tty_handleDLEdown(modem_info *info, atemu *m, int len) -{ - unsigned char *p = &info->port.xmit_buf[info->xmit_count]; - int count = 0; - - while (len > 0) { - if (m->lastDLE) { - m->lastDLE = 0; - switch (*p) { - case DLE: - /* Escape code */ - if (len > 1) - memmove(p, p + 1, len - 1); - p--; - count++; - break; - case ETX: - /* End of data */ - info->vonline |= 4; - return count; - case DC4: - /* Abort RX */ - info->vonline &= ~1; -#ifdef ISDN_DEBUG_MODEM_VOICE - printk(KERN_DEBUG - "DLEdown: got DLE-DC4, send DLE-ETX on ttyI%d\n", - info->line); -#endif - isdn_tty_at_cout("\020\003", info); - if (!info->vonline) { -#ifdef ISDN_DEBUG_MODEM_VOICE - printk(KERN_DEBUG - "DLEdown: send VCON on ttyI%d\n", - info->line); -#endif - isdn_tty_at_cout("\r\nVCON\r\n", info); - } - /* Fall through */ - case 'q': - case 's': - /* Silence */ - if (len > 1) - memmove(p, p + 1, len - 1); - p--; - break; - } - } else { - if (*p == DLE) - m->lastDLE = 1; - else - count++; - } - p++; - len--; - } - if (len < 0) { - printk(KERN_WARNING "isdn_tty: len<0 in DLEdown\n"); - return 0; - } - return count; -} - -/* This routine is called from within isdn_tty_write() when receiving - * audio-data. It interrupts receiving, if an character other than - * ^S or ^Q is sent. - */ -static int -isdn_tty_end_vrx(const char *buf, int c) -{ - char ch; - - while (c--) { - ch = *buf; - if ((ch != 0x11) && (ch != 0x13)) - return 1; - buf++; - } - return 0; -} - -static int voice_cf[7] = -{0, 0, 4, 3, 2, 0, 0}; - -#endif /* CONFIG_ISDN_AUDIO */ - -/* isdn_tty_senddown() is called either directly from within isdn_tty_write() - * or via timer-interrupt from within isdn_tty_modem_xmit(). It pulls - * outgoing data from the tty's xmit-buffer, handles voice-decompression or - * T.70 if necessary, and finally queues it up for sending via isdn_tty_tint. - */ -static void -isdn_tty_senddown(modem_info *info) -{ - int buflen; - int skb_res; -#ifdef CONFIG_ISDN_AUDIO - int audio_len; -#endif - struct sk_buff *skb; - -#ifdef CONFIG_ISDN_AUDIO - if (info->vonline & 4) { - info->vonline &= ~6; - if (!info->vonline) { -#ifdef ISDN_DEBUG_MODEM_VOICE - printk(KERN_DEBUG - "senddown: send VCON on ttyI%d\n", - info->line); -#endif - isdn_tty_at_cout("\r\nVCON\r\n", info); - } - } -#endif - if (!(buflen = info->xmit_count)) - return; - if ((info->emu.mdmreg[REG_CTS] & BIT_CTS) != 0) - info->msr &= ~UART_MSR_CTS; - info->lsr &= ~UART_LSR_TEMT; - /* info->xmit_count is modified here and in isdn_tty_write(). - * So we return here if isdn_tty_write() is in the - * critical section. - */ - atomic_inc(&info->xmit_lock); - if (!(atomic_dec_and_test(&info->xmit_lock))) - return; - if (info->isdn_driver < 0) { - info->xmit_count = 0; - return; - } - skb_res = dev->drv[info->isdn_driver]->interface->hl_hdrlen + 4; -#ifdef CONFIG_ISDN_AUDIO - if (info->vonline & 2) - audio_len = buflen * voice_cf[info->emu.vpar[3]]; - else - audio_len = 0; - skb = dev_alloc_skb(skb_res + buflen + audio_len); -#else - skb = dev_alloc_skb(skb_res + buflen); -#endif - if (!skb) { - printk(KERN_WARNING - "isdn_tty: Out of memory in ttyI%d senddown\n", - info->line); - return; - } - skb_reserve(skb, skb_res); - skb_put_data(skb, info->port.xmit_buf, buflen); - info->xmit_count = 0; -#ifdef CONFIG_ISDN_AUDIO - if (info->vonline & 2) { - /* For now, ifmt is fixed to 1 (alaw), since this - * is used with ISDN everywhere in the world, except - * US, Canada and Japan. - * Later, when US-ISDN protocols are implemented, - * this setting will depend on the D-channel protocol. - */ - int ifmt = 1; - - /* voice conversion/decompression */ - switch (info->emu.vpar[3]) { - case 2: - case 3: - case 4: - /* adpcm, compatible to ZyXel 1496 modem - * with ROM revision 6.01 - */ - audio_len = isdn_audio_adpcm2xlaw(info->adpcms, - ifmt, - skb->data, - skb_put(skb, audio_len), - buflen); - skb_pull(skb, buflen); - skb_trim(skb, audio_len); - break; - case 5: - /* a-law */ - if (!ifmt) - isdn_audio_alaw2ulaw(skb->data, - buflen); - break; - case 6: - /* u-law */ - if (ifmt) - isdn_audio_ulaw2alaw(skb->data, - buflen); - break; - } - } -#endif /* CONFIG_ISDN_AUDIO */ - if (info->emu.mdmreg[REG_T70] & BIT_T70) { - /* Add T.70 simplified header */ - if (info->emu.mdmreg[REG_T70] & BIT_T70_EXT) - memcpy(skb_push(skb, 2), "\1\0", 2); - else - memcpy(skb_push(skb, 4), "\1\0\1\0", 4); - } - skb_queue_tail(&info->xmit_queue, skb); -} - -/************************************************************ - * - * Modem-functions - * - * mostly "stolen" from original Linux-serial.c and friends. - * - ************************************************************/ - -/* The next routine is called once from within timer-interrupt - * triggered within isdn_tty_modem_ncarrier(). It calls - * isdn_tty_modem_result() to stuff a "NO CARRIER" Message - * into the tty's buffer. - */ -static void -isdn_tty_modem_do_ncarrier(struct timer_list *t) -{ - modem_info *info = from_timer(info, t, nc_timer); - isdn_tty_modem_result(RESULT_NO_CARRIER, info); -} - -/* Next routine is called, whenever the DTR-signal is raised. - * It checks the ncarrier-flag, and triggers the above routine - * when necessary. The ncarrier-flag is set, whenever DTR goes - * low. - */ -static void -isdn_tty_modem_ncarrier(modem_info *info) -{ - if (info->ncarrier) { - info->nc_timer.expires = jiffies + HZ; - add_timer(&info->nc_timer); - } -} - -/* - * return the usage calculated by si and layer 2 protocol - */ -static int -isdn_calc_usage(int si, int l2) -{ - int usg = ISDN_USAGE_MODEM; - -#ifdef CONFIG_ISDN_AUDIO - if (si == 1) { - switch (l2) { - case ISDN_PROTO_L2_MODEM: - usg = ISDN_USAGE_MODEM; - break; -#ifdef CONFIG_ISDN_TTY_FAX - case ISDN_PROTO_L2_FAX: - usg = ISDN_USAGE_FAX; - break; -#endif - case ISDN_PROTO_L2_TRANS: - default: - usg = ISDN_USAGE_VOICE; - break; - } - } -#endif - return (usg); -} - -/* isdn_tty_dial() performs dialing of a tty an the necessary - * setup of the lower levels before that. - */ -static void -isdn_tty_dial(char *n, modem_info *info, atemu *m) -{ - int usg = ISDN_USAGE_MODEM; - int si = 7; - int l2 = m->mdmreg[REG_L2PROT]; - u_long flags; - isdn_ctrl cmd; - int i; - int j; - - for (j = 7; j >= 0; j--) - if (m->mdmreg[REG_SI1] & (1 << j)) { - si = bit2si[j]; - break; - } - usg = isdn_calc_usage(si, l2); -#ifdef CONFIG_ISDN_AUDIO - if ((si == 1) && - (l2 != ISDN_PROTO_L2_MODEM) -#ifdef CONFIG_ISDN_TTY_FAX - && (l2 != ISDN_PROTO_L2_FAX) -#endif - ) { - l2 = ISDN_PROTO_L2_TRANS; - usg = ISDN_USAGE_VOICE; - } -#endif - m->mdmreg[REG_SI1I] = si2bit[si]; - spin_lock_irqsave(&dev->lock, flags); - i = isdn_get_free_channel(usg, l2, m->mdmreg[REG_L3PROT], -1, -1, m->msn); - if (i < 0) { - spin_unlock_irqrestore(&dev->lock, flags); - isdn_tty_modem_result(RESULT_NO_DIALTONE, info); - } else { - info->isdn_driver = dev->drvmap[i]; - info->isdn_channel = dev->chanmap[i]; - info->drv_index = i; - dev->m_idx[i] = info->line; - dev->usage[i] |= ISDN_USAGE_OUTGOING; - info->last_dir = 1; - strcpy(info->last_num, n); - isdn_info_update(); - spin_unlock_irqrestore(&dev->lock, flags); - cmd.driver = info->isdn_driver; - cmd.arg = info->isdn_channel; - cmd.command = ISDN_CMD_CLREAZ; - isdn_command(&cmd); - strcpy(cmd.parm.num, isdn_map_eaz2msn(m->msn, info->isdn_driver)); - cmd.driver = info->isdn_driver; - cmd.command = ISDN_CMD_SETEAZ; - isdn_command(&cmd); - cmd.driver = info->isdn_driver; - cmd.command = ISDN_CMD_SETL2; - info->last_l2 = l2; - cmd.arg = info->isdn_channel + (l2 << 8); - isdn_command(&cmd); - cmd.driver = info->isdn_driver; - cmd.command = ISDN_CMD_SETL3; - cmd.arg = info->isdn_channel + (m->mdmreg[REG_L3PROT] << 8); -#ifdef CONFIG_ISDN_TTY_FAX - if (l2 == ISDN_PROTO_L2_FAX) { - cmd.parm.fax = info->fax; - info->fax->direction = ISDN_TTY_FAX_CONN_OUT; - } -#endif - isdn_command(&cmd); - cmd.driver = info->isdn_driver; - cmd.arg = info->isdn_channel; - sprintf(cmd.parm.setup.phone, "%s", n); - sprintf(cmd.parm.setup.eazmsn, "%s", - isdn_map_eaz2msn(m->msn, info->isdn_driver)); - cmd.parm.setup.si1 = si; - cmd.parm.setup.si2 = m->mdmreg[REG_SI2]; - cmd.command = ISDN_CMD_DIAL; - info->dialing = 1; - info->emu.carrierwait = 0; - strcpy(dev->num[i], n); - isdn_info_update(); - isdn_command(&cmd); - isdn_timer_ctrl(ISDN_TIMER_CARRIER, 1); - } -} - -/* isdn_tty_hangup() disassociates a tty from the real - * ISDN-line (hangup). The usage-status is cleared - * and some cleanup is done also. - */ -void -isdn_tty_modem_hup(modem_info *info, int local) -{ - isdn_ctrl cmd; - int di, ch; - - if (!info) - return; - - di = info->isdn_driver; - ch = info->isdn_channel; - if (di < 0 || ch < 0) - return; - - info->isdn_driver = -1; - info->isdn_channel = -1; - -#ifdef ISDN_DEBUG_MODEM_HUP - printk(KERN_DEBUG "Mhup ttyI%d\n", info->line); -#endif - info->rcvsched = 0; - isdn_tty_flush_buffer(info->port.tty); - if (info->online) { - info->last_lhup = local; - info->online = 0; - isdn_tty_modem_result(RESULT_NO_CARRIER, info); - } -#ifdef CONFIG_ISDN_AUDIO - info->vonline = 0; -#ifdef CONFIG_ISDN_TTY_FAX - info->faxonline = 0; - info->fax->phase = ISDN_FAX_PHASE_IDLE; -#endif - info->emu.vpar[4] = 0; - info->emu.vpar[5] = 8; - kfree(info->dtmf_state); - info->dtmf_state = NULL; - kfree(info->silence_state); - info->silence_state = NULL; - kfree(info->adpcms); - info->adpcms = NULL; - kfree(info->adpcmr); - info->adpcmr = NULL; -#endif - if ((info->msr & UART_MSR_RI) && - (info->emu.mdmreg[REG_RUNG] & BIT_RUNG)) - isdn_tty_modem_result(RESULT_RUNG, info); - info->msr &= ~(UART_MSR_DCD | UART_MSR_RI); - info->lsr |= UART_LSR_TEMT; - - if (local) { - cmd.driver = di; - cmd.command = ISDN_CMD_HANGUP; - cmd.arg = ch; - isdn_command(&cmd); - } - - isdn_all_eaz(di, ch); - info->emu.mdmreg[REG_RINGCNT] = 0; - isdn_free_channel(di, ch, 0); - - if (info->drv_index >= 0) { - dev->m_idx[info->drv_index] = -1; - info->drv_index = -1; - } -} - -/* - * Begin of a CAPI like interface, currently used only for - * supplementary service (CAPI 2.0 part III) - */ -#include -#include - -int -isdn_tty_capi_facility(capi_msg *cm) { - return (-1); /* dummy */ -} - -/* isdn_tty_suspend() tries to suspend the current tty connection - */ -static void -isdn_tty_suspend(char *id, modem_info *info, atemu *m) -{ - isdn_ctrl cmd; - - int l; - - if (!info) - return; - -#ifdef ISDN_DEBUG_MODEM_SERVICES - printk(KERN_DEBUG "Msusp ttyI%d\n", info->line); -#endif - l = strlen(id); - if ((info->isdn_driver >= 0)) { - cmd.parm.cmsg.Length = l + 18; - cmd.parm.cmsg.Command = CAPI_FACILITY; - cmd.parm.cmsg.Subcommand = CAPI_REQ; - cmd.parm.cmsg.adr.Controller = info->isdn_driver + 1; - cmd.parm.cmsg.para[0] = 3; /* 16 bit 0x0003 suplementary service */ - cmd.parm.cmsg.para[1] = 0; - cmd.parm.cmsg.para[2] = l + 3; - cmd.parm.cmsg.para[3] = 4; /* 16 bit 0x0004 Suspend */ - cmd.parm.cmsg.para[4] = 0; - cmd.parm.cmsg.para[5] = l; - memcpy(&cmd.parm.cmsg.para[6], id, l); - cmd.command = CAPI_PUT_MESSAGE; - cmd.driver = info->isdn_driver; - cmd.arg = info->isdn_channel; - isdn_command(&cmd); - } -} - -/* isdn_tty_resume() tries to resume a suspended call - * setup of the lower levels before that. unfortunately here is no - * checking for compatibility of used protocols implemented by Q931 - * It does the same things like isdn_tty_dial, the last command - * is different, may be we can merge it. - */ - -static void -isdn_tty_resume(char *id, modem_info *info, atemu *m) -{ - int usg = ISDN_USAGE_MODEM; - int si = 7; - int l2 = m->mdmreg[REG_L2PROT]; - isdn_ctrl cmd; - ulong flags; - int i; - int j; - int l; - - l = strlen(id); - for (j = 7; j >= 0; j--) - if (m->mdmreg[REG_SI1] & (1 << j)) { - si = bit2si[j]; - break; - } - usg = isdn_calc_usage(si, l2); -#ifdef CONFIG_ISDN_AUDIO - if ((si == 1) && - (l2 != ISDN_PROTO_L2_MODEM) -#ifdef CONFIG_ISDN_TTY_FAX - && (l2 != ISDN_PROTO_L2_FAX) -#endif - ) { - l2 = ISDN_PROTO_L2_TRANS; - usg = ISDN_USAGE_VOICE; - } -#endif - m->mdmreg[REG_SI1I] = si2bit[si]; - spin_lock_irqsave(&dev->lock, flags); - i = isdn_get_free_channel(usg, l2, m->mdmreg[REG_L3PROT], -1, -1, m->msn); - if (i < 0) { - spin_unlock_irqrestore(&dev->lock, flags); - isdn_tty_modem_result(RESULT_NO_DIALTONE, info); - } else { - info->isdn_driver = dev->drvmap[i]; - info->isdn_channel = dev->chanmap[i]; - info->drv_index = i; - dev->m_idx[i] = info->line; - dev->usage[i] |= ISDN_USAGE_OUTGOING; - info->last_dir = 1; -// strcpy(info->last_num, n); - isdn_info_update(); - spin_unlock_irqrestore(&dev->lock, flags); - cmd.driver = info->isdn_driver; - cmd.arg = info->isdn_channel; - cmd.command = ISDN_CMD_CLREAZ; - isdn_command(&cmd); - strcpy(cmd.parm.num, isdn_map_eaz2msn(m->msn, info->isdn_driver)); - cmd.driver = info->isdn_driver; - cmd.command = ISDN_CMD_SETEAZ; - isdn_command(&cmd); - cmd.driver = info->isdn_driver; - cmd.command = ISDN_CMD_SETL2; - info->last_l2 = l2; - cmd.arg = info->isdn_channel + (l2 << 8); - isdn_command(&cmd); - cmd.driver = info->isdn_driver; - cmd.command = ISDN_CMD_SETL3; - cmd.arg = info->isdn_channel + (m->mdmreg[REG_L3PROT] << 8); - isdn_command(&cmd); - cmd.driver = info->isdn_driver; - cmd.arg = info->isdn_channel; - cmd.parm.cmsg.Length = l + 18; - cmd.parm.cmsg.Command = CAPI_FACILITY; - cmd.parm.cmsg.Subcommand = CAPI_REQ; - cmd.parm.cmsg.adr.Controller = info->isdn_driver + 1; - cmd.parm.cmsg.para[0] = 3; /* 16 bit 0x0003 suplementary service */ - cmd.parm.cmsg.para[1] = 0; - cmd.parm.cmsg.para[2] = l + 3; - cmd.parm.cmsg.para[3] = 5; /* 16 bit 0x0005 Resume */ - cmd.parm.cmsg.para[4] = 0; - cmd.parm.cmsg.para[5] = l; - memcpy(&cmd.parm.cmsg.para[6], id, l); - cmd.command = CAPI_PUT_MESSAGE; - info->dialing = 1; -// strcpy(dev->num[i], n); - isdn_info_update(); - isdn_command(&cmd); - isdn_timer_ctrl(ISDN_TIMER_CARRIER, 1); - } -} - -/* isdn_tty_send_msg() sends a message to a HL driver - * This is used for hybrid modem cards to send AT commands to it - */ - -static void -isdn_tty_send_msg(modem_info *info, atemu *m, char *msg) -{ - int usg = ISDN_USAGE_MODEM; - int si = 7; - int l2 = m->mdmreg[REG_L2PROT]; - isdn_ctrl cmd; - ulong flags; - int i; - int j; - int l; - - l = min(strlen(msg), sizeof(cmd.parm) - sizeof(cmd.parm.cmsg) - + sizeof(cmd.parm.cmsg.para) - 2); - - if (!l) { - isdn_tty_modem_result(RESULT_ERROR, info); - return; - } - for (j = 7; j >= 0; j--) - if (m->mdmreg[REG_SI1] & (1 << j)) { - si = bit2si[j]; - break; - } - usg = isdn_calc_usage(si, l2); -#ifdef CONFIG_ISDN_AUDIO - if ((si == 1) && - (l2 != ISDN_PROTO_L2_MODEM) -#ifdef CONFIG_ISDN_TTY_FAX - && (l2 != ISDN_PROTO_L2_FAX) -#endif - ) { - l2 = ISDN_PROTO_L2_TRANS; - usg = ISDN_USAGE_VOICE; - } -#endif - m->mdmreg[REG_SI1I] = si2bit[si]; - spin_lock_irqsave(&dev->lock, flags); - i = isdn_get_free_channel(usg, l2, m->mdmreg[REG_L3PROT], -1, -1, m->msn); - if (i < 0) { - spin_unlock_irqrestore(&dev->lock, flags); - isdn_tty_modem_result(RESULT_NO_DIALTONE, info); - } else { - info->isdn_driver = dev->drvmap[i]; - info->isdn_channel = dev->chanmap[i]; - info->drv_index = i; - dev->m_idx[i] = info->line; - dev->usage[i] |= ISDN_USAGE_OUTGOING; - info->last_dir = 1; - isdn_info_update(); - spin_unlock_irqrestore(&dev->lock, flags); - cmd.driver = info->isdn_driver; - cmd.arg = info->isdn_channel; - cmd.command = ISDN_CMD_CLREAZ; - isdn_command(&cmd); - strcpy(cmd.parm.num, isdn_map_eaz2msn(m->msn, info->isdn_driver)); - cmd.driver = info->isdn_driver; - cmd.command = ISDN_CMD_SETEAZ; - isdn_command(&cmd); - cmd.driver = info->isdn_driver; - cmd.command = ISDN_CMD_SETL2; - info->last_l2 = l2; - cmd.arg = info->isdn_channel + (l2 << 8); - isdn_command(&cmd); - cmd.driver = info->isdn_driver; - cmd.command = ISDN_CMD_SETL3; - cmd.arg = info->isdn_channel + (m->mdmreg[REG_L3PROT] << 8); - isdn_command(&cmd); - cmd.driver = info->isdn_driver; - cmd.arg = info->isdn_channel; - cmd.parm.cmsg.Length = l + 14; - cmd.parm.cmsg.Command = CAPI_MANUFACTURER; - cmd.parm.cmsg.Subcommand = CAPI_REQ; - cmd.parm.cmsg.adr.Controller = info->isdn_driver + 1; - cmd.parm.cmsg.para[0] = l + 1; - strncpy(&cmd.parm.cmsg.para[1], msg, l); - cmd.parm.cmsg.para[l + 1] = 0xd; - cmd.command = CAPI_PUT_MESSAGE; -/* info->dialing = 1; - strcpy(dev->num[i], n); - isdn_info_update(); -*/ - isdn_command(&cmd); - } -} - -static inline int -isdn_tty_paranoia_check(modem_info *info, char *name, const char *routine) -{ -#ifdef MODEM_PARANOIA_CHECK - if (!info) { - printk(KERN_WARNING "isdn_tty: null info_struct for %s in %s\n", - name, routine); - return 1; - } - if (info->magic != ISDN_ASYNC_MAGIC) { - printk(KERN_WARNING "isdn_tty: bad magic for modem struct %s in %s\n", - name, routine); - return 1; - } -#endif - return 0; -} - -/* - * This routine is called to set the UART divisor registers to match - * the specified baud rate for a serial port. - */ -static void -isdn_tty_change_speed(modem_info *info) -{ - struct tty_port *port = &info->port; - uint cflag, - cval, - quot; - int i; - - if (!port->tty) - return; - cflag = port->tty->termios.c_cflag; - - quot = i = cflag & CBAUD; - if (i & CBAUDEX) { - i &= ~CBAUDEX; - if (i < 1 || i > 2) - port->tty->termios.c_cflag &= ~CBAUDEX; - else - i += 15; - } - if (quot) { - info->mcr |= UART_MCR_DTR; - isdn_tty_modem_ncarrier(info); - } else { - info->mcr &= ~UART_MCR_DTR; - if (info->emu.mdmreg[REG_DTRHUP] & BIT_DTRHUP) { -#ifdef ISDN_DEBUG_MODEM_HUP - printk(KERN_DEBUG "Mhup in changespeed\n"); -#endif - if (info->online) - info->ncarrier = 1; - isdn_tty_modem_reset_regs(info, 0); - isdn_tty_modem_hup(info, 1); - } - return; - } - /* byte size and parity */ - cval = cflag & (CSIZE | CSTOPB); - cval >>= 4; - if (cflag & PARENB) - cval |= UART_LCR_PARITY; - if (!(cflag & PARODD)) - cval |= UART_LCR_EPAR; - - tty_port_set_check_carrier(port, ~cflag & CLOCAL); -} - -static int -isdn_tty_startup(modem_info *info) -{ - if (tty_port_initialized(&info->port)) - return 0; - isdn_lock_drivers(); -#ifdef ISDN_DEBUG_MODEM_OPEN - printk(KERN_DEBUG "starting up ttyi%d ...\n", info->line); -#endif - /* - * Now, initialize the UART - */ - info->mcr = UART_MCR_DTR | UART_MCR_RTS | UART_MCR_OUT2; - if (info->port.tty) - clear_bit(TTY_IO_ERROR, &info->port.tty->flags); - /* - * and set the speed of the serial port - */ - isdn_tty_change_speed(info); - - tty_port_set_initialized(&info->port, 1); - info->msr |= (UART_MSR_DSR | UART_MSR_CTS); - info->send_outstanding = 0; - return 0; -} - -/* - * This routine will shutdown a serial port; interrupts are disabled, and - * DTR is dropped if the hangup on close termio flag is on. - */ -static void -isdn_tty_shutdown(modem_info *info) -{ - if (!tty_port_initialized(&info->port)) - return; -#ifdef ISDN_DEBUG_MODEM_OPEN - printk(KERN_DEBUG "Shutting down isdnmodem port %d ....\n", info->line); -#endif - isdn_unlock_drivers(); - info->msr &= ~UART_MSR_RI; - if (!info->port.tty || (info->port.tty->termios.c_cflag & HUPCL)) { - info->mcr &= ~(UART_MCR_DTR | UART_MCR_RTS); - if (info->emu.mdmreg[REG_DTRHUP] & BIT_DTRHUP) { - isdn_tty_modem_reset_regs(info, 0); -#ifdef ISDN_DEBUG_MODEM_HUP - printk(KERN_DEBUG "Mhup in isdn_tty_shutdown\n"); -#endif - isdn_tty_modem_hup(info, 1); - } - } - if (info->port.tty) - set_bit(TTY_IO_ERROR, &info->port.tty->flags); - - tty_port_set_initialized(&info->port, 0); -} - -/* isdn_tty_write() is the main send-routine. It is called from the upper - * levels within the kernel to perform sending data. Depending on the - * online-flag it either directs output to the at-command-interpreter or - * to the lower level. Additional tasks done here: - * - If online, check for escape-sequence (+++) - * - If sending audio-data, call isdn_tty_DLEdown() to parse DLE-codes. - * - If receiving audio-data, call isdn_tty_end_vrx() to abort if needed. - * - If dialing, abort dial. - */ -static int -isdn_tty_write(struct tty_struct *tty, const u_char *buf, int count) -{ - int c; - int total = 0; - modem_info *info = (modem_info *) tty->driver_data; - atemu *m = &info->emu; - - if (isdn_tty_paranoia_check(info, tty->name, "isdn_tty_write")) - return 0; - /* See isdn_tty_senddown() */ - atomic_inc(&info->xmit_lock); - while (1) { - c = count; - if (c > info->xmit_size - info->xmit_count) - c = info->xmit_size - info->xmit_count; - if (info->isdn_driver >= 0 && c > dev->drv[info->isdn_driver]->maxbufsize) - c = dev->drv[info->isdn_driver]->maxbufsize; - if (c <= 0) - break; - if ((info->online > 1) -#ifdef CONFIG_ISDN_AUDIO - || (info->vonline & 3) -#endif - ) { -#ifdef CONFIG_ISDN_AUDIO - if (!info->vonline) -#endif - isdn_tty_check_esc(buf, m->mdmreg[REG_ESC], c, - &(m->pluscount), - &(m->lastplus)); - memcpy(&info->port.xmit_buf[info->xmit_count], buf, c); -#ifdef CONFIG_ISDN_AUDIO - if (info->vonline) { - int cc = isdn_tty_handleDLEdown(info, m, c); - if (info->vonline & 2) { - if (!cc) { - /* If DLE decoding results in zero-transmit, but - * c originally was non-zero, do a wakeup. - */ - tty_wakeup(tty); - info->msr |= UART_MSR_CTS; - info->lsr |= UART_LSR_TEMT; - } - info->xmit_count += cc; - } - if ((info->vonline & 3) == 1) { - /* Do NOT handle Ctrl-Q or Ctrl-S - * when in full-duplex audio mode. - */ - if (isdn_tty_end_vrx(buf, c)) { - info->vonline &= ~1; -#ifdef ISDN_DEBUG_MODEM_VOICE - printk(KERN_DEBUG - "got !^Q/^S, send DLE-ETX,VCON on ttyI%d\n", - info->line); -#endif - isdn_tty_at_cout("\020\003\r\nVCON\r\n", info); - } - } - } else - if (TTY_IS_FCLASS1(info)) { - int cc = isdn_tty_handleDLEdown(info, m, c); - - if (info->vonline & 4) { /* ETX seen */ - isdn_ctrl c; - - c.command = ISDN_CMD_FAXCMD; - c.driver = info->isdn_driver; - c.arg = info->isdn_channel; - c.parm.aux.cmd = ISDN_FAX_CLASS1_CTRL; - c.parm.aux.subcmd = ETX; - isdn_command(&c); - } - info->vonline = 0; -#ifdef ISDN_DEBUG_MODEM_VOICE - printk(KERN_DEBUG "fax dle cc/c %d/%d\n", cc, c); -#endif - info->xmit_count += cc; - } else -#endif - info->xmit_count += c; - } else { - info->msr |= UART_MSR_CTS; - info->lsr |= UART_LSR_TEMT; - if (info->dialing) { - info->dialing = 0; -#ifdef ISDN_DEBUG_MODEM_HUP - printk(KERN_DEBUG "Mhup in isdn_tty_write\n"); -#endif - isdn_tty_modem_result(RESULT_NO_CARRIER, info); - isdn_tty_modem_hup(info, 1); - } else - c = isdn_tty_edit_at(buf, c, info); - } - buf += c; - count -= c; - total += c; - } - atomic_dec(&info->xmit_lock); - if ((info->xmit_count) || !skb_queue_empty(&info->xmit_queue)) { - if (m->mdmreg[REG_DXMT] & BIT_DXMT) { - isdn_tty_senddown(info); - isdn_tty_tint(info); - } - isdn_timer_ctrl(ISDN_TIMER_MODEMXMIT, 1); - } - return total; -} - -static int -isdn_tty_write_room(struct tty_struct *tty) -{ - modem_info *info = (modem_info *) tty->driver_data; - int ret; - - if (isdn_tty_paranoia_check(info, tty->name, "isdn_tty_write_room")) - return 0; - if (!info->online) - return info->xmit_size; - ret = info->xmit_size - info->xmit_count; - return (ret < 0) ? 0 : ret; -} - -static int -isdn_tty_chars_in_buffer(struct tty_struct *tty) -{ - modem_info *info = (modem_info *) tty->driver_data; - - if (isdn_tty_paranoia_check(info, tty->name, "isdn_tty_chars_in_buffer")) - return 0; - if (!info->online) - return 0; - return (info->xmit_count); -} - -static void -isdn_tty_flush_buffer(struct tty_struct *tty) -{ - modem_info *info; - - if (!tty) { - return; - } - info = (modem_info *) tty->driver_data; - if (isdn_tty_paranoia_check(info, tty->name, "isdn_tty_flush_buffer")) { - return; - } - isdn_tty_cleanup_xmit(info); - info->xmit_count = 0; - tty_wakeup(tty); -} - -static void -isdn_tty_flush_chars(struct tty_struct *tty) -{ - modem_info *info = (modem_info *) tty->driver_data; - - if (isdn_tty_paranoia_check(info, tty->name, "isdn_tty_flush_chars")) - return; - if ((info->xmit_count) || !skb_queue_empty(&info->xmit_queue)) - isdn_timer_ctrl(ISDN_TIMER_MODEMXMIT, 1); -} - -/* - * ------------------------------------------------------------ - * isdn_tty_throttle() - * - * This routine is called by the upper-layer tty layer to signal that - * incoming characters should be throttled. - * ------------------------------------------------------------ - */ -static void -isdn_tty_throttle(struct tty_struct *tty) -{ - modem_info *info = (modem_info *) tty->driver_data; - - if (isdn_tty_paranoia_check(info, tty->name, "isdn_tty_throttle")) - return; - if (I_IXOFF(tty)) - info->x_char = STOP_CHAR(tty); - info->mcr &= ~UART_MCR_RTS; -} - -static void -isdn_tty_unthrottle(struct tty_struct *tty) -{ - modem_info *info = (modem_info *) tty->driver_data; - - if (isdn_tty_paranoia_check(info, tty->name, "isdn_tty_unthrottle")) - return; - if (I_IXOFF(tty)) { - if (info->x_char) - info->x_char = 0; - else - info->x_char = START_CHAR(tty); - } - info->mcr |= UART_MCR_RTS; -} - -/* - * ------------------------------------------------------------ - * isdn_tty_ioctl() and friends - * ------------------------------------------------------------ - */ - -/* - * isdn_tty_get_lsr_info - get line status register info - * - * Purpose: Let user call ioctl() to get info when the UART physically - * is emptied. On bus types like RS485, the transmitter must - * release the bus after transmitting. This must be done when - * the transmit shift register is empty, not be done when the - * transmit holding register is empty. This functionality - * allows RS485 driver to be written in user space. - */ -static int -isdn_tty_get_lsr_info(modem_info *info, uint __user *value) -{ - u_char status; - uint result; - - status = info->lsr; - result = ((status & UART_LSR_TEMT) ? TIOCSER_TEMT : 0); - return put_user(result, value); -} - - -static int -isdn_tty_tiocmget(struct tty_struct *tty) -{ - modem_info *info = (modem_info *) tty->driver_data; - u_char control, status; - - if (isdn_tty_paranoia_check(info, tty->name, __func__)) - return -ENODEV; - if (tty_io_error(tty)) - return -EIO; - - mutex_lock(&modem_info_mutex); -#ifdef ISDN_DEBUG_MODEM_IOCTL - printk(KERN_DEBUG "ttyI%d ioctl TIOCMGET\n", info->line); -#endif - - control = info->mcr; - status = info->msr; - mutex_unlock(&modem_info_mutex); - return ((control & UART_MCR_RTS) ? TIOCM_RTS : 0) - | ((control & UART_MCR_DTR) ? TIOCM_DTR : 0) - | ((status & UART_MSR_DCD) ? TIOCM_CAR : 0) - | ((status & UART_MSR_RI) ? TIOCM_RNG : 0) - | ((status & UART_MSR_DSR) ? TIOCM_DSR : 0) - | ((status & UART_MSR_CTS) ? TIOCM_CTS : 0); -} - -static int -isdn_tty_tiocmset(struct tty_struct *tty, - unsigned int set, unsigned int clear) -{ - modem_info *info = (modem_info *) tty->driver_data; - - if (isdn_tty_paranoia_check(info, tty->name, __func__)) - return -ENODEV; - if (tty_io_error(tty)) - return -EIO; - -#ifdef ISDN_DEBUG_MODEM_IOCTL - printk(KERN_DEBUG "ttyI%d ioctl TIOCMxxx: %x %x\n", info->line, set, clear); -#endif - - mutex_lock(&modem_info_mutex); - if (set & TIOCM_RTS) - info->mcr |= UART_MCR_RTS; - if (set & TIOCM_DTR) { - info->mcr |= UART_MCR_DTR; - isdn_tty_modem_ncarrier(info); - } - - if (clear & TIOCM_RTS) - info->mcr &= ~UART_MCR_RTS; - if (clear & TIOCM_DTR) { - info->mcr &= ~UART_MCR_DTR; - if (info->emu.mdmreg[REG_DTRHUP] & BIT_DTRHUP) { - isdn_tty_modem_reset_regs(info, 0); -#ifdef ISDN_DEBUG_MODEM_HUP - printk(KERN_DEBUG "Mhup in TIOCMSET\n"); -#endif - if (info->online) - info->ncarrier = 1; - isdn_tty_modem_hup(info, 1); - } - } - mutex_unlock(&modem_info_mutex); - return 0; -} - -static int -isdn_tty_ioctl(struct tty_struct *tty, uint cmd, ulong arg) -{ - modem_info *info = (modem_info *) tty->driver_data; - - if (isdn_tty_paranoia_check(info, tty->name, "isdn_tty_ioctl")) - return -ENODEV; - if (tty_io_error(tty)) - return -EIO; - switch (cmd) { - case TIOCSERGETLSR: /* Get line status register */ -#ifdef ISDN_DEBUG_MODEM_IOCTL - printk(KERN_DEBUG "ttyI%d ioctl TIOCSERGETLSR\n", info->line); -#endif - return isdn_tty_get_lsr_info(info, (uint __user *) arg); - default: -#ifdef ISDN_DEBUG_MODEM_IOCTL - printk(KERN_DEBUG "UNKNOWN ioctl 0x%08x on ttyi%d\n", cmd, info->line); -#endif - return -ENOIOCTLCMD; - } - return 0; -} - -static void -isdn_tty_set_termios(struct tty_struct *tty, struct ktermios *old_termios) -{ - modem_info *info = (modem_info *) tty->driver_data; - - mutex_lock(&modem_info_mutex); - if (!old_termios) - isdn_tty_change_speed(info); - else { - if (tty->termios.c_cflag == old_termios->c_cflag && - tty->termios.c_ispeed == old_termios->c_ispeed && - tty->termios.c_ospeed == old_termios->c_ospeed) { - mutex_unlock(&modem_info_mutex); - return; - } - isdn_tty_change_speed(info); - } - mutex_unlock(&modem_info_mutex); -} - -/* - * ------------------------------------------------------------ - * isdn_tty_open() and friends - * ------------------------------------------------------------ - */ - -static int isdn_tty_install(struct tty_driver *driver, struct tty_struct *tty) -{ - modem_info *info = &dev->mdm.info[tty->index]; - - if (isdn_tty_paranoia_check(info, tty->name, __func__)) - return -ENODEV; - - tty->driver_data = info; - - return tty_port_install(&info->port, driver, tty); -} - -/* - * This routine is called whenever a serial port is opened. It - * enables interrupts for a serial port, linking in its async structure into - * the IRQ chain. It also performs the serial-specific - * initialization for the tty structure. - */ -static int -isdn_tty_open(struct tty_struct *tty, struct file *filp) -{ - modem_info *info = tty->driver_data; - struct tty_port *port = &info->port; - int retval; - -#ifdef ISDN_DEBUG_MODEM_OPEN - printk(KERN_DEBUG "isdn_tty_open %s, count = %d\n", tty->name, - port->count); -#endif - port->count++; - port->tty = tty; - /* - * Start up serial port - */ - retval = isdn_tty_startup(info); - if (retval) { -#ifdef ISDN_DEBUG_MODEM_OPEN - printk(KERN_DEBUG "isdn_tty_open return after startup\n"); -#endif - return retval; - } - retval = tty_port_block_til_ready(port, tty, filp); - if (retval) { -#ifdef ISDN_DEBUG_MODEM_OPEN - printk(KERN_DEBUG "isdn_tty_open return after isdn_tty_block_til_ready \n"); -#endif - return retval; - } -#ifdef ISDN_DEBUG_MODEM_OPEN - printk(KERN_DEBUG "isdn_tty_open ttyi%d successful...\n", info->line); -#endif - dev->modempoll++; -#ifdef ISDN_DEBUG_MODEM_OPEN - printk(KERN_DEBUG "isdn_tty_open normal exit\n"); -#endif - return 0; -} - -static void -isdn_tty_close(struct tty_struct *tty, struct file *filp) -{ - modem_info *info = (modem_info *) tty->driver_data; - struct tty_port *port = &info->port; - ulong timeout; - - if (!info || isdn_tty_paranoia_check(info, tty->name, "isdn_tty_close")) - return; - if (tty_hung_up_p(filp)) { -#ifdef ISDN_DEBUG_MODEM_OPEN - printk(KERN_DEBUG "isdn_tty_close return after tty_hung_up_p\n"); -#endif - return; - } - if ((tty->count == 1) && (port->count != 1)) { - /* - * Uh, oh. tty->count is 1, which means that the tty - * structure will be freed. Info->count should always - * be one in these conditions. If it's greater than - * one, we've got real problems, since it means the - * serial port won't be shutdown. - */ - printk(KERN_ERR "isdn_tty_close: bad port count; tty->count is 1, " - "info->count is %d\n", port->count); - port->count = 1; - } - if (--port->count < 0) { - printk(KERN_ERR "isdn_tty_close: bad port count for ttyi%d: %d\n", - info->line, port->count); - port->count = 0; - } - if (port->count) { -#ifdef ISDN_DEBUG_MODEM_OPEN - printk(KERN_DEBUG "isdn_tty_close after info->count != 0\n"); -#endif - return; - } - info->closing = 1; - - tty->closing = 1; - /* - * At this point we stop accepting input. To do this, we - * disable the receive line status interrupts, and tell the - * interrupt driver to stop checking the data ready bit in the - * line status register. - */ - if (tty_port_initialized(port)) { - tty_wait_until_sent(tty, 3000); /* 30 seconds timeout */ - /* - * Before we drop DTR, make sure the UART transmitter - * has completely drained; this is especially - * important if there is a transmit FIFO! - */ - timeout = jiffies + HZ; - while (!(info->lsr & UART_LSR_TEMT)) { - schedule_timeout_interruptible(20); - if (time_after(jiffies, timeout)) - break; - } - } - dev->modempoll--; - isdn_tty_shutdown(info); - isdn_tty_flush_buffer(tty); - tty_ldisc_flush(tty); - port->tty = NULL; - info->ncarrier = 0; - - tty_port_close_end(port, tty); - info->closing = 0; -#ifdef ISDN_DEBUG_MODEM_OPEN - printk(KERN_DEBUG "isdn_tty_close normal exit\n"); -#endif -} - -/* - * isdn_tty_hangup() --- called by tty_hangup() when a hangup is signaled. - */ -static void -isdn_tty_hangup(struct tty_struct *tty) -{ - modem_info *info = (modem_info *) tty->driver_data; - struct tty_port *port = &info->port; - - if (isdn_tty_paranoia_check(info, tty->name, "isdn_tty_hangup")) - return; - isdn_tty_shutdown(info); - port->count = 0; - tty_port_set_active(port, 0); - port->tty = NULL; - wake_up_interruptible(&port->open_wait); -} - -/* This routine initializes all emulator-data. - */ -static void -isdn_tty_reset_profile(atemu *m) -{ - m->profile[0] = 0; - m->profile[1] = 0; - m->profile[2] = 43; - m->profile[3] = 13; - m->profile[4] = 10; - m->profile[5] = 8; - m->profile[6] = 3; - m->profile[7] = 60; - m->profile[8] = 2; - m->profile[9] = 6; - m->profile[10] = 7; - m->profile[11] = 70; - m->profile[12] = 0x45; - m->profile[13] = 4; - m->profile[14] = ISDN_PROTO_L2_X75I; - m->profile[15] = ISDN_PROTO_L3_TRANS; - m->profile[16] = ISDN_SERIAL_XMIT_SIZE / 16; - m->profile[17] = ISDN_MODEM_WINSIZE; - m->profile[18] = 4; - m->profile[19] = 0; - m->profile[20] = 0; - m->profile[23] = 0; - m->pmsn[0] = '\0'; - m->plmsn[0] = '\0'; -} - -#ifdef CONFIG_ISDN_AUDIO -static void -isdn_tty_modem_reset_vpar(atemu *m) -{ - m->vpar[0] = 2; /* Voice-device (2 = phone line) */ - m->vpar[1] = 0; /* Silence detection level (0 = none ) */ - m->vpar[2] = 70; /* Silence interval (7 sec. ) */ - m->vpar[3] = 2; /* Compression type (1 = ADPCM-2 ) */ - m->vpar[4] = 0; /* DTMF detection level (0 = softcode ) */ - m->vpar[5] = 8; /* DTMF interval (8 * 5 ms. ) */ -} -#endif - -#ifdef CONFIG_ISDN_TTY_FAX -static void -isdn_tty_modem_reset_faxpar(modem_info *info) -{ - T30_s *f = info->fax; - - f->code = 0; - f->phase = ISDN_FAX_PHASE_IDLE; - f->direction = 0; - f->resolution = 1; /* fine */ - f->rate = 5; /* 14400 bit/s */ - f->width = 0; - f->length = 0; - f->compression = 0; - f->ecm = 0; - f->binary = 0; - f->scantime = 0; - memset(&f->id[0], 32, FAXIDLEN - 1); - f->id[FAXIDLEN - 1] = 0; - f->badlin = 0; - f->badmul = 0; - f->bor = 0; - f->nbc = 0; - f->cq = 0; - f->cr = 0; - f->ctcrty = 0; - f->minsp = 0; - f->phcto = 30; - f->rel = 0; - memset(&f->pollid[0], 32, FAXIDLEN - 1); - f->pollid[FAXIDLEN - 1] = 0; -} -#endif - -static void -isdn_tty_modem_reset_regs(modem_info *info, int force) -{ - atemu *m = &info->emu; - if ((m->mdmreg[REG_DTRR] & BIT_DTRR) || force) { - memcpy(m->mdmreg, m->profile, ISDN_MODEM_NUMREG); - memcpy(m->msn, m->pmsn, ISDN_MSNLEN); - memcpy(m->lmsn, m->plmsn, ISDN_LMSNLEN); - info->xmit_size = m->mdmreg[REG_PSIZE] * 16; - } -#ifdef CONFIG_ISDN_AUDIO - isdn_tty_modem_reset_vpar(m); -#endif -#ifdef CONFIG_ISDN_TTY_FAX - isdn_tty_modem_reset_faxpar(info); -#endif - m->mdmcmdl = 0; -} - -static void -modem_write_profile(atemu *m) -{ - memcpy(m->profile, m->mdmreg, ISDN_MODEM_NUMREG); - memcpy(m->pmsn, m->msn, ISDN_MSNLEN); - memcpy(m->plmsn, m->lmsn, ISDN_LMSNLEN); - if (dev->profd) - send_sig(SIGIO, dev->profd, 1); -} - -static const struct tty_operations modem_ops = { - .install = isdn_tty_install, - .open = isdn_tty_open, - .close = isdn_tty_close, - .write = isdn_tty_write, - .flush_chars = isdn_tty_flush_chars, - .write_room = isdn_tty_write_room, - .chars_in_buffer = isdn_tty_chars_in_buffer, - .flush_buffer = isdn_tty_flush_buffer, - .ioctl = isdn_tty_ioctl, - .throttle = isdn_tty_throttle, - .unthrottle = isdn_tty_unthrottle, - .set_termios = isdn_tty_set_termios, - .hangup = isdn_tty_hangup, - .tiocmget = isdn_tty_tiocmget, - .tiocmset = isdn_tty_tiocmset, -}; - -static int isdn_tty_carrier_raised(struct tty_port *port) -{ - modem_info *info = container_of(port, modem_info, port); - return info->msr & UART_MSR_DCD; -} - -static const struct tty_port_operations isdn_tty_port_ops = { - .carrier_raised = isdn_tty_carrier_raised, -}; - -int -isdn_tty_modem_init(void) -{ - isdn_modem_t *m; - int i, retval; - modem_info *info; - - m = &dev->mdm; - m->tty_modem = alloc_tty_driver(ISDN_MAX_CHANNELS); - if (!m->tty_modem) - return -ENOMEM; - m->tty_modem->name = "ttyI"; - m->tty_modem->major = ISDN_TTY_MAJOR; - m->tty_modem->minor_start = 0; - m->tty_modem->type = TTY_DRIVER_TYPE_SERIAL; - m->tty_modem->subtype = SERIAL_TYPE_NORMAL; - m->tty_modem->init_termios = tty_std_termios; - m->tty_modem->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; - m->tty_modem->flags = TTY_DRIVER_REAL_RAW; - m->tty_modem->driver_name = "isdn_tty"; - tty_set_operations(m->tty_modem, &modem_ops); - retval = tty_register_driver(m->tty_modem); - if (retval) { - printk(KERN_WARNING "isdn_tty: Couldn't register modem-device\n"); - goto err; - } - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - info = &m->info[i]; -#ifdef CONFIG_ISDN_TTY_FAX - if (!(info->fax = kmalloc(sizeof(T30_s), GFP_KERNEL))) { - printk(KERN_ERR "Could not allocate fax t30-buffer\n"); - retval = -ENOMEM; - goto err_unregister; - } -#endif - tty_port_init(&info->port); - info->port.ops = &isdn_tty_port_ops; - spin_lock_init(&info->readlock); - sprintf(info->last_cause, "0000"); - sprintf(info->last_num, "none"); - info->last_dir = 0; - info->last_lhup = 1; - info->last_l2 = -1; - info->last_si = 0; - isdn_tty_reset_profile(&info->emu); - isdn_tty_modem_reset_regs(info, 1); - info->magic = ISDN_ASYNC_MAGIC; - info->line = i; - info->x_char = 0; - info->isdn_driver = -1; - info->isdn_channel = -1; - info->drv_index = -1; - info->xmit_size = ISDN_SERIAL_XMIT_SIZE; - timer_setup(&info->nc_timer, isdn_tty_modem_do_ncarrier, 0); - skb_queue_head_init(&info->xmit_queue); -#ifdef CONFIG_ISDN_AUDIO - skb_queue_head_init(&info->dtmf_queue); -#endif - info->port.xmit_buf = kmalloc(ISDN_SERIAL_XMIT_MAX + 5, - GFP_KERNEL); - if (!info->port.xmit_buf) { - printk(KERN_ERR "Could not allocate modem xmit-buffer\n"); - retval = -ENOMEM; - goto err_unregister; - } - /* Make room for T.70 header */ - info->port.xmit_buf += 4; - } - return 0; -err_unregister: - for (i--; i >= 0; i--) { - info = &m->info[i]; -#ifdef CONFIG_ISDN_TTY_FAX - kfree(info->fax); -#endif - kfree(info->port.xmit_buf - 4); - info->port.xmit_buf = NULL; - tty_port_destroy(&info->port); - } - tty_unregister_driver(m->tty_modem); -err: - put_tty_driver(m->tty_modem); - m->tty_modem = NULL; - return retval; -} - -void -isdn_tty_exit(void) -{ - modem_info *info; - int i; - - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - info = &dev->mdm.info[i]; - isdn_tty_cleanup_xmit(info); -#ifdef CONFIG_ISDN_TTY_FAX - kfree(info->fax); -#endif - kfree(info->port.xmit_buf - 4); - info->port.xmit_buf = NULL; - tty_port_destroy(&info->port); - } - tty_unregister_driver(dev->mdm.tty_modem); - put_tty_driver(dev->mdm.tty_modem); - dev->mdm.tty_modem = NULL; -} - - -/* - * isdn_tty_match_icall(char *MSN, atemu *tty_emulator, int dev_idx) - * match the MSN against the MSNs (glob patterns) defined for tty_emulator, - * and return 0 for match, 1 for no match, 2 if MSN could match if longer. - */ - -static int -isdn_tty_match_icall(char *cid, atemu *emu, int di) -{ -#ifdef ISDN_DEBUG_MODEM_ICALL - printk(KERN_DEBUG "m_fi: msn=%s lmsn=%s mmsn=%s mreg[SI1]=%d mreg[SI2]=%d\n", - emu->msn, emu->lmsn, isdn_map_eaz2msn(emu->msn, di), - emu->mdmreg[REG_SI1], emu->mdmreg[REG_SI2]); -#endif - if (strlen(emu->lmsn)) { - char *p = emu->lmsn; - char *q; - int tmp; - int ret = 0; - - while (1) { - if ((q = strchr(p, ';'))) - *q = '\0'; - if ((tmp = isdn_msncmp(cid, isdn_map_eaz2msn(p, di))) > ret) - ret = tmp; -#ifdef ISDN_DEBUG_MODEM_ICALL - printk(KERN_DEBUG "m_fi: lmsnX=%s mmsn=%s -> tmp=%d\n", - p, isdn_map_eaz2msn(emu->msn, di), tmp); -#endif - if (q) { - *q = ';'; - p = q; - p++; - } - if (!tmp) - return 0; - if (!q) - break; - } - return ret; - } else { - int tmp; - tmp = isdn_msncmp(cid, isdn_map_eaz2msn(emu->msn, di)); -#ifdef ISDN_DEBUG_MODEM_ICALL - printk(KERN_DEBUG "m_fi: mmsn=%s -> tmp=%d\n", - isdn_map_eaz2msn(emu->msn, di), tmp); -#endif - return tmp; - } -} - -/* - * An incoming call-request has arrived. - * Search the tty-devices for an appropriate device and bind - * it to the ISDN-Channel. - * Return: - * - * 0 = No matching device found. - * 1 = A matching device found. - * 3 = No match found, but eventually would match, if - * CID is longer. - */ -int -isdn_tty_find_icall(int di, int ch, setup_parm *setup) -{ - char *eaz; - int i; - int wret; - int idx; - int si1; - int si2; - char *nr; - ulong flags; - - if (!setup->phone[0]) { - nr = "0"; - printk(KERN_INFO "isdn_tty: Incoming call without OAD, assuming '0'\n"); - } else - nr = setup->phone; - si1 = (int) setup->si1; - si2 = (int) setup->si2; - if (!setup->eazmsn[0]) { - printk(KERN_WARNING "isdn_tty: Incoming call without CPN, assuming '0'\n"); - eaz = "0"; - } else - eaz = setup->eazmsn; -#ifdef ISDN_DEBUG_MODEM_ICALL - printk(KERN_DEBUG "m_fi: eaz=%s si1=%d si2=%d\n", eaz, si1, si2); -#endif - wret = 0; - spin_lock_irqsave(&dev->lock, flags); - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - modem_info *info = &dev->mdm.info[i]; - - if (info->port.count == 0) - continue; - if ((info->emu.mdmreg[REG_SI1] & si2bit[si1]) && /* SI1 is matching */ - (info->emu.mdmreg[REG_SI2] == si2)) { /* SI2 is matching */ - idx = isdn_dc2minor(di, ch); -#ifdef ISDN_DEBUG_MODEM_ICALL - printk(KERN_DEBUG "m_fi: match1 wret=%d\n", wret); - printk(KERN_DEBUG "m_fi: idx=%d flags=%08lx drv=%d ch=%d usg=%d\n", idx, - info->port.flags, info->isdn_driver, - info->isdn_channel, dev->usage[idx]); -#endif - if ( -#ifndef FIX_FILE_TRANSFER - tty_port_active(&info->port) && -#endif - (info->isdn_driver == -1) && - (info->isdn_channel == -1) && - (USG_NONE(dev->usage[idx]))) { - int matchret; - - if ((matchret = isdn_tty_match_icall(eaz, &info->emu, di)) > wret) - wret = matchret; - if (!matchret) { /* EAZ is matching */ - info->isdn_driver = di; - info->isdn_channel = ch; - info->drv_index = idx; - dev->m_idx[idx] = info->line; - dev->usage[idx] &= ISDN_USAGE_EXCLUSIVE; - dev->usage[idx] |= isdn_calc_usage(si1, info->emu.mdmreg[REG_L2PROT]); - strcpy(dev->num[idx], nr); - strcpy(info->emu.cpn, eaz); - info->emu.mdmreg[REG_SI1I] = si2bit[si1]; - info->emu.mdmreg[REG_PLAN] = setup->plan; - info->emu.mdmreg[REG_SCREEN] = setup->screen; - isdn_info_update(); - spin_unlock_irqrestore(&dev->lock, flags); - printk(KERN_INFO "isdn_tty: call from %s, -> RING on ttyI%d\n", nr, - info->line); - info->msr |= UART_MSR_RI; - isdn_tty_modem_result(RESULT_RING, info); - isdn_timer_ctrl(ISDN_TIMER_MODEMRING, 1); - return 1; - } - } - } - } - spin_unlock_irqrestore(&dev->lock, flags); - printk(KERN_INFO "isdn_tty: call from %s -> %s %s\n", nr, eaz, - ((dev->drv[di]->flags & DRV_FLAG_REJBUS) && (wret != 2)) ? "rejected" : "ignored"); - return (wret == 2) ? 3 : 0; -} - -int -isdn_tty_stat_callback(int i, isdn_ctrl *c) -{ - int mi; - modem_info *info; - char *e; - - if (i < 0) - return 0; - if ((mi = dev->m_idx[i]) >= 0) { - info = &dev->mdm.info[mi]; - switch (c->command) { - case ISDN_STAT_CINF: - printk(KERN_DEBUG "CHARGEINFO on ttyI%d: %ld %s\n", info->line, c->arg, c->parm.num); - info->emu.charge = (unsigned) simple_strtoul(c->parm.num, &e, 10); - if (e == (char *)c->parm.num) - info->emu.charge = 0; - - break; - case ISDN_STAT_BSENT: -#ifdef ISDN_TTY_STAT_DEBUG - printk(KERN_DEBUG "tty_STAT_BSENT ttyI%d\n", info->line); -#endif - if ((info->isdn_driver == c->driver) && - (info->isdn_channel == c->arg)) { - info->msr |= UART_MSR_CTS; - if (info->send_outstanding) - if (!(--info->send_outstanding)) - info->lsr |= UART_LSR_TEMT; - isdn_tty_tint(info); - return 1; - } - break; - case ISDN_STAT_CAUSE: -#ifdef ISDN_TTY_STAT_DEBUG - printk(KERN_DEBUG "tty_STAT_CAUSE ttyI%d\n", info->line); -#endif - /* Signal cause to tty-device */ - strncpy(info->last_cause, c->parm.num, 5); - return 1; - case ISDN_STAT_DISPLAY: -#ifdef ISDN_TTY_STAT_DEBUG - printk(KERN_DEBUG "tty_STAT_DISPLAY ttyI%d\n", info->line); -#endif - /* Signal display to tty-device */ - if ((info->emu.mdmreg[REG_DISPLAY] & BIT_DISPLAY) && - !(info->emu.mdmreg[REG_RESPNUM] & BIT_RESPNUM)) { - isdn_tty_at_cout("\r\n", info); - isdn_tty_at_cout("DISPLAY: ", info); - isdn_tty_at_cout(c->parm.display, info); - isdn_tty_at_cout("\r\n", info); - } - return 1; - case ISDN_STAT_DCONN: -#ifdef ISDN_TTY_STAT_DEBUG - printk(KERN_DEBUG "tty_STAT_DCONN ttyI%d\n", info->line); -#endif - if (tty_port_active(&info->port)) { - if (info->dialing == 1) { - info->dialing = 2; - return 1; - } - } - break; - case ISDN_STAT_DHUP: -#ifdef ISDN_TTY_STAT_DEBUG - printk(KERN_DEBUG "tty_STAT_DHUP ttyI%d\n", info->line); -#endif - if (tty_port_active(&info->port)) { - if (info->dialing == 1) - isdn_tty_modem_result(RESULT_BUSY, info); - if (info->dialing > 1) - isdn_tty_modem_result(RESULT_NO_CARRIER, info); - info->dialing = 0; -#ifdef ISDN_DEBUG_MODEM_HUP - printk(KERN_DEBUG "Mhup in ISDN_STAT_DHUP\n"); -#endif - isdn_tty_modem_hup(info, 0); - return 1; - } - break; - case ISDN_STAT_BCONN: -#ifdef ISDN_TTY_STAT_DEBUG - printk(KERN_DEBUG "tty_STAT_BCONN ttyI%d\n", info->line); -#endif - /* Wake up any processes waiting - * for incoming call of this device when - * DCD follow the state of incoming carrier - */ - if (info->port.blocked_open && - (info->emu.mdmreg[REG_DCD] & BIT_DCD)) { - wake_up_interruptible(&info->port.open_wait); - } - - /* Schedule CONNECT-Message to any tty - * waiting for it and - * set DCD-bit of its modem-status. - */ - if (tty_port_active(&info->port) || - (info->port.blocked_open && - (info->emu.mdmreg[REG_DCD] & BIT_DCD))) { - info->msr |= UART_MSR_DCD; - info->emu.charge = 0; - if (info->dialing & 0xf) - info->last_dir = 1; - else - info->last_dir = 0; - info->dialing = 0; - info->rcvsched = 1; - if (USG_MODEM(dev->usage[i])) { - if (info->emu.mdmreg[REG_L2PROT] == ISDN_PROTO_L2_MODEM) { - strcpy(info->emu.connmsg, c->parm.num); - isdn_tty_modem_result(RESULT_CONNECT, info); - } else - isdn_tty_modem_result(RESULT_CONNECT64000, info); - } - if (USG_VOICE(dev->usage[i])) - isdn_tty_modem_result(RESULT_VCON, info); - return 1; - } - break; - case ISDN_STAT_BHUP: -#ifdef ISDN_TTY_STAT_DEBUG - printk(KERN_DEBUG "tty_STAT_BHUP ttyI%d\n", info->line); -#endif - if (tty_port_active(&info->port)) { -#ifdef ISDN_DEBUG_MODEM_HUP - printk(KERN_DEBUG "Mhup in ISDN_STAT_BHUP\n"); -#endif - isdn_tty_modem_hup(info, 0); - return 1; - } - break; - case ISDN_STAT_NODCH: -#ifdef ISDN_TTY_STAT_DEBUG - printk(KERN_DEBUG "tty_STAT_NODCH ttyI%d\n", info->line); -#endif - if (tty_port_active(&info->port)) { - if (info->dialing) { - info->dialing = 0; - info->last_l2 = -1; - info->last_si = 0; - sprintf(info->last_cause, "0000"); - isdn_tty_modem_result(RESULT_NO_DIALTONE, info); - } - isdn_tty_modem_hup(info, 0); - return 1; - } - break; - case ISDN_STAT_UNLOAD: -#ifdef ISDN_TTY_STAT_DEBUG - printk(KERN_DEBUG "tty_STAT_UNLOAD ttyI%d\n", info->line); -#endif - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - info = &dev->mdm.info[i]; - if (info->isdn_driver == c->driver) { - if (info->online) - isdn_tty_modem_hup(info, 1); - } - } - return 1; -#ifdef CONFIG_ISDN_TTY_FAX - case ISDN_STAT_FAXIND: - if (tty_port_active(&info->port)) { - isdn_tty_fax_command(info, c); - } - break; -#endif -#ifdef CONFIG_ISDN_AUDIO - case ISDN_STAT_AUDIO: - if (tty_port_active(&info->port)) { - switch (c->parm.num[0]) { - case ISDN_AUDIO_DTMF: - if (info->vonline) { - isdn_audio_put_dle_code(info, - c->parm.num[1]); - } - break; - } - } - break; -#endif - } - } - return 0; -} - -/********************************************************************* - Modem-Emulator-Routines -*********************************************************************/ - -#define cmdchar(c) ((c >= ' ') && (c <= 0x7f)) - -/* - * Put a message from the AT-emulator into receive-buffer of tty, - * convert CR, LF, and BS to values in modem-registers 3, 4 and 5. - */ -void -isdn_tty_at_cout(char *msg, modem_info *info) -{ - struct tty_port *port = &info->port; - atemu *m = &info->emu; - char *p; - char c; - u_long flags; - struct sk_buff *skb = NULL; - char *sp = NULL; - int l; - - if (!msg) { - printk(KERN_WARNING "isdn_tty: Null-Message in isdn_tty_at_cout\n"); - return; - } - - l = strlen(msg); - - spin_lock_irqsave(&info->readlock, flags); - if (info->closing) { - spin_unlock_irqrestore(&info->readlock, flags); - return; - } - - /* use queue instead of direct, if online and */ - /* data is in queue or buffer is full */ - if (info->online && ((tty_buffer_request_room(port, l) < l) || - !skb_queue_empty(&dev->drv[info->isdn_driver]->rpqueue[info->isdn_channel]))) { - skb = alloc_skb(l, GFP_ATOMIC); - if (!skb) { - spin_unlock_irqrestore(&info->readlock, flags); - return; - } - sp = skb_put(skb, l); -#ifdef CONFIG_ISDN_AUDIO - ISDN_AUDIO_SKB_DLECOUNT(skb) = 0; - ISDN_AUDIO_SKB_LOCK(skb) = 0; -#endif - } - - for (p = msg; *p; p++) { - switch (*p) { - case '\r': - c = m->mdmreg[REG_CR]; - break; - case '\n': - c = m->mdmreg[REG_LF]; - break; - case '\b': - c = m->mdmreg[REG_BS]; - break; - default: - c = *p; - } - if (skb) { - *sp++ = c; - } else { - if (tty_insert_flip_char(port, c, TTY_NORMAL) == 0) - break; - } - } - if (skb) { - __skb_queue_tail(&dev->drv[info->isdn_driver]->rpqueue[info->isdn_channel], skb); - dev->drv[info->isdn_driver]->rcvcount[info->isdn_channel] += skb->len; - spin_unlock_irqrestore(&info->readlock, flags); - /* Schedule dequeuing */ - if (dev->modempoll && info->rcvsched) - isdn_timer_ctrl(ISDN_TIMER_MODEMREAD, 1); - - } else { - spin_unlock_irqrestore(&info->readlock, flags); - tty_flip_buffer_push(port); - } -} - -/* - * Perform ATH Hangup - */ -static void -isdn_tty_on_hook(modem_info *info) -{ - if (info->isdn_channel >= 0) { -#ifdef ISDN_DEBUG_MODEM_HUP - printk(KERN_DEBUG "Mhup in isdn_tty_on_hook\n"); -#endif - isdn_tty_modem_hup(info, 1); - } -} - -static void -isdn_tty_off_hook(void) -{ - printk(KERN_DEBUG "isdn_tty_off_hook\n"); -} - -#define PLUSWAIT1 (HZ / 2) /* 0.5 sec. */ -#define PLUSWAIT2 (HZ * 3 / 2) /* 1.5 sec */ - -/* - * Check Buffer for Modem-escape-sequence, activate timer-callback to - * isdn_tty_modem_escape() if sequence found. - * - * Parameters: - * p pointer to databuffer - * plus escape-character - * count length of buffer - * pluscount count of valid escape-characters so far - * lastplus timestamp of last character - */ -static void -isdn_tty_check_esc(const u_char *p, u_char plus, int count, int *pluscount, - u_long *lastplus) -{ - if (plus > 127) - return; - if (count > 3) { - p += count - 3; - count = 3; - *pluscount = 0; - } - while (count > 0) { - if (*(p++) == plus) { - if ((*pluscount)++) { - /* Time since last '+' > 0.5 sec. ? */ - if (time_after(jiffies, *lastplus + PLUSWAIT1)) - *pluscount = 1; - } else { - /* Time since last non-'+' < 1.5 sec. ? */ - if (time_before(jiffies, *lastplus + PLUSWAIT2)) - *pluscount = 0; - } - if ((*pluscount == 3) && (count == 1)) - isdn_timer_ctrl(ISDN_TIMER_MODEMPLUS, 1); - if (*pluscount > 3) - *pluscount = 1; - } else - *pluscount = 0; - *lastplus = jiffies; - count--; - } -} - -/* - * Return result of AT-emulator to tty-receive-buffer, depending on - * modem-register 12, bit 0 and 1. - * For CONNECT-messages also switch to online-mode. - * For RING-message handle auto-ATA if register 0 != 0 - */ - -static void -isdn_tty_modem_result(int code, modem_info *info) -{ - atemu *m = &info->emu; - static char *msg[] = - {"OK", "CONNECT", "RING", "NO CARRIER", "ERROR", - "CONNECT 64000", "NO DIALTONE", "BUSY", "NO ANSWER", - "RINGING", "NO MSN/EAZ", "VCON", "RUNG"}; - char s[ISDN_MSNLEN + 10]; - - switch (code) { - case RESULT_RING: - m->mdmreg[REG_RINGCNT]++; - if (m->mdmreg[REG_RINGCNT] == m->mdmreg[REG_RINGATA]) - /* Automatically accept incoming call */ - isdn_tty_cmd_ATA(info); - break; - case RESULT_NO_CARRIER: -#ifdef ISDN_DEBUG_MODEM_HUP - printk(KERN_DEBUG "modem_result: NO CARRIER %d %d\n", - info->closing, !info->port.tty); -#endif - m->mdmreg[REG_RINGCNT] = 0; - del_timer(&info->nc_timer); - info->ncarrier = 0; - if (info->closing || !info->port.tty) - return; - -#ifdef CONFIG_ISDN_AUDIO - if (info->vonline & 1) { -#ifdef ISDN_DEBUG_MODEM_VOICE - printk(KERN_DEBUG "res3: send DLE-ETX on ttyI%d\n", - info->line); -#endif - /* voice-recording, add DLE-ETX */ - isdn_tty_at_cout("\020\003", info); - } - if (info->vonline & 2) { -#ifdef ISDN_DEBUG_MODEM_VOICE - printk(KERN_DEBUG "res3: send DLE-DC4 on ttyI%d\n", - info->line); -#endif - /* voice-playing, add DLE-DC4 */ - isdn_tty_at_cout("\020\024", info); - } -#endif - break; - case RESULT_CONNECT: - case RESULT_CONNECT64000: - sprintf(info->last_cause, "0000"); - if (!info->online) - info->online = 2; - break; - case RESULT_VCON: -#ifdef ISDN_DEBUG_MODEM_VOICE - printk(KERN_DEBUG "res3: send VCON on ttyI%d\n", - info->line); -#endif - sprintf(info->last_cause, "0000"); - if (!info->online) - info->online = 1; - break; - } /* switch (code) */ - - if (m->mdmreg[REG_RESP] & BIT_RESP) { - /* Show results */ - if (m->mdmreg[REG_RESPNUM] & BIT_RESPNUM) { - /* Show numeric results only */ - sprintf(s, "\r\n%d\r\n", code); - isdn_tty_at_cout(s, info); - } else { - if (code == RESULT_RING) { - /* return if "show RUNG" and ringcounter>1 */ - if ((m->mdmreg[REG_RUNG] & BIT_RUNG) && - (m->mdmreg[REG_RINGCNT] > 1)) - return; - /* print CID, _before_ _every_ ring */ - if (!(m->mdmreg[REG_CIDONCE] & BIT_CIDONCE)) { - isdn_tty_at_cout("\r\nCALLER NUMBER: ", info); - isdn_tty_at_cout(dev->num[info->drv_index], info); - if (m->mdmreg[REG_CDN] & BIT_CDN) { - isdn_tty_at_cout("\r\nCALLED NUMBER: ", info); - isdn_tty_at_cout(info->emu.cpn, info); - } - } - } - isdn_tty_at_cout("\r\n", info); - isdn_tty_at_cout(msg[code], info); - switch (code) { - case RESULT_CONNECT: - switch (m->mdmreg[REG_L2PROT]) { - case ISDN_PROTO_L2_MODEM: - isdn_tty_at_cout(" ", info); - isdn_tty_at_cout(m->connmsg, info); - break; - } - break; - case RESULT_RING: - /* Append CPN, if enabled */ - if ((m->mdmreg[REG_CPN] & BIT_CPN)) { - sprintf(s, "/%s", m->cpn); - isdn_tty_at_cout(s, info); - } - /* Print CID only once, _after_ 1st RING */ - if ((m->mdmreg[REG_CIDONCE] & BIT_CIDONCE) && - (m->mdmreg[REG_RINGCNT] == 1)) { - isdn_tty_at_cout("\r\n", info); - isdn_tty_at_cout("CALLER NUMBER: ", info); - isdn_tty_at_cout(dev->num[info->drv_index], info); - if (m->mdmreg[REG_CDN] & BIT_CDN) { - isdn_tty_at_cout("\r\nCALLED NUMBER: ", info); - isdn_tty_at_cout(info->emu.cpn, info); - } - } - break; - case RESULT_NO_CARRIER: - case RESULT_NO_DIALTONE: - case RESULT_BUSY: - case RESULT_NO_ANSWER: - m->mdmreg[REG_RINGCNT] = 0; - /* Append Cause-Message if enabled */ - if (m->mdmreg[REG_RESPXT] & BIT_RESPXT) { - sprintf(s, "/%s", info->last_cause); - isdn_tty_at_cout(s, info); - } - break; - case RESULT_CONNECT64000: - /* Append Protocol to CONNECT message */ - switch (m->mdmreg[REG_L2PROT]) { - case ISDN_PROTO_L2_X75I: - case ISDN_PROTO_L2_X75UI: - case ISDN_PROTO_L2_X75BUI: - isdn_tty_at_cout("/X.75", info); - break; - case ISDN_PROTO_L2_HDLC: - isdn_tty_at_cout("/HDLC", info); - break; - case ISDN_PROTO_L2_V11096: - isdn_tty_at_cout("/V110/9600", info); - break; - case ISDN_PROTO_L2_V11019: - isdn_tty_at_cout("/V110/19200", info); - break; - case ISDN_PROTO_L2_V11038: - isdn_tty_at_cout("/V110/38400", info); - break; - } - if (m->mdmreg[REG_T70] & BIT_T70) { - isdn_tty_at_cout("/T.70", info); - if (m->mdmreg[REG_T70] & BIT_T70_EXT) - isdn_tty_at_cout("+", info); - } - break; - } - isdn_tty_at_cout("\r\n", info); - } - } - if (code == RESULT_NO_CARRIER) { - if (info->closing || (!info->port.tty)) - return; - - if (tty_port_check_carrier(&info->port)) - tty_hangup(info->port.tty); - } -} - - -/* - * Display a modem-register-value. - */ -static void -isdn_tty_show_profile(int ridx, modem_info *info) -{ - char v[6]; - - sprintf(v, "\r\n%d", info->emu.mdmreg[ridx]); - isdn_tty_at_cout(v, info); -} - -/* - * Get MSN-string from char-pointer, set pointer to end of number - */ -static void -isdn_tty_get_msnstr(char *n, char **p) -{ - int limit = ISDN_MSNLEN - 1; - - while (((*p[0] >= '0' && *p[0] <= '9') || - /* Why a comma ??? */ - (*p[0] == ',') || (*p[0] == ':')) && - (limit--)) - *n++ = *p[0]++; - *n = '\0'; -} - -/* - * Get phone-number from modem-commandbuffer - */ -static void -isdn_tty_getdial(char *p, char *q, int cnt) -{ - int first = 1; - int limit = ISDN_MSNLEN - 1; /* MUST match the size of interface var to avoid - buffer overflow */ - - while (strchr(" 0123456789,#.*WPTSR-", *p) && *p && --cnt > 0) { - if ((*p >= '0' && *p <= '9') || ((*p == 'S') && first) || - ((*p == 'R') && first) || - (*p == '*') || (*p == '#')) { - *q++ = *p; - limit--; - } - if (!limit) - break; - p++; - first = 0; - } - *q = 0; -} - -#define PARSE_ERROR { isdn_tty_modem_result(RESULT_ERROR, info); return; } -#define PARSE_ERROR1 { isdn_tty_modem_result(RESULT_ERROR, info); return 1; } - -static void -isdn_tty_report(modem_info *info) -{ - atemu *m = &info->emu; - char s[80]; - - isdn_tty_at_cout("\r\nStatistics of last connection:\r\n\r\n", info); - sprintf(s, " Remote Number: %s\r\n", info->last_num); - isdn_tty_at_cout(s, info); - sprintf(s, " Direction: %s\r\n", info->last_dir ? "outgoing" : "incoming"); - isdn_tty_at_cout(s, info); - isdn_tty_at_cout(" Layer-2 Protocol: ", info); - switch (info->last_l2) { - case ISDN_PROTO_L2_X75I: - isdn_tty_at_cout("X.75i", info); - break; - case ISDN_PROTO_L2_X75UI: - isdn_tty_at_cout("X.75ui", info); - break; - case ISDN_PROTO_L2_X75BUI: - isdn_tty_at_cout("X.75bui", info); - break; - case ISDN_PROTO_L2_HDLC: - isdn_tty_at_cout("HDLC", info); - break; - case ISDN_PROTO_L2_V11096: - isdn_tty_at_cout("V.110 9600 Baud", info); - break; - case ISDN_PROTO_L2_V11019: - isdn_tty_at_cout("V.110 19200 Baud", info); - break; - case ISDN_PROTO_L2_V11038: - isdn_tty_at_cout("V.110 38400 Baud", info); - break; - case ISDN_PROTO_L2_TRANS: - isdn_tty_at_cout("transparent", info); - break; - case ISDN_PROTO_L2_MODEM: - isdn_tty_at_cout("modem", info); - break; - case ISDN_PROTO_L2_FAX: - isdn_tty_at_cout("fax", info); - break; - default: - isdn_tty_at_cout("unknown", info); - break; - } - if (m->mdmreg[REG_T70] & BIT_T70) { - isdn_tty_at_cout("/T.70", info); - if (m->mdmreg[REG_T70] & BIT_T70_EXT) - isdn_tty_at_cout("+", info); - } - isdn_tty_at_cout("\r\n", info); - isdn_tty_at_cout(" Service: ", info); - switch (info->last_si) { - case 1: - isdn_tty_at_cout("audio\r\n", info); - break; - case 5: - isdn_tty_at_cout("btx\r\n", info); - break; - case 7: - isdn_tty_at_cout("data\r\n", info); - break; - default: - sprintf(s, "%d\r\n", info->last_si); - isdn_tty_at_cout(s, info); - break; - } - sprintf(s, " Hangup location: %s\r\n", info->last_lhup ? "local" : "remote"); - isdn_tty_at_cout(s, info); - sprintf(s, " Last cause: %s\r\n", info->last_cause); - isdn_tty_at_cout(s, info); -} - -/* - * Parse AT&.. commands. - */ -static int -isdn_tty_cmd_ATand(char **p, modem_info *info) -{ - atemu *m = &info->emu; - int i; - char rb[100]; - -#define MAXRB (sizeof(rb) - 1) - - switch (*p[0]) { - case 'B': - /* &B - Set Buffersize */ - p[0]++; - i = isdn_getnum(p); - if ((i < 0) || (i > ISDN_SERIAL_XMIT_MAX)) - PARSE_ERROR1; -#ifdef CONFIG_ISDN_AUDIO - if ((m->mdmreg[REG_SI1] & 1) && (i > VBUF)) - PARSE_ERROR1; -#endif - m->mdmreg[REG_PSIZE] = i / 16; - info->xmit_size = m->mdmreg[REG_PSIZE] * 16; - switch (m->mdmreg[REG_L2PROT]) { - case ISDN_PROTO_L2_V11096: - case ISDN_PROTO_L2_V11019: - case ISDN_PROTO_L2_V11038: - info->xmit_size /= 10; - } - break; - case 'C': - /* &C - DCD Status */ - p[0]++; - switch (isdn_getnum(p)) { - case 0: - m->mdmreg[REG_DCD] &= ~BIT_DCD; - break; - case 1: - m->mdmreg[REG_DCD] |= BIT_DCD; - break; - default: - PARSE_ERROR1 - } - break; - case 'D': - /* &D - Set DTR-Low-behavior */ - p[0]++; - switch (isdn_getnum(p)) { - case 0: - m->mdmreg[REG_DTRHUP] &= ~BIT_DTRHUP; - m->mdmreg[REG_DTRR] &= ~BIT_DTRR; - break; - case 2: - m->mdmreg[REG_DTRHUP] |= BIT_DTRHUP; - m->mdmreg[REG_DTRR] &= ~BIT_DTRR; - break; - case 3: - m->mdmreg[REG_DTRHUP] |= BIT_DTRHUP; - m->mdmreg[REG_DTRR] |= BIT_DTRR; - break; - default: - PARSE_ERROR1 - } - break; - case 'E': - /* &E -Set EAZ/MSN */ - p[0]++; - isdn_tty_get_msnstr(m->msn, p); - break; - case 'F': - /* &F -Set Factory-Defaults */ - p[0]++; - if (info->msr & UART_MSR_DCD) - PARSE_ERROR1; - isdn_tty_reset_profile(m); - isdn_tty_modem_reset_regs(info, 1); - break; -#ifdef DUMMY_HAYES_AT - case 'K': - /* only for be compilant with common scripts */ - /* &K Flowcontrol - no function */ - p[0]++; - isdn_getnum(p); - break; -#endif - case 'L': - /* &L -Set Numbers to listen on */ - p[0]++; - i = 0; - while (*p[0] && (strchr("0123456789,-*[]?;", *p[0])) && - (i < ISDN_LMSNLEN - 1)) - m->lmsn[i++] = *p[0]++; - m->lmsn[i] = '\0'; - break; - case 'R': - /* &R - Set V.110 bitrate adaption */ - p[0]++; - i = isdn_getnum(p); - switch (i) { - case 0: - /* Switch off V.110, back to X.75 */ - m->mdmreg[REG_L2PROT] = ISDN_PROTO_L2_X75I; - m->mdmreg[REG_SI2] = 0; - info->xmit_size = m->mdmreg[REG_PSIZE] * 16; - break; - case 9600: - m->mdmreg[REG_L2PROT] = ISDN_PROTO_L2_V11096; - m->mdmreg[REG_SI2] = 197; - info->xmit_size = m->mdmreg[REG_PSIZE] * 16 / 10; - break; - case 19200: - m->mdmreg[REG_L2PROT] = ISDN_PROTO_L2_V11019; - m->mdmreg[REG_SI2] = 199; - info->xmit_size = m->mdmreg[REG_PSIZE] * 16 / 10; - break; - case 38400: - m->mdmreg[REG_L2PROT] = ISDN_PROTO_L2_V11038; - m->mdmreg[REG_SI2] = 198; /* no existing standard for this */ - info->xmit_size = m->mdmreg[REG_PSIZE] * 16 / 10; - break; - default: - PARSE_ERROR1; - } - /* Switch off T.70 */ - m->mdmreg[REG_T70] &= ~(BIT_T70 | BIT_T70_EXT); - /* Set Service 7 */ - m->mdmreg[REG_SI1] |= 4; - break; - case 'S': - /* &S - Set Windowsize */ - p[0]++; - i = isdn_getnum(p); - if ((i > 0) && (i < 9)) - m->mdmreg[REG_WSIZE] = i; - else - PARSE_ERROR1; - break; - case 'V': - /* &V - Show registers */ - p[0]++; - isdn_tty_at_cout("\r\n", info); - for (i = 0; i < ISDN_MODEM_NUMREG; i++) { - sprintf(rb, "S%02d=%03d%s", i, - m->mdmreg[i], ((i + 1) % 10) ? " " : "\r\n"); - isdn_tty_at_cout(rb, info); - } - sprintf(rb, "\r\nEAZ/MSN: %.50s\r\n", - strlen(m->msn) ? m->msn : "None"); - isdn_tty_at_cout(rb, info); - if (strlen(m->lmsn)) { - isdn_tty_at_cout("\r\nListen: ", info); - isdn_tty_at_cout(m->lmsn, info); - isdn_tty_at_cout("\r\n", info); - } - break; - case 'W': - /* &W - Write Profile */ - p[0]++; - switch (*p[0]) { - case '0': - p[0]++; - modem_write_profile(m); - break; - default: - PARSE_ERROR1; - } - break; - case 'X': - /* &X - Switch to BTX-Mode and T.70 */ - p[0]++; - switch (isdn_getnum(p)) { - case 0: - m->mdmreg[REG_T70] &= ~(BIT_T70 | BIT_T70_EXT); - info->xmit_size = m->mdmreg[REG_PSIZE] * 16; - break; - case 1: - m->mdmreg[REG_T70] |= BIT_T70; - m->mdmreg[REG_T70] &= ~BIT_T70_EXT; - m->mdmreg[REG_L2PROT] = ISDN_PROTO_L2_X75I; - info->xmit_size = 112; - m->mdmreg[REG_SI1] = 4; - m->mdmreg[REG_SI2] = 0; - break; - case 2: - m->mdmreg[REG_T70] |= (BIT_T70 | BIT_T70_EXT); - m->mdmreg[REG_L2PROT] = ISDN_PROTO_L2_X75I; - info->xmit_size = 112; - m->mdmreg[REG_SI1] = 4; - m->mdmreg[REG_SI2] = 0; - break; - default: - PARSE_ERROR1; - } - break; - default: - PARSE_ERROR1; - } - return 0; -} - -static int -isdn_tty_check_ats(int mreg, int mval, modem_info *info, atemu *m) -{ - /* Some plausibility checks */ - switch (mreg) { - case REG_L2PROT: - if (mval > ISDN_PROTO_L2_MAX) - return 1; - break; - case REG_PSIZE: - if ((mval * 16) > ISDN_SERIAL_XMIT_MAX) - return 1; -#ifdef CONFIG_ISDN_AUDIO - if ((m->mdmreg[REG_SI1] & 1) && (mval > VBUFX)) - return 1; -#endif - info->xmit_size = mval * 16; - switch (m->mdmreg[REG_L2PROT]) { - case ISDN_PROTO_L2_V11096: - case ISDN_PROTO_L2_V11019: - case ISDN_PROTO_L2_V11038: - info->xmit_size /= 10; - } - break; - case REG_SI1I: - case REG_PLAN: - case REG_SCREEN: - /* readonly registers */ - return 1; - } - return 0; -} - -/* - * Perform ATS command - */ -static int -isdn_tty_cmd_ATS(char **p, modem_info *info) -{ - atemu *m = &info->emu; - int bitpos; - int mreg; - int mval; - int bval; - - mreg = isdn_getnum(p); - if (mreg < 0 || mreg >= ISDN_MODEM_NUMREG) - PARSE_ERROR1; - switch (*p[0]) { - case '=': - p[0]++; - mval = isdn_getnum(p); - if (mval < 0 || mval > 255) - PARSE_ERROR1; - if (isdn_tty_check_ats(mreg, mval, info, m)) - PARSE_ERROR1; - m->mdmreg[mreg] = mval; - break; - case '.': - /* Set/Clear a single bit */ - p[0]++; - bitpos = isdn_getnum(p); - if ((bitpos < 0) || (bitpos > 7)) - PARSE_ERROR1; - switch (*p[0]) { - case '=': - p[0]++; - bval = isdn_getnum(p); - if (bval < 0 || bval > 1) - PARSE_ERROR1; - if (bval) - mval = m->mdmreg[mreg] | (1 << bitpos); - else - mval = m->mdmreg[mreg] & ~(1 << bitpos); - if (isdn_tty_check_ats(mreg, mval, info, m)) - PARSE_ERROR1; - m->mdmreg[mreg] = mval; - break; - case '?': - p[0]++; - isdn_tty_at_cout("\r\n", info); - isdn_tty_at_cout((m->mdmreg[mreg] & (1 << bitpos)) ? "1" : "0", - info); - break; - default: - PARSE_ERROR1; - } - break; - case '?': - p[0]++; - isdn_tty_show_profile(mreg, info); - break; - default: - PARSE_ERROR1; - break; - } - return 0; -} - -/* - * Perform ATA command - */ -static void -isdn_tty_cmd_ATA(modem_info *info) -{ - atemu *m = &info->emu; - isdn_ctrl cmd; - int l2; - - if (info->msr & UART_MSR_RI) { - /* Accept incoming call */ - info->last_dir = 0; - strcpy(info->last_num, dev->num[info->drv_index]); - m->mdmreg[REG_RINGCNT] = 0; - info->msr &= ~UART_MSR_RI; - l2 = m->mdmreg[REG_L2PROT]; -#ifdef CONFIG_ISDN_AUDIO - /* If more than one bit set in reg18, autoselect Layer2 */ - if ((m->mdmreg[REG_SI1] & m->mdmreg[REG_SI1I]) != m->mdmreg[REG_SI1]) { - if (m->mdmreg[REG_SI1I] == 1) { - if ((l2 != ISDN_PROTO_L2_MODEM) && (l2 != ISDN_PROTO_L2_FAX)) - l2 = ISDN_PROTO_L2_TRANS; - } else - l2 = ISDN_PROTO_L2_X75I; - } -#endif - cmd.driver = info->isdn_driver; - cmd.command = ISDN_CMD_SETL2; - cmd.arg = info->isdn_channel + (l2 << 8); - info->last_l2 = l2; - isdn_command(&cmd); - cmd.driver = info->isdn_driver; - cmd.command = ISDN_CMD_SETL3; - cmd.arg = info->isdn_channel + (m->mdmreg[REG_L3PROT] << 8); -#ifdef CONFIG_ISDN_TTY_FAX - if (l2 == ISDN_PROTO_L2_FAX) { - cmd.parm.fax = info->fax; - info->fax->direction = ISDN_TTY_FAX_CONN_IN; - } -#endif - isdn_command(&cmd); - cmd.driver = info->isdn_driver; - cmd.arg = info->isdn_channel; - cmd.command = ISDN_CMD_ACCEPTD; - info->dialing = 16; - info->emu.carrierwait = 0; - isdn_command(&cmd); - isdn_timer_ctrl(ISDN_TIMER_CARRIER, 1); - } else - isdn_tty_modem_result(RESULT_NO_ANSWER, info); -} - -#ifdef CONFIG_ISDN_AUDIO -/* - * Parse AT+F.. commands - */ -static int -isdn_tty_cmd_PLUSF(char **p, modem_info *info) -{ - atemu *m = &info->emu; - char rs[20]; - - if (!strncmp(p[0], "CLASS", 5)) { - p[0] += 5; - switch (*p[0]) { - case '?': - p[0]++; - sprintf(rs, "\r\n%d", - (m->mdmreg[REG_SI1] & 1) ? 8 : 0); -#ifdef CONFIG_ISDN_TTY_FAX - if (TTY_IS_FCLASS2(info)) - sprintf(rs, "\r\n2"); - else if (TTY_IS_FCLASS1(info)) - sprintf(rs, "\r\n1"); -#endif - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - switch (*p[0]) { - case '0': - p[0]++; - m->mdmreg[REG_L2PROT] = ISDN_PROTO_L2_X75I; - m->mdmreg[REG_L3PROT] = ISDN_PROTO_L3_TRANS; - m->mdmreg[REG_SI1] = 4; - info->xmit_size = - m->mdmreg[REG_PSIZE] * 16; - break; -#ifdef CONFIG_ISDN_TTY_FAX - case '1': - p[0]++; - if (!(dev->global_features & - ISDN_FEATURE_L3_FCLASS1)) - PARSE_ERROR1; - m->mdmreg[REG_SI1] = 1; - m->mdmreg[REG_L2PROT] = ISDN_PROTO_L2_FAX; - m->mdmreg[REG_L3PROT] = ISDN_PROTO_L3_FCLASS1; - info->xmit_size = - m->mdmreg[REG_PSIZE] * 16; - break; - case '2': - p[0]++; - if (!(dev->global_features & - ISDN_FEATURE_L3_FCLASS2)) - PARSE_ERROR1; - m->mdmreg[REG_SI1] = 1; - m->mdmreg[REG_L2PROT] = ISDN_PROTO_L2_FAX; - m->mdmreg[REG_L3PROT] = ISDN_PROTO_L3_FCLASS2; - info->xmit_size = - m->mdmreg[REG_PSIZE] * 16; - break; -#endif - case '8': - p[0]++; - /* L2 will change on dialout with si=1 */ - m->mdmreg[REG_L2PROT] = ISDN_PROTO_L2_X75I; - m->mdmreg[REG_L3PROT] = ISDN_PROTO_L3_TRANS; - m->mdmreg[REG_SI1] = 5; - info->xmit_size = VBUF; - break; - case '?': - p[0]++; - strcpy(rs, "\r\n0,"); -#ifdef CONFIG_ISDN_TTY_FAX - if (dev->global_features & - ISDN_FEATURE_L3_FCLASS1) - strcat(rs, "1,"); - if (dev->global_features & - ISDN_FEATURE_L3_FCLASS2) - strcat(rs, "2,"); -#endif - strcat(rs, "8"); - isdn_tty_at_cout(rs, info); - break; - default: - PARSE_ERROR1; - } - break; - default: - PARSE_ERROR1; - } - return 0; - } -#ifdef CONFIG_ISDN_TTY_FAX - return (isdn_tty_cmd_PLUSF_FAX(p, info)); -#else - PARSE_ERROR1; -#endif -} - -/* - * Parse AT+V.. commands - */ -static int -isdn_tty_cmd_PLUSV(char **p, modem_info *info) -{ - atemu *m = &info->emu; - isdn_ctrl cmd; - static char *vcmd[] = - {"NH", "IP", "LS", "RX", "SD", "SM", "TX", "DD", NULL}; - int i; - int par1; - int par2; - char rs[20]; - - i = 0; - while (vcmd[i]) { - if (!strncmp(vcmd[i], p[0], 2)) { - p[0] += 2; - break; - } - i++; - } - switch (i) { - case 0: - /* AT+VNH - Auto hangup feature */ - switch (*p[0]) { - case '?': - p[0]++; - isdn_tty_at_cout("\r\n1", info); - break; - case '=': - p[0]++; - switch (*p[0]) { - case '1': - p[0]++; - break; - case '?': - p[0]++; - isdn_tty_at_cout("\r\n1", info); - break; - default: - PARSE_ERROR1; - } - break; - default: - PARSE_ERROR1; - } - break; - case 1: - /* AT+VIP - Reset all voice parameters */ - isdn_tty_modem_reset_vpar(m); - break; - case 2: - /* AT+VLS - Select device, accept incoming call */ - switch (*p[0]) { - case '?': - p[0]++; - sprintf(rs, "\r\n%d", m->vpar[0]); - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - switch (*p[0]) { - case '0': - p[0]++; - m->vpar[0] = 0; - break; - case '2': - p[0]++; - m->vpar[0] = 2; - break; - case '?': - p[0]++; - isdn_tty_at_cout("\r\n0,2", info); - break; - default: - PARSE_ERROR1; - } - break; - default: - PARSE_ERROR1; - } - break; - case 3: - /* AT+VRX - Start recording */ - if (!m->vpar[0]) - PARSE_ERROR1; - if (info->online != 1) { - isdn_tty_modem_result(RESULT_NO_ANSWER, info); - return 1; - } - info->dtmf_state = isdn_audio_dtmf_init(info->dtmf_state); - if (!info->dtmf_state) { - printk(KERN_WARNING "isdn_tty: Couldn't malloc dtmf state\n"); - PARSE_ERROR1; - } - info->silence_state = isdn_audio_silence_init(info->silence_state); - if (!info->silence_state) { - printk(KERN_WARNING "isdn_tty: Couldn't malloc silence state\n"); - PARSE_ERROR1; - } - if (m->vpar[3] < 5) { - info->adpcmr = isdn_audio_adpcm_init(info->adpcmr, m->vpar[3]); - if (!info->adpcmr) { - printk(KERN_WARNING "isdn_tty: Couldn't malloc adpcm state\n"); - PARSE_ERROR1; - } - } -#ifdef ISDN_DEBUG_AT - printk(KERN_DEBUG "AT: +VRX\n"); -#endif - info->vonline |= 1; - isdn_tty_modem_result(RESULT_CONNECT, info); - return 0; - break; - case 4: - /* AT+VSD - Silence detection */ - switch (*p[0]) { - case '?': - p[0]++; - sprintf(rs, "\r\n<%d>,<%d>", - m->vpar[1], - m->vpar[2]); - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - if ((*p[0] >= '0') && (*p[0] <= '9')) { - par1 = isdn_getnum(p); - if ((par1 < 0) || (par1 > 31)) - PARSE_ERROR1; - if (*p[0] != ',') - PARSE_ERROR1; - p[0]++; - par2 = isdn_getnum(p); - if ((par2 < 0) || (par2 > 255)) - PARSE_ERROR1; - m->vpar[1] = par1; - m->vpar[2] = par2; - break; - } else - if (*p[0] == '?') { - p[0]++; - isdn_tty_at_cout("\r\n<0-31>,<0-255>", - info); - break; - } else - PARSE_ERROR1; - break; - default: - PARSE_ERROR1; - } - break; - case 5: - /* AT+VSM - Select compression */ - switch (*p[0]) { - case '?': - p[0]++; - sprintf(rs, "\r\n<%d>,<%d><8000>", - m->vpar[3], - m->vpar[1]); - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - switch (*p[0]) { - case '2': - case '3': - case '4': - case '5': - case '6': - par1 = isdn_getnum(p); - if ((par1 < 2) || (par1 > 6)) - PARSE_ERROR1; - m->vpar[3] = par1; - break; - case '?': - p[0]++; - isdn_tty_at_cout("\r\n2;ADPCM;2;0;(8000)\r\n", - info); - isdn_tty_at_cout("3;ADPCM;3;0;(8000)\r\n", - info); - isdn_tty_at_cout("4;ADPCM;4;0;(8000)\r\n", - info); - isdn_tty_at_cout("5;ALAW;8;0;(8000)\r\n", - info); - isdn_tty_at_cout("6;ULAW;8;0;(8000)\r\n", - info); - break; - default: - PARSE_ERROR1; - } - break; - default: - PARSE_ERROR1; - } - break; - case 6: - /* AT+VTX - Start sending */ - if (!m->vpar[0]) - PARSE_ERROR1; - if (info->online != 1) { - isdn_tty_modem_result(RESULT_NO_ANSWER, info); - return 1; - } - info->dtmf_state = isdn_audio_dtmf_init(info->dtmf_state); - if (!info->dtmf_state) { - printk(KERN_WARNING "isdn_tty: Couldn't malloc dtmf state\n"); - PARSE_ERROR1; - } - if (m->vpar[3] < 5) { - info->adpcms = isdn_audio_adpcm_init(info->adpcms, m->vpar[3]); - if (!info->adpcms) { - printk(KERN_WARNING "isdn_tty: Couldn't malloc adpcm state\n"); - PARSE_ERROR1; - } - } -#ifdef ISDN_DEBUG_AT - printk(KERN_DEBUG "AT: +VTX\n"); -#endif - m->lastDLE = 0; - info->vonline |= 2; - isdn_tty_modem_result(RESULT_CONNECT, info); - return 0; - break; - case 7: - /* AT+VDD - DTMF detection */ - switch (*p[0]) { - case '?': - p[0]++; - sprintf(rs, "\r\n<%d>,<%d>", - m->vpar[4], - m->vpar[5]); - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - if ((*p[0] >= '0') && (*p[0] <= '9')) { - if (info->online != 1) - PARSE_ERROR1; - par1 = isdn_getnum(p); - if ((par1 < 0) || (par1 > 15)) - PARSE_ERROR1; - if (*p[0] != ',') - PARSE_ERROR1; - p[0]++; - par2 = isdn_getnum(p); - if ((par2 < 0) || (par2 > 255)) - PARSE_ERROR1; - m->vpar[4] = par1; - m->vpar[5] = par2; - cmd.driver = info->isdn_driver; - cmd.command = ISDN_CMD_AUDIO; - cmd.arg = info->isdn_channel + (ISDN_AUDIO_SETDD << 8); - cmd.parm.num[0] = par1; - cmd.parm.num[1] = par2; - isdn_command(&cmd); - break; - } else - if (*p[0] == '?') { - p[0]++; - isdn_tty_at_cout("\r\n<0-15>,<0-255>", - info); - break; - } else - PARSE_ERROR1; - break; - default: - PARSE_ERROR1; - } - break; - default: - PARSE_ERROR1; - } - return 0; -} -#endif /* CONFIG_ISDN_AUDIO */ - -/* - * Parse and perform an AT-command-line. - */ -static void -isdn_tty_parse_at(modem_info *info) -{ - atemu *m = &info->emu; - char *p; - char ds[ISDN_MSNLEN]; - -#ifdef ISDN_DEBUG_AT - printk(KERN_DEBUG "AT: '%s'\n", m->mdmcmd); -#endif - for (p = &m->mdmcmd[2]; *p;) { - switch (*p) { - case ' ': - p++; - break; - case 'A': - /* A - Accept incoming call */ - p++; - isdn_tty_cmd_ATA(info); - return; - case 'D': - /* D - Dial */ - if (info->msr & UART_MSR_DCD) - PARSE_ERROR; - if (info->msr & UART_MSR_RI) { - isdn_tty_modem_result(RESULT_NO_CARRIER, info); - return; - } - isdn_tty_getdial(++p, ds, sizeof ds); - p += strlen(p); - if (!strlen(m->msn)) - isdn_tty_modem_result(RESULT_NO_MSN_EAZ, info); - else if (strlen(ds)) - isdn_tty_dial(ds, info, m); - else - PARSE_ERROR; - return; - case 'E': - /* E - Turn Echo on/off */ - p++; - switch (isdn_getnum(&p)) { - case 0: - m->mdmreg[REG_ECHO] &= ~BIT_ECHO; - break; - case 1: - m->mdmreg[REG_ECHO] |= BIT_ECHO; - break; - default: - PARSE_ERROR; - } - break; - case 'H': - /* H - On/Off-hook */ - p++; - switch (*p) { - case '0': - p++; - isdn_tty_on_hook(info); - break; - case '1': - p++; - isdn_tty_off_hook(); - break; - default: - isdn_tty_on_hook(info); - break; - } - break; - case 'I': - /* I - Information */ - p++; - isdn_tty_at_cout("\r\nLinux ISDN", info); - switch (*p) { - case '0': - case '1': - p++; - break; - case '2': - p++; - isdn_tty_report(info); - break; - case '3': - p++; - snprintf(ds, sizeof(ds), "\r\n%d", info->emu.charge); - isdn_tty_at_cout(ds, info); - break; - default:; - } - break; -#ifdef DUMMY_HAYES_AT - case 'L': - case 'M': - /* only for be compilant with common scripts */ - /* no function */ - p++; - isdn_getnum(&p); - break; -#endif - case 'O': - /* O - Go online */ - p++; - if (info->msr & UART_MSR_DCD) - /* if B-Channel is up */ - isdn_tty_modem_result((m->mdmreg[REG_L2PROT] == ISDN_PROTO_L2_MODEM) ? RESULT_CONNECT : RESULT_CONNECT64000, info); - else - isdn_tty_modem_result(RESULT_NO_CARRIER, info); - return; - case 'Q': - /* Q - Turn Emulator messages on/off */ - p++; - switch (isdn_getnum(&p)) { - case 0: - m->mdmreg[REG_RESP] |= BIT_RESP; - break; - case 1: - m->mdmreg[REG_RESP] &= ~BIT_RESP; - break; - default: - PARSE_ERROR; - } - break; - case 'S': - /* S - Set/Get Register */ - p++; - if (isdn_tty_cmd_ATS(&p, info)) - return; - break; - case 'V': - /* V - Numeric or ASCII Emulator-messages */ - p++; - switch (isdn_getnum(&p)) { - case 0: - m->mdmreg[REG_RESP] |= BIT_RESPNUM; - break; - case 1: - m->mdmreg[REG_RESP] &= ~BIT_RESPNUM; - break; - default: - PARSE_ERROR; - } - break; - case 'Z': - /* Z - Load Registers from Profile */ - p++; - if (info->msr & UART_MSR_DCD) { - info->online = 0; - isdn_tty_on_hook(info); - } - isdn_tty_modem_reset_regs(info, 1); - break; - case '+': - p++; - switch (*p) { -#ifdef CONFIG_ISDN_AUDIO - case 'F': - p++; - if (isdn_tty_cmd_PLUSF(&p, info)) - return; - break; - case 'V': - if ((!(m->mdmreg[REG_SI1] & 1)) || - (m->mdmreg[REG_L2PROT] == ISDN_PROTO_L2_MODEM)) - PARSE_ERROR; - p++; - if (isdn_tty_cmd_PLUSV(&p, info)) - return; - break; -#endif /* CONFIG_ISDN_AUDIO */ - case 'S': /* SUSPEND */ - p++; - isdn_tty_get_msnstr(ds, &p); - isdn_tty_suspend(ds, info, m); - break; - case 'R': /* RESUME */ - p++; - isdn_tty_get_msnstr(ds, &p); - isdn_tty_resume(ds, info, m); - break; - case 'M': /* MESSAGE */ - p++; - isdn_tty_send_msg(info, m, p); - break; - default: - PARSE_ERROR; - } - break; - case '&': - p++; - if (isdn_tty_cmd_ATand(&p, info)) - return; - break; - default: - PARSE_ERROR; - } - } -#ifdef CONFIG_ISDN_AUDIO - if (!info->vonline) -#endif - isdn_tty_modem_result(RESULT_OK, info); -} - -/* Need own toupper() because standard-toupper is not available - * within modules. - */ -#define my_toupper(c) (((c >= 'a') && (c <= 'z')) ? (c & 0xdf) : c) - -/* - * Perform line-editing of AT-commands - * - * Parameters: - * p inputbuffer - * count length of buffer - * channel index to line (minor-device) - */ -static int -isdn_tty_edit_at(const char *p, int count, modem_info *info) -{ - atemu *m = &info->emu; - int total = 0; - u_char c; - char eb[2]; - int cnt; - - for (cnt = count; cnt > 0; p++, cnt--) { - c = *p; - total++; - if (c == m->mdmreg[REG_CR] || c == m->mdmreg[REG_LF]) { - /* Separator (CR or LF) */ - m->mdmcmd[m->mdmcmdl] = 0; - if (m->mdmreg[REG_ECHO] & BIT_ECHO) { - eb[0] = c; - eb[1] = 0; - isdn_tty_at_cout(eb, info); - } - if ((m->mdmcmdl >= 2) && (!(strncmp(m->mdmcmd, "AT", 2)))) - isdn_tty_parse_at(info); - m->mdmcmdl = 0; - continue; - } - if (c == m->mdmreg[REG_BS] && m->mdmreg[REG_BS] < 128) { - /* Backspace-Function */ - if ((m->mdmcmdl > 2) || (!m->mdmcmdl)) { - if (m->mdmcmdl) - m->mdmcmdl--; - if (m->mdmreg[REG_ECHO] & BIT_ECHO) - isdn_tty_at_cout("\b", info); - } - continue; - } - if (cmdchar(c)) { - if (m->mdmreg[REG_ECHO] & BIT_ECHO) { - eb[0] = c; - eb[1] = 0; - isdn_tty_at_cout(eb, info); - } - if (m->mdmcmdl < 255) { - c = my_toupper(c); - switch (m->mdmcmdl) { - case 1: - if (c == 'T') { - m->mdmcmd[m->mdmcmdl] = c; - m->mdmcmd[++m->mdmcmdl] = 0; - break; - } else - m->mdmcmdl = 0; - /* Fall through - check for 'A' */ - case 0: - if (c == 'A') { - m->mdmcmd[m->mdmcmdl] = c; - m->mdmcmd[++m->mdmcmdl] = 0; - } - break; - default: - m->mdmcmd[m->mdmcmdl] = c; - m->mdmcmd[++m->mdmcmdl] = 0; - } - } - } - } - return total; -} - -/* - * Switch all modem-channels who are online and got a valid - * escape-sequence 1.5 seconds ago, to command-mode. - * This function is called every second via timer-interrupt from within - * timer-dispatcher isdn_timer_function() - */ -void -isdn_tty_modem_escape(void) -{ - int ton = 0; - int i; - int midx; - - for (i = 0; i < ISDN_MAX_CHANNELS; i++) - if (USG_MODEM(dev->usage[i]) && (midx = dev->m_idx[i]) >= 0) { - modem_info *info = &dev->mdm.info[midx]; - if (info->online) { - ton = 1; - if ((info->emu.pluscount == 3) && - time_after(jiffies, - info->emu.lastplus + PLUSWAIT2)) { - info->emu.pluscount = 0; - info->online = 0; - isdn_tty_modem_result(RESULT_OK, info); - } - } - } - isdn_timer_ctrl(ISDN_TIMER_MODEMPLUS, ton); -} - -/* - * Put a RING-message to all modem-channels who have the RI-bit set. - * This function is called every second via timer-interrupt from within - * timer-dispatcher isdn_timer_function() - */ -void -isdn_tty_modem_ring(void) -{ - int ton = 0; - int i; - - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - modem_info *info = &dev->mdm.info[i]; - if (info->msr & UART_MSR_RI) { - ton = 1; - isdn_tty_modem_result(RESULT_RING, info); - } - } - isdn_timer_ctrl(ISDN_TIMER_MODEMRING, ton); -} - -/* - * For all online tty's, try sending data to - * the lower levels. - */ -void -isdn_tty_modem_xmit(void) -{ - int ton = 1; - int i; - - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - modem_info *info = &dev->mdm.info[i]; - if (info->online) { - ton = 1; - isdn_tty_senddown(info); - isdn_tty_tint(info); - } - } - isdn_timer_ctrl(ISDN_TIMER_MODEMXMIT, ton); -} - -/* - * Check all channels if we have a 'no carrier' timeout. - * Timeout value is set by Register S7. - */ -void -isdn_tty_carrier_timeout(void) -{ - int ton = 0; - int i; - - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { - modem_info *info = &dev->mdm.info[i]; - if (!info->dialing) - continue; - if (info->emu.carrierwait++ > info->emu.mdmreg[REG_WAITC]) { - info->dialing = 0; - isdn_tty_modem_result(RESULT_NO_CARRIER, info); - isdn_tty_modem_hup(info, 1); - } else - ton = 1; - } - isdn_timer_ctrl(ISDN_TIMER_CARRIER, ton); -} diff --git a/drivers/isdn/i4l/isdn_tty.h b/drivers/isdn/i4l/isdn_tty.h deleted file mode 100644 index a6f801d2263b..000000000000 --- a/drivers/isdn/i4l/isdn_tty.h +++ /dev/null @@ -1,120 +0,0 @@ -/* $Id: isdn_tty.h,v 1.1.2.2 2004/01/12 22:37:19 keil Exp $ - * - * header for Linux ISDN subsystem, tty related functions (linklevel). - * - * Copyright 1994-1999 by Fritz Elfert (fritz@isdn4linux.de) - * Copyright 1995,96 by Thinking Objects Software GmbH Wuerzburg - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - - -#define DLE 0x10 -#define ETX 0x03 -#define DC4 0x14 - - -/* - * Definition of some special Registers of AT-Emulator - */ -#define REG_RINGATA 0 -#define REG_RINGCNT 1 /* ring counter register */ -#define REG_ESC 2 -#define REG_CR 3 -#define REG_LF 4 -#define REG_BS 5 - -#define REG_WAITC 7 - -#define REG_RESP 12 /* show response messages register */ -#define BIT_RESP 1 /* show response messages bit */ -#define REG_RESPNUM 12 /* show numeric responses register */ -#define BIT_RESPNUM 2 /* show numeric responses bit */ -#define REG_ECHO 12 -#define BIT_ECHO 4 -#define REG_DCD 12 -#define BIT_DCD 8 -#define REG_CTS 12 -#define BIT_CTS 16 -#define REG_DTRR 12 -#define BIT_DTRR 32 -#define REG_DSR 12 -#define BIT_DSR 64 -#define REG_CPPP 12 -#define BIT_CPPP 128 - -#define REG_DXMT 13 -#define BIT_DXMT 1 -#define REG_T70 13 -#define BIT_T70 2 -#define BIT_T70_EXT 32 -#define REG_DTRHUP 13 -#define BIT_DTRHUP 4 -#define REG_RESPXT 13 -#define BIT_RESPXT 8 -#define REG_CIDONCE 13 -#define BIT_CIDONCE 16 -#define REG_RUNG 13 /* show RUNG message register */ -#define BIT_RUNG 64 /* show RUNG message bit */ -#define REG_DISPLAY 13 -#define BIT_DISPLAY 128 - -#define REG_L2PROT 14 -#define REG_L3PROT 15 -#define REG_PSIZE 16 -#define REG_WSIZE 17 -#define REG_SI1 18 -#define REG_SI2 19 -#define REG_SI1I 20 -#define REG_PLAN 21 -#define REG_SCREEN 22 - -#define REG_CPN 23 -#define BIT_CPN 1 -#define REG_CPNFCON 23 -#define BIT_CPNFCON 2 -#define REG_CDN 23 -#define BIT_CDN 4 - -/* defines for result codes */ -#define RESULT_OK 0 -#define RESULT_CONNECT 1 -#define RESULT_RING 2 -#define RESULT_NO_CARRIER 3 -#define RESULT_ERROR 4 -#define RESULT_CONNECT64000 5 -#define RESULT_NO_DIALTONE 6 -#define RESULT_BUSY 7 -#define RESULT_NO_ANSWER 8 -#define RESULT_RINGING 9 -#define RESULT_NO_MSN_EAZ 10 -#define RESULT_VCON 11 -#define RESULT_RUNG 12 - -#define TTY_IS_FCLASS1(info) \ - ((info->emu.mdmreg[REG_L2PROT] == ISDN_PROTO_L2_FAX) && \ - (info->emu.mdmreg[REG_L3PROT] == ISDN_PROTO_L3_FCLASS1)) -#define TTY_IS_FCLASS2(info) \ - ((info->emu.mdmreg[REG_L2PROT] == ISDN_PROTO_L2_FAX) && \ - (info->emu.mdmreg[REG_L3PROT] == ISDN_PROTO_L3_FCLASS2)) - -extern void isdn_tty_modem_escape(void); -extern void isdn_tty_modem_ring(void); -extern void isdn_tty_carrier_timeout(void); -extern void isdn_tty_modem_xmit(void); -extern int isdn_tty_modem_init(void); -extern void isdn_tty_exit(void); -extern void isdn_tty_readmodem(void); -extern int isdn_tty_find_icall(int, int, setup_parm *); -extern int isdn_tty_stat_callback(int, isdn_ctrl *); -extern int isdn_tty_rcv_skb(int, int, int, struct sk_buff *); -extern int isdn_tty_capi_facility(capi_msg *cm); -extern void isdn_tty_at_cout(char *, modem_info *); -extern void isdn_tty_modem_hup(modem_info *, int); -#ifdef CONFIG_ISDN_TTY_FAX -extern int isdn_tty_cmd_PLUSF_FAX(char **, modem_info *); -extern int isdn_tty_fax_command(modem_info *, isdn_ctrl *); -extern void isdn_tty_fax_bitorder(modem_info *, struct sk_buff *); -#endif diff --git a/drivers/isdn/i4l/isdn_ttyfax.c b/drivers/isdn/i4l/isdn_ttyfax.c deleted file mode 100644 index 47aae4916730..000000000000 --- a/drivers/isdn/i4l/isdn_ttyfax.c +++ /dev/null @@ -1,1123 +0,0 @@ -/* $Id: isdn_ttyfax.c,v 1.1.2.2 2004/01/12 22:37:19 keil Exp $ - * - * Linux ISDN subsystem, tty_fax AT-command emulator (linklevel). - * - * Copyright 1999 by Armin Schindler (mac@melware.de) - * Copyright 1999 by Ralf Spachmann (mel@melware.de) - * Copyright 1999 by Cytronics & Melware - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#undef ISDN_TTY_FAX_STAT_DEBUG -#undef ISDN_TTY_FAX_CMD_DEBUG - -#include -#include "isdn_common.h" -#include "isdn_tty.h" -#include "isdn_ttyfax.h" - - -static char *isdn_tty_fax_revision = "$Revision: 1.1.2.2 $"; - -#define PARSE_ERROR1 { isdn_tty_fax_modem_result(1, info); return 1; } - -static char * -isdn_getrev(const char *revision) -{ - char *rev; - char *p; - - if ((p = strchr(revision, ':'))) { - rev = p + 2; - p = strchr(rev, '$'); - *--p = 0; - } else - rev = "???"; - return rev; -} - -/* - * Fax Class 2 Modem results - * - */ - -static void -isdn_tty_fax_modem_result(int code, modem_info *info) -{ - atemu *m = &info->emu; - T30_s *f = info->fax; - char rs[50]; - char rss[50]; - char *rp; - int i; - static char *msg[] = - {"OK", "ERROR", "+FCON", "+FCSI:", "+FDIS:", - "+FHNG:", "+FDCS:", "CONNECT", "+FTSI:", - "+FCFR", "+FPTS:", "+FET:"}; - - - isdn_tty_at_cout("\r\n", info); - isdn_tty_at_cout(msg[code], info); - -#ifdef ISDN_TTY_FAX_CMD_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax send %s on ttyI%d\n", - msg[code], info->line); -#endif - switch (code) { - case 0: /* OK */ - break; - case 1: /* ERROR */ - break; - case 2: /* +FCON */ - /* Append CPN, if enabled */ - if ((m->mdmreg[REG_CPNFCON] & BIT_CPNFCON) && - (!(dev->usage[info->isdn_channel] & ISDN_USAGE_OUTGOING))) { - sprintf(rs, "/%s", m->cpn); - isdn_tty_at_cout(rs, info); - } - info->online = 1; - f->fet = 0; - if (f->phase == ISDN_FAX_PHASE_A) - f->phase = ISDN_FAX_PHASE_B; - break; - case 3: /* +FCSI */ - case 8: /* +FTSI */ - sprintf(rs, "\"%s\"", f->r_id); - isdn_tty_at_cout(rs, info); - break; - case 4: /* +FDIS */ - rs[0] = 0; - rp = &f->r_resolution; - for (i = 0; i < 8; i++) { - sprintf(rss, "%c%s", rp[i] + 48, - (i < 7) ? "," : ""); - strcat(rs, rss); - } - isdn_tty_at_cout(rs, info); -#ifdef ISDN_TTY_FAX_CMD_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax DIS=%s on ttyI%d\n", - rs, info->line); -#endif - break; - case 5: /* +FHNG */ - sprintf(rs, "%d", f->code); - isdn_tty_at_cout(rs, info); - info->faxonline = 0; - break; - case 6: /* +FDCS */ - rs[0] = 0; - rp = &f->r_resolution; - for (i = 0; i < 8; i++) { - sprintf(rss, "%c%s", rp[i] + 48, - (i < 7) ? "," : ""); - strcat(rs, rss); - } - isdn_tty_at_cout(rs, info); -#ifdef ISDN_TTY_FAX_CMD_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax DCS=%s on ttyI%d\n", - rs, info->line); -#endif - break; - case 7: /* CONNECT */ - info->faxonline |= 2; - break; - case 9: /* FCFR */ - break; - case 10: /* FPTS */ - isdn_tty_at_cout("1", info); - break; - case 11: /* FET */ - sprintf(rs, "%d", f->fet); - isdn_tty_at_cout(rs, info); - break; - } - - isdn_tty_at_cout("\r\n", info); - - switch (code) { - case 7: /* CONNECT */ - info->online = 2; - if (info->faxonline & 1) { - sprintf(rs, "%c", XON); - isdn_tty_at_cout(rs, info); - } - break; - } -} - -static int -isdn_tty_fax_command1(modem_info *info, isdn_ctrl *c) -{ - static char *msg[] = - {"OK", "CONNECT", "NO CARRIER", "ERROR", "FCERROR"}; - -#ifdef ISDN_TTY_FAX_CMD_DEBUG - printk(KERN_DEBUG "isdn_tty: FCLASS1 cmd(%d)\n", c->parm.aux.cmd); -#endif - if (c->parm.aux.cmd < ISDN_FAX_CLASS1_QUERY) { - if (info->online) - info->online = 1; - isdn_tty_at_cout("\r\n", info); - isdn_tty_at_cout(msg[c->parm.aux.cmd], info); - isdn_tty_at_cout("\r\n", info); - } - switch (c->parm.aux.cmd) { - case ISDN_FAX_CLASS1_CONNECT: - info->online = 2; - break; - case ISDN_FAX_CLASS1_OK: - case ISDN_FAX_CLASS1_FCERROR: - case ISDN_FAX_CLASS1_ERROR: - case ISDN_FAX_CLASS1_NOCARR: - break; - case ISDN_FAX_CLASS1_QUERY: - isdn_tty_at_cout("\r\n", info); - if (!c->parm.aux.para[0]) { - isdn_tty_at_cout(msg[ISDN_FAX_CLASS1_ERROR], info); - isdn_tty_at_cout("\r\n", info); - } else { - isdn_tty_at_cout(c->parm.aux.para, info); - isdn_tty_at_cout("\r\nOK\r\n", info); - } - break; - } - return (0); -} - -int -isdn_tty_fax_command(modem_info *info, isdn_ctrl *c) -{ - T30_s *f = info->fax; - char rs[10]; - - if (TTY_IS_FCLASS1(info)) - return (isdn_tty_fax_command1(info, c)); - -#ifdef ISDN_TTY_FAX_CMD_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax cmd %d on ttyI%d\n", - f->r_code, info->line); -#endif - switch (f->r_code) { - case ISDN_TTY_FAX_FCON: - info->faxonline = 1; - isdn_tty_fax_modem_result(2, info); /* +FCON */ - return (0); - case ISDN_TTY_FAX_FCON_I: - info->faxonline = 16; - isdn_tty_fax_modem_result(2, info); /* +FCON */ - return (0); - case ISDN_TTY_FAX_RID: - if (info->faxonline & 1) - isdn_tty_fax_modem_result(3, info); /* +FCSI */ - if (info->faxonline & 16) - isdn_tty_fax_modem_result(8, info); /* +FTSI */ - return (0); - case ISDN_TTY_FAX_DIS: - isdn_tty_fax_modem_result(4, info); /* +FDIS */ - return (0); - case ISDN_TTY_FAX_HNG: - if (f->phase == ISDN_FAX_PHASE_C) { - if (f->direction == ISDN_TTY_FAX_CONN_IN) { - sprintf(rs, "%c%c", DLE, ETX); - isdn_tty_at_cout(rs, info); - } else { - sprintf(rs, "%c", 0x18); - isdn_tty_at_cout(rs, info); - } - info->faxonline &= ~2; /* leave data mode */ - info->online = 1; - } - f->phase = ISDN_FAX_PHASE_E; - isdn_tty_fax_modem_result(5, info); /* +FHNG */ - isdn_tty_fax_modem_result(0, info); /* OK */ - return (0); - case ISDN_TTY_FAX_DCS: - isdn_tty_fax_modem_result(6, info); /* +FDCS */ - isdn_tty_fax_modem_result(7, info); /* CONNECT */ - f->phase = ISDN_FAX_PHASE_C; - return (0); - case ISDN_TTY_FAX_TRAIN_OK: - isdn_tty_fax_modem_result(6, info); /* +FDCS */ - isdn_tty_fax_modem_result(0, info); /* OK */ - return (0); - case ISDN_TTY_FAX_SENT: - isdn_tty_fax_modem_result(0, info); /* OK */ - return (0); - case ISDN_TTY_FAX_CFR: - isdn_tty_fax_modem_result(9, info); /* +FCFR */ - return (0); - case ISDN_TTY_FAX_ET: - sprintf(rs, "%c%c", DLE, ETX); - isdn_tty_at_cout(rs, info); - isdn_tty_fax_modem_result(10, info); /* +FPTS */ - isdn_tty_fax_modem_result(11, info); /* +FET */ - isdn_tty_fax_modem_result(0, info); /* OK */ - info->faxonline &= ~2; /* leave data mode */ - info->online = 1; - f->phase = ISDN_FAX_PHASE_D; - return (0); - case ISDN_TTY_FAX_PTS: - isdn_tty_fax_modem_result(10, info); /* +FPTS */ - if (f->direction == ISDN_TTY_FAX_CONN_OUT) { - if (f->fet == 1) - f->phase = ISDN_FAX_PHASE_B; - if (f->fet == 0) - isdn_tty_fax_modem_result(0, info); /* OK */ - } - return (0); - case ISDN_TTY_FAX_EOP: - info->faxonline &= ~2; /* leave data mode */ - info->online = 1; - f->phase = ISDN_FAX_PHASE_D; - return (0); - - } - return (-1); -} - - -void -isdn_tty_fax_bitorder(modem_info *info, struct sk_buff *skb) -{ - __u8 LeftMask; - __u8 RightMask; - __u8 fBit; - __u8 Data; - int i; - - if (!info->fax->bor) { - for (i = 0; i < skb->len; i++) { - Data = skb->data[i]; - for ( - LeftMask = 0x80, RightMask = 0x01; - LeftMask > RightMask; - LeftMask >>= 1, RightMask <<= 1 - ) { - fBit = (Data & LeftMask); - if (Data & RightMask) - Data |= LeftMask; - else - Data &= ~LeftMask; - if (fBit) - Data |= RightMask; - else - Data &= ~RightMask; - - } - skb->data[i] = Data; - } - } -} - -/* - * Parse AT+F.. FAX class 1 commands - */ - -static int -isdn_tty_cmd_FCLASS1(char **p, modem_info *info) -{ - static char *cmd[] = - {"AE", "TS", "RS", "TM", "RM", "TH", "RH"}; - isdn_ctrl c; - int par, i; - u_long flags; - - for (c.parm.aux.cmd = 0; c.parm.aux.cmd < 7; c.parm.aux.cmd++) - if (!strncmp(p[0], cmd[c.parm.aux.cmd], 2)) - break; - -#ifdef ISDN_TTY_FAX_CMD_DEBUG - printk(KERN_DEBUG "isdn_tty_cmd_FCLASS1 (%s,%d)\n", p[0], c.parm.aux.cmd); -#endif - if (c.parm.aux.cmd == 7) - PARSE_ERROR1; - - p[0] += 2; - switch (*p[0]) { - case '?': - p[0]++; - c.parm.aux.subcmd = AT_QUERY; - break; - case '=': - p[0]++; - if (*p[0] == '?') { - p[0]++; - c.parm.aux.subcmd = AT_EQ_QUERY; - } else { - par = isdn_getnum(p); - if ((par < 0) || (par > 255)) - PARSE_ERROR1; - c.parm.aux.subcmd = AT_EQ_VALUE; - c.parm.aux.para[0] = par; - } - break; - case 0: - c.parm.aux.subcmd = AT_COMMAND; - break; - default: - PARSE_ERROR1; - } - c.command = ISDN_CMD_FAXCMD; -#ifdef ISDN_TTY_FAX_CMD_DEBUG - printk(KERN_DEBUG "isdn_tty_cmd_FCLASS1 %d/%d/%d)\n", - c.parm.aux.cmd, c.parm.aux.subcmd, c.parm.aux.para[0]); -#endif - if (info->isdn_driver < 0) { - if ((c.parm.aux.subcmd == AT_EQ_VALUE) || - (c.parm.aux.subcmd == AT_COMMAND)) { - PARSE_ERROR1; - } - spin_lock_irqsave(&dev->lock, flags); - /* get a temporary connection to the first free fax driver */ - i = isdn_get_free_channel(ISDN_USAGE_FAX, ISDN_PROTO_L2_FAX, - ISDN_PROTO_L3_FCLASS1, -1, -1, "00"); - if (i < 0) { - spin_unlock_irqrestore(&dev->lock, flags); - PARSE_ERROR1; - } - info->isdn_driver = dev->drvmap[i]; - info->isdn_channel = dev->chanmap[i]; - info->drv_index = i; - dev->m_idx[i] = info->line; - spin_unlock_irqrestore(&dev->lock, flags); - c.driver = info->isdn_driver; - c.arg = info->isdn_channel; - isdn_command(&c); - spin_lock_irqsave(&dev->lock, flags); - isdn_free_channel(info->isdn_driver, info->isdn_channel, - ISDN_USAGE_FAX); - info->isdn_driver = -1; - info->isdn_channel = -1; - if (info->drv_index >= 0) { - dev->m_idx[info->drv_index] = -1; - info->drv_index = -1; - } - spin_unlock_irqrestore(&dev->lock, flags); - } else { - c.driver = info->isdn_driver; - c.arg = info->isdn_channel; - isdn_command(&c); - } - return 1; -} - -/* - * Parse AT+F.. FAX class 2 commands - */ - -static int -isdn_tty_cmd_FCLASS2(char **p, modem_info *info) -{ - atemu *m = &info->emu; - T30_s *f = info->fax; - isdn_ctrl cmd; - int par; - char rs[50]; - char rss[50]; - int maxdccval[] = - {1, 5, 2, 2, 3, 2, 0, 7}; - - /* FAA still unchanged */ - if (!strncmp(p[0], "AA", 2)) { /* TODO */ - p[0] += 2; - switch (*p[0]) { - case '?': - p[0]++; - sprintf(rs, "\r\n%d", 0); - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - par = isdn_getnum(p); - if ((par < 0) || (par > 255)) - PARSE_ERROR1; - break; - default: - PARSE_ERROR1; - } - return 0; - } - /* BADLIN=value - dummy 0=disable errorchk disabled, 1-255 nr. of lines for making page bad */ - if (!strncmp(p[0], "BADLIN", 6)) { - p[0] += 6; - switch (*p[0]) { - case '?': - p[0]++; - sprintf(rs, "\r\n%d", f->badlin); - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - if (*p[0] == '?') { - p[0]++; - sprintf(rs, "\r\n0-255"); - isdn_tty_at_cout(rs, info); - } else { - par = isdn_getnum(p); - if ((par < 0) || (par > 255)) - PARSE_ERROR1; - f->badlin = par; -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax FBADLIN=%d\n", par); -#endif - } - break; - default: - PARSE_ERROR1; - } - return 0; - } - /* BADMUL=value - dummy 0=disable errorchk disabled (threshold multiplier) */ - if (!strncmp(p[0], "BADMUL", 6)) { - p[0] += 6; - switch (*p[0]) { - case '?': - p[0]++; - sprintf(rs, "\r\n%d", f->badmul); - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - if (*p[0] == '?') { - p[0]++; - sprintf(rs, "\r\n0-255"); - isdn_tty_at_cout(rs, info); - } else { - par = isdn_getnum(p); - if ((par < 0) || (par > 255)) - PARSE_ERROR1; - f->badmul = par; -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax FBADMUL=%d\n", par); -#endif - } - break; - default: - PARSE_ERROR1; - } - return 0; - } - /* BOR=n - Phase C bit order, 0=direct, 1=reverse */ - if (!strncmp(p[0], "BOR", 3)) { - p[0] += 3; - switch (*p[0]) { - case '?': - p[0]++; - sprintf(rs, "\r\n%d", f->bor); - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - if (*p[0] == '?') { - p[0]++; - sprintf(rs, "\r\n0,1"); - isdn_tty_at_cout(rs, info); - } else { - par = isdn_getnum(p); - if ((par < 0) || (par > 1)) - PARSE_ERROR1; - f->bor = par; -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax FBOR=%d\n", par); -#endif - } - break; - default: - PARSE_ERROR1; - } - return 0; - } - /* NBC=n - No Best Capabilities */ - if (!strncmp(p[0], "NBC", 3)) { - p[0] += 3; - switch (*p[0]) { - case '?': - p[0]++; - sprintf(rs, "\r\n%d", f->nbc); - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - if (*p[0] == '?') { - p[0]++; - sprintf(rs, "\r\n0,1"); - isdn_tty_at_cout(rs, info); - } else { - par = isdn_getnum(p); - if ((par < 0) || (par > 1)) - PARSE_ERROR1; - f->nbc = par; -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax FNBC=%d\n", par); -#endif - } - break; - default: - PARSE_ERROR1; - } - return 0; - } - /* BUF? - Readonly buffersize readout */ - if (!strncmp(p[0], "BUF?", 4)) { - p[0] += 4; -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax FBUF? (%d) \n", (16 * m->mdmreg[REG_PSIZE])); -#endif - p[0]++; - sprintf(rs, "\r\n %d ", (16 * m->mdmreg[REG_PSIZE])); - isdn_tty_at_cout(rs, info); - return 0; - } - /* CIG=string - local fax station id string for polling rx */ - if (!strncmp(p[0], "CIG", 3)) { - int i, r; - p[0] += 3; - switch (*p[0]) { - case '?': - p[0]++; - sprintf(rs, "\r\n\"%s\"", f->pollid); - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - if (*p[0] == '?') { - p[0]++; - sprintf(rs, "\r\n\"STRING\""); - isdn_tty_at_cout(rs, info); - } else { - if (*p[0] == '"') - p[0]++; - for (i = 0; (*p[0]) && i < (FAXIDLEN - 1) && (*p[0] != '"'); i++) { - f->pollid[i] = *p[0]++; - } - if (*p[0] == '"') - p[0]++; - for (r = i; r < FAXIDLEN; r++) { - f->pollid[r] = 32; - } - f->pollid[FAXIDLEN - 1] = 0; -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax local poll ID rx \"%s\"\n", f->pollid); -#endif - } - break; - default: - PARSE_ERROR1; - } - return 0; - } - /* CQ=n - copy qlty chk, 0= no chk, 1=only 1D chk, 2=1D+2D chk */ - if (!strncmp(p[0], "CQ", 2)) { - p[0] += 2; - switch (*p[0]) { - case '?': - p[0]++; - sprintf(rs, "\r\n%d", f->cq); - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - if (*p[0] == '?') { - p[0]++; - sprintf(rs, "\r\n0,1,2"); - isdn_tty_at_cout(rs, info); - } else { - par = isdn_getnum(p); - if ((par < 0) || (par > 2)) - PARSE_ERROR1; - f->cq = par; -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax FCQ=%d\n", par); -#endif - } - break; - default: - PARSE_ERROR1; - } - return 0; - } - /* CR=n - can receive? 0= no data rx or poll remote dev, 1=do receive data or poll remote dev */ - if (!strncmp(p[0], "CR", 2)) { - p[0] += 2; - switch (*p[0]) { - case '?': - p[0]++; - sprintf(rs, "\r\n%d", f->cr); /* read actual value from struct and print */ - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - if (*p[0] == '?') { - p[0]++; - sprintf(rs, "\r\n0,1"); /* display online help */ - isdn_tty_at_cout(rs, info); - } else { - par = isdn_getnum(p); - if ((par < 0) || (par > 1)) - PARSE_ERROR1; - f->cr = par; -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax FCR=%d\n", par); -#endif - } - break; - default: - PARSE_ERROR1; - } - return 0; - } - /* CTCRTY=value - ECM retry count */ - if (!strncmp(p[0], "CTCRTY", 6)) { - p[0] += 6; - switch (*p[0]) { - case '?': - p[0]++; - sprintf(rs, "\r\n%d", f->ctcrty); - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - if (*p[0] == '?') { - p[0]++; - sprintf(rs, "\r\n0-255"); - isdn_tty_at_cout(rs, info); - } else { - par = isdn_getnum(p); - if ((par < 0) || (par > 255)) - PARSE_ERROR1; - f->ctcrty = par; -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax FCTCRTY=%d\n", par); -#endif - } - break; - default: - PARSE_ERROR1; - } - return 0; - } - /* DCC=vr,br,wd,ln,df,ec,bf,st - DCE capabilities parms */ - if (!strncmp(p[0], "DCC", 3)) { - char *rp = &f->resolution; - int i; - - p[0] += 3; - switch (*p[0]) { - case '?': - p[0]++; - strcpy(rs, "\r\n"); - for (i = 0; i < 8; i++) { - sprintf(rss, "%c%s", rp[i] + 48, - (i < 7) ? "," : ""); - strcat(rs, rss); - } - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - if (*p[0] == '?') { - isdn_tty_at_cout("\r\n(0,1),(0-5),(0-2),(0-2),(0-3),(0-2),(0),(0-7)", info); - p[0]++; - } else { - for (i = 0; (((*p[0] >= '0') && (*p[0] <= '9')) || (*p[0] == ',')) && (i < 8); i++) { - if (*p[0] != ',') { - if ((*p[0] - 48) > maxdccval[i]) { - PARSE_ERROR1; - } - rp[i] = *p[0] - 48; - p[0]++; - if (*p[0] == ',') - p[0]++; - } else - p[0]++; - } -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax FDCC capabilities DCE=%d,%d,%d,%d,%d,%d,%d,%d\n", - rp[0], rp[1], rp[2], rp[3], rp[4], rp[5], rp[6], rp[7]); -#endif - } - break; - default: - PARSE_ERROR1; - } - return 0; - } - /* DIS=vr,br,wd,ln,df,ec,bf,st - current session parms */ - if (!strncmp(p[0], "DIS", 3)) { - char *rp = &f->resolution; - int i; - - p[0] += 3; - switch (*p[0]) { - case '?': - p[0]++; - strcpy(rs, "\r\n"); - for (i = 0; i < 8; i++) { - sprintf(rss, "%c%s", rp[i] + 48, - (i < 7) ? "," : ""); - strcat(rs, rss); - } - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - if (*p[0] == '?') { - isdn_tty_at_cout("\r\n(0,1),(0-5),(0-2),(0-2),(0-3),(0-2),(0),(0-7)", info); - p[0]++; - } else { - for (i = 0; (((*p[0] >= '0') && (*p[0] <= '9')) || (*p[0] == ',')) && (i < 8); i++) { - if (*p[0] != ',') { - if ((*p[0] - 48) > maxdccval[i]) { - PARSE_ERROR1; - } - rp[i] = *p[0] - 48; - p[0]++; - if (*p[0] == ',') - p[0]++; - } else - p[0]++; - } -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax FDIS session parms=%d,%d,%d,%d,%d,%d,%d,%d\n", - rp[0], rp[1], rp[2], rp[3], rp[4], rp[5], rp[6], rp[7]); -#endif - } - break; - default: - PARSE_ERROR1; - } - return 0; - } - /* DR - Receive Phase C data command, initiates document reception */ - if (!strncmp(p[0], "DR", 2)) { - p[0] += 2; - if ((info->faxonline & 16) && /* incoming connection */ - ((f->phase == ISDN_FAX_PHASE_B) || (f->phase == ISDN_FAX_PHASE_D))) { -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax FDR\n"); -#endif - f->code = ISDN_TTY_FAX_DR; - cmd.driver = info->isdn_driver; - cmd.arg = info->isdn_channel; - cmd.command = ISDN_CMD_FAXCMD; - isdn_command(&cmd); - if (f->phase == ISDN_FAX_PHASE_B) { - f->phase = ISDN_FAX_PHASE_C; - } else if (f->phase == ISDN_FAX_PHASE_D) { - switch (f->fet) { - case 0: /* next page will be received */ - f->phase = ISDN_FAX_PHASE_C; - isdn_tty_fax_modem_result(7, info); /* CONNECT */ - break; - case 1: /* next doc will be received */ - f->phase = ISDN_FAX_PHASE_B; - break; - case 2: /* fax session is terminating */ - f->phase = ISDN_FAX_PHASE_E; - break; - default: - PARSE_ERROR1; - } - } - } else { - PARSE_ERROR1; - } - return 1; - } - /* DT=df,vr,wd,ln - TX phase C data command (release DCE to proceed with negotiation) */ - if (!strncmp(p[0], "DT", 2)) { - int i, val[] = - {4, 0, 2, 3}; - char *rp = &f->resolution; - - p[0] += 2; - if (!(info->faxonline & 1)) /* not outgoing connection */ - PARSE_ERROR1; - - for (i = 0; (((*p[0] >= '0') && (*p[0] <= '9')) || (*p[0] == ',')) && (i < 4); i++) { - if (*p[0] != ',') { - if ((*p[0] - 48) > maxdccval[val[i]]) { - PARSE_ERROR1; - } - rp[val[i]] = *p[0] - 48; - p[0]++; - if (*p[0] == ',') - p[0]++; - } else - p[0]++; - } -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax FDT tx data command parms=%d,%d,%d,%d\n", - rp[4], rp[0], rp[2], rp[3]); -#endif - if ((f->phase == ISDN_FAX_PHASE_B) || (f->phase == ISDN_FAX_PHASE_D)) { - f->code = ISDN_TTY_FAX_DT; - cmd.driver = info->isdn_driver; - cmd.arg = info->isdn_channel; - cmd.command = ISDN_CMD_FAXCMD; - isdn_command(&cmd); - if (f->phase == ISDN_FAX_PHASE_D) { - f->phase = ISDN_FAX_PHASE_C; - isdn_tty_fax_modem_result(7, info); /* CONNECT */ - } - } else { - PARSE_ERROR1; - } - return 1; - } - /* ECM=n - Error mode control 0=disabled, 2=enabled, handled by DCE alone incl. buff of partial pages */ - if (!strncmp(p[0], "ECM", 3)) { - p[0] += 3; - switch (*p[0]) { - case '?': - p[0]++; - sprintf(rs, "\r\n%d", f->ecm); - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - if (*p[0] == '?') { - p[0]++; - sprintf(rs, "\r\n0,2"); - isdn_tty_at_cout(rs, info); - } else { - par = isdn_getnum(p); - if ((par != 0) && (par != 2)) - PARSE_ERROR1; - f->ecm = par; -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax FECM=%d\n", par); -#endif - } - break; - default: - PARSE_ERROR1; - } - return 0; - } - /* ET=n - End of page or document */ - if (!strncmp(p[0], "ET=", 3)) { - p[0] += 3; - if (*p[0] == '?') { - p[0]++; - sprintf(rs, "\r\n0-2"); - isdn_tty_at_cout(rs, info); - } else { - if ((f->phase != ISDN_FAX_PHASE_D) || - (!(info->faxonline & 1))) - PARSE_ERROR1; - par = isdn_getnum(p); - if ((par < 0) || (par > 2)) - PARSE_ERROR1; - f->fet = par; - f->code = ISDN_TTY_FAX_ET; - cmd.driver = info->isdn_driver; - cmd.arg = info->isdn_channel; - cmd.command = ISDN_CMD_FAXCMD; - isdn_command(&cmd); -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax FET=%d\n", par); -#endif - return 1; - } - return 0; - } - /* K - terminate */ - if (!strncmp(p[0], "K", 1)) { - p[0] += 1; - if ((f->phase == ISDN_FAX_PHASE_IDLE) || (f->phase == ISDN_FAX_PHASE_E)) - PARSE_ERROR1; - isdn_tty_modem_hup(info, 1); - return 1; - } - /* LID=string - local fax ID */ - if (!strncmp(p[0], "LID", 3)) { - int i, r; - p[0] += 3; - switch (*p[0]) { - case '?': - p[0]++; - sprintf(rs, "\r\n\"%s\"", f->id); - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - if (*p[0] == '?') { - p[0]++; - sprintf(rs, "\r\n\"STRING\""); - isdn_tty_at_cout(rs, info); - } else { - if (*p[0] == '"') - p[0]++; - for (i = 0; (*p[0]) && i < (FAXIDLEN - 1) && (*p[0] != '"'); i++) { - f->id[i] = *p[0]++; - } - if (*p[0] == '"') - p[0]++; - for (r = i; r < FAXIDLEN; r++) { - f->id[r] = 32; - } - f->id[FAXIDLEN - 1] = 0; -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax local ID \"%s\"\n", f->id); -#endif - } - break; - default: - PARSE_ERROR1; - } - return 0; - } - - /* MDL? - DCE Model */ - if (!strncmp(p[0], "MDL?", 4)) { - p[0] += 4; -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: FMDL?\n"); -#endif - isdn_tty_at_cout("\r\nisdn4linux", info); - return 0; - } - /* MFR? - DCE Manufacturer */ - if (!strncmp(p[0], "MFR?", 4)) { - p[0] += 4; -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: FMFR?\n"); -#endif - isdn_tty_at_cout("\r\nisdn4linux", info); - return 0; - } - /* MINSP=n - Minimum Speed for Phase C */ - if (!strncmp(p[0], "MINSP", 5)) { - p[0] += 5; - switch (*p[0]) { - case '?': - p[0]++; - sprintf(rs, "\r\n%d", f->minsp); - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - if (*p[0] == '?') { - p[0]++; - sprintf(rs, "\r\n0-5"); - isdn_tty_at_cout(rs, info); - } else { - par = isdn_getnum(p); - if ((par < 0) || (par > 5)) - PARSE_ERROR1; - f->minsp = par; -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax FMINSP=%d\n", par); -#endif - } - break; - default: - PARSE_ERROR1; - } - return 0; - } - /* PHCTO=value - DTE phase C timeout */ - if (!strncmp(p[0], "PHCTO", 5)) { - p[0] += 5; - switch (*p[0]) { - case '?': - p[0]++; - sprintf(rs, "\r\n%d", f->phcto); - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - if (*p[0] == '?') { - p[0]++; - sprintf(rs, "\r\n0-255"); - isdn_tty_at_cout(rs, info); - } else { - par = isdn_getnum(p); - if ((par < 0) || (par > 255)) - PARSE_ERROR1; - f->phcto = par; -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax FPHCTO=%d\n", par); -#endif - } - break; - default: - PARSE_ERROR1; - } - return 0; - } - - /* REL=n - Phase C received EOL alignment */ - if (!strncmp(p[0], "REL", 3)) { - p[0] += 3; - switch (*p[0]) { - case '?': - p[0]++; - sprintf(rs, "\r\n%d", f->rel); - isdn_tty_at_cout(rs, info); - break; - case '=': - p[0]++; - if (*p[0] == '?') { - p[0]++; - sprintf(rs, "\r\n0,1"); - isdn_tty_at_cout(rs, info); - } else { - par = isdn_getnum(p); - if ((par < 0) || (par > 1)) - PARSE_ERROR1; - f->rel = par; -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax FREL=%d\n", par); -#endif - } - break; - default: - PARSE_ERROR1; - } - return 0; - } - /* REV? - DCE Revision */ - if (!strncmp(p[0], "REV?", 4)) { - p[0] += 4; -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: FREV?\n"); -#endif - strcpy(rss, isdn_tty_fax_revision); - sprintf(rs, "\r\nRev: %s", isdn_getrev(rss)); - isdn_tty_at_cout(rs, info); - return 0; - } - - /* Phase C Transmit Data Block Size */ - if (!strncmp(p[0], "TBC=", 4)) { /* dummy, not used */ - p[0] += 4; -#ifdef ISDN_TTY_FAX_STAT_DEBUG - printk(KERN_DEBUG "isdn_tty: Fax FTBC=%c\n", *p[0]); -#endif - switch (*p[0]) { - case '0': - p[0]++; - break; - default: - PARSE_ERROR1; - } - return 0; - } - printk(KERN_DEBUG "isdn_tty: unknown token=>AT+F%s<\n", p[0]); - PARSE_ERROR1; -} - -int -isdn_tty_cmd_PLUSF_FAX(char **p, modem_info *info) -{ - if (TTY_IS_FCLASS2(info)) - return (isdn_tty_cmd_FCLASS2(p, info)); - else if (TTY_IS_FCLASS1(info)) - return (isdn_tty_cmd_FCLASS1(p, info)); - PARSE_ERROR1; -} diff --git a/drivers/isdn/i4l/isdn_ttyfax.h b/drivers/isdn/i4l/isdn_ttyfax.h deleted file mode 100644 index ccda4fcf8f7b..000000000000 --- a/drivers/isdn/i4l/isdn_ttyfax.h +++ /dev/null @@ -1,17 +0,0 @@ -/* $Id: isdn_ttyfax.h,v 1.1.2.2 2004/01/12 22:37:19 keil Exp $ - * - * header for Linux ISDN subsystem, tty_fax related functions (linklevel). - * - * Copyright 1999 by Armin Schindler (mac@melware.de) - * Copyright 1999 by Ralf Spachmann (mel@melware.de) - * Copyright 1999 by Cytronics & Melware - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - - -#define XON 0x11 -#define XOFF 0x13 -#define DC2 0x12 diff --git a/drivers/isdn/i4l/isdn_v110.c b/drivers/isdn/i4l/isdn_v110.c deleted file mode 100644 index d11fe76f138f..000000000000 --- a/drivers/isdn/i4l/isdn_v110.c +++ /dev/null @@ -1,625 +0,0 @@ -/* $Id: isdn_v110.c,v 1.1.2.2 2004/01/12 22:37:19 keil Exp $ - * - * Linux ISDN subsystem, V.110 related functions (linklevel). - * - * Copyright by Thomas Pfeiffer (pfeiffer@pds.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#include -#include -#include -#include -#include - -#include -#include "isdn_v110.h" - -#undef ISDN_V110_DEBUG - -char *isdn_v110_revision = "$Revision: 1.1.2.2 $"; - -#define V110_38400 255 -#define V110_19200 15 -#define V110_9600 3 - -/* - * The following data are precoded matrices, online and offline matrix - * for 9600, 19200 und 38400, respectively - */ -static unsigned char V110_OnMatrix_9600[] = -{0xfc, 0xfc, 0xfc, 0xfc, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, - 0xff, 0xfd, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xfd, - 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, - 0xff, 0xfd, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xfd}; - -static unsigned char V110_OffMatrix_9600[] = -{0xfc, 0xfc, 0xfc, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -static unsigned char V110_OnMatrix_19200[] = -{0xf0, 0xf0, 0xff, 0xf7, 0xff, 0xf7, 0xff, 0xf7, 0xff, 0xf7, - 0xfd, 0xff, 0xff, 0xf7, 0xff, 0xf7, 0xff, 0xf7, 0xff, 0xf7}; - -static unsigned char V110_OffMatrix_19200[] = -{0xf0, 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -static unsigned char V110_OnMatrix_38400[] = -{0x00, 0x7f, 0x7f, 0x7f, 0x7f, 0xfd, 0x7f, 0x7f, 0x7f, 0x7f}; - -static unsigned char V110_OffMatrix_38400[] = -{0x00, 0xff, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff}; - -/* - * FlipBits reorders sequences of keylen bits in one byte. - * E.g. source order 7654321 will be converted to 45670123 when keylen = 4, - * and to 67452301 when keylen = 2. This is necessary because ordering on - * the isdn line is the other way. - */ -static inline unsigned char -FlipBits(unsigned char c, int keylen) -{ - unsigned char b = c; - unsigned char bit = 128; - int i; - int j; - int hunks = (8 / keylen); - - c = 0; - for (i = 0; i < hunks; i++) { - for (j = 0; j < keylen; j++) { - if (b & (bit >> j)) - c |= bit >> (keylen - j - 1); - } - bit >>= keylen; - } - return c; -} - - -/* isdn_v110_open allocates and initializes private V.110 data - * structures and returns a pointer to these. - */ -static isdn_v110_stream * -isdn_v110_open(unsigned char key, int hdrlen, int maxsize) -{ - int i; - isdn_v110_stream *v; - - if ((v = kzalloc(sizeof(isdn_v110_stream), GFP_ATOMIC)) == NULL) - return NULL; - v->key = key; - v->nbits = 0; - for (i = 0; key & (1 << i); i++) - v->nbits++; - - v->nbytes = 8 / v->nbits; - v->decodelen = 0; - - switch (key) { - case V110_38400: - v->OnlineFrame = V110_OnMatrix_38400; - v->OfflineFrame = V110_OffMatrix_38400; - break; - case V110_19200: - v->OnlineFrame = V110_OnMatrix_19200; - v->OfflineFrame = V110_OffMatrix_19200; - break; - default: - v->OnlineFrame = V110_OnMatrix_9600; - v->OfflineFrame = V110_OffMatrix_9600; - break; - } - v->framelen = v->nbytes * 10; - v->SyncInit = 5; - v->introducer = 0; - v->dbit = 1; - v->b = 0; - v->skbres = hdrlen; - v->maxsize = maxsize - hdrlen; - if ((v->encodebuf = kmalloc(maxsize, GFP_ATOMIC)) == NULL) { - kfree(v); - return NULL; - } - return v; -} - -/* isdn_v110_close frees private V.110 data structures */ -void -isdn_v110_close(isdn_v110_stream *v) -{ - if (v == NULL) - return; -#ifdef ISDN_V110_DEBUG - printk(KERN_DEBUG "v110 close\n"); -#endif - kfree(v->encodebuf); - kfree(v); -} - - -/* - * ValidHeaderBytes return the number of valid bytes in v->decodebuf - */ -static int -ValidHeaderBytes(isdn_v110_stream *v) -{ - int i; - for (i = 0; (i < v->decodelen) && (i < v->nbytes); i++) - if ((v->decodebuf[i] & v->key) != 0) - break; - return i; -} - -/* - * SyncHeader moves the decodebuf ptr to the next valid header - */ -static void -SyncHeader(isdn_v110_stream *v) -{ - unsigned char *rbuf = v->decodebuf; - int len = v->decodelen; - - if (len == 0) - return; - for (rbuf++, len--; len > 0; len--, rbuf++) /* such den SyncHeader in buf ! */ - if ((*rbuf & v->key) == 0) /* erstes byte gefunden ? */ - break; /* jupp! */ - if (len) - memcpy(v->decodebuf, rbuf, len); - - v->decodelen = len; -#ifdef ISDN_V110_DEBUG - printk(KERN_DEBUG "isdn_v110: Header resync\n"); -#endif -} - -/* DecodeMatrix takes n (n>=1) matrices (v110 frames, 10 bytes) where - len is the number of matrix-lines. len must be a multiple of 10, i.e. - only complete matices must be given. - From these, netto data is extracted and returned in buf. The return-value - is the bytecount of the decoded data. -*/ -static int -DecodeMatrix(isdn_v110_stream *v, unsigned char *m, int len, unsigned char *buf) -{ - int line = 0; - int buflen = 0; - int mbit = 64; - int introducer = v->introducer; - int dbit = v->dbit; - unsigned char b = v->b; - - while (line < len) { /* Are we done with all lines of the matrix? */ - if ((line % 10) == 0) { /* the 0. line of the matrix is always 0 ! */ - if (m[line] != 0x00) { /* not 0 ? -> error! */ -#ifdef ISDN_V110_DEBUG - printk(KERN_DEBUG "isdn_v110: DecodeMatrix, V110 Bad Header\n"); - /* returning now is not the right thing, though :-( */ -#endif - } - line++; /* next line of matrix */ - continue; - } else if ((line % 10) == 5) { /* in line 5 there's only e-bits ! */ - if ((m[line] & 0x70) != 0x30) { /* 011 has to be at the beginning! */ -#ifdef ISDN_V110_DEBUG - printk(KERN_DEBUG "isdn_v110: DecodeMatrix, V110 Bad 5th line\n"); - /* returning now is not the right thing, though :-( */ -#endif - } - line++; /* next line */ - continue; - } else if (!introducer) { /* every byte starts with 10 (stopbit, startbit) */ - introducer = (m[line] & mbit) ? 0 : 1; /* current bit of the matrix */ - next_byte: - if (mbit > 2) { /* was it the last bit in this line ? */ - mbit >>= 1; /* no -> take next */ - continue; - } /* otherwise start with leftmost bit in the next line */ - mbit = 64; - line++; - continue; - } else { /* otherwise we need to set a data bit */ - if (m[line] & mbit) /* was that bit set in the matrix ? */ - b |= dbit; /* yes -> set it in the data byte */ - else - b &= dbit - 1; /* no -> clear it in the data byte */ - if (dbit < 128) /* is that data byte done ? */ - dbit <<= 1; /* no, got the next bit */ - else { /* data byte is done */ - buf[buflen++] = b; /* copy byte into the output buffer */ - introducer = b = 0; /* init of the intro sequence and of the data byte */ - dbit = 1; /* next we look for the 0th bit */ - } - goto next_byte; /* look for next bit in the matrix */ - } - } - v->introducer = introducer; - v->dbit = dbit; - v->b = b; - return buflen; /* return number of bytes in the output buffer */ -} - -/* - * DecodeStream receives V.110 coded data from the input stream. It recovers the - * original frames. - * The input stream doesn't need to be framed - */ -struct sk_buff * -isdn_v110_decode(isdn_v110_stream *v, struct sk_buff *skb) -{ - int i; - int j; - int len; - unsigned char *v110_buf; - unsigned char *rbuf; - - if (!skb) { - printk(KERN_WARNING "isdn_v110_decode called with NULL skb!\n"); - return NULL; - } - rbuf = skb->data; - len = skb->len; - if (v == NULL) { - /* invalid handle, no chance to proceed */ - printk(KERN_WARNING "isdn_v110_decode called with NULL stream!\n"); - dev_kfree_skb(skb); - return NULL; - } - if (v->decodelen == 0) /* cache empty? */ - for (; len > 0; len--, rbuf++) /* scan for SyncHeader in buf */ - if ((*rbuf & v->key) == 0) - break; /* found first byte */ - if (len == 0) { - dev_kfree_skb(skb); - return NULL; - } - /* copy new data to decode-buffer */ - memcpy(&(v->decodebuf[v->decodelen]), rbuf, len); - v->decodelen += len; -ReSync: - if (v->decodelen < v->nbytes) { /* got a new header ? */ - dev_kfree_skb(skb); - return NULL; /* no, try later */ - } - if (ValidHeaderBytes(v) != v->nbytes) { /* is that a valid header? */ - SyncHeader(v); /* no -> look for header */ - goto ReSync; - } - len = (v->decodelen - (v->decodelen % (10 * v->nbytes))) / v->nbytes; - if ((v110_buf = kmalloc(len, GFP_ATOMIC)) == NULL) { - printk(KERN_WARNING "isdn_v110_decode: Couldn't allocate v110_buf\n"); - dev_kfree_skb(skb); - return NULL; - } - for (i = 0; i < len; i++) { - v110_buf[i] = 0; - for (j = 0; j < v->nbytes; j++) - v110_buf[i] |= (v->decodebuf[(i * v->nbytes) + j] & v->key) << (8 - ((j + 1) * v->nbits)); - v110_buf[i] = FlipBits(v110_buf[i], v->nbits); - } - v->decodelen = (v->decodelen % (10 * v->nbytes)); - memcpy(v->decodebuf, &(v->decodebuf[len * v->nbytes]), v->decodelen); - - skb_trim(skb, DecodeMatrix(v, v110_buf, len, skb->data)); - kfree(v110_buf); - if (skb->len) - return skb; - else { - kfree_skb(skb); - return NULL; - } -} - -/* EncodeMatrix takes input data in buf, len is the bytecount. - Data is encoded into v110 frames in m. Return value is the number of - matrix-lines generated. -*/ -static int -EncodeMatrix(unsigned char *buf, int len, unsigned char *m, int mlen) -{ - int line = 0; - int i = 0; - int mbit = 128; - int dbit = 1; - int introducer = 3; - int ibit[] = {0, 1, 1}; - - while ((i < len) && (line < mlen)) { /* while we still have input data */ - switch (line % 10) { /* in which line of the matrix are we? */ - case 0: - m[line++] = 0x00; /* line 0 is always 0 */ - mbit = 128; /* go on with the 7th bit */ - break; - case 5: - m[line++] = 0xbf; /* line 5 is always 10111111 */ - mbit = 128; /* go on with the 7th bit */ - break; - } - if (line >= mlen) { - printk(KERN_WARNING "isdn_v110 (EncodeMatrix): buffer full!\n"); - return line; - } - next_bit: - switch (mbit) { /* leftmost or rightmost bit ? */ - case 1: - line++; /* rightmost -> go to next line */ - if (line >= mlen) { - printk(KERN_WARNING "isdn_v110 (EncodeMatrix): buffer full!\n"); - return line; - } - /* fall through */ - case 128: - m[line] = 128; /* leftmost -> set byte to 1000000 */ - mbit = 64; /* current bit in the matrix line */ - continue; - } - if (introducer) { /* set 110 sequence ? */ - introducer--; /* set on digit less */ - m[line] |= ibit[introducer] ? mbit : 0; /* set corresponding bit */ - mbit >>= 1; /* bit of matrix line >> 1 */ - goto next_bit; /* and go on there */ - } /* else push data bits into the matrix! */ - m[line] |= (buf[i] & dbit) ? mbit : 0; /* set data bit in matrix */ - if (dbit == 128) { /* was it the last one? */ - dbit = 1; /* then go on with first bit of */ - i++; /* next byte in input buffer */ - if (i < len) /* input buffer done ? */ - introducer = 3; /* no, write introducer 110 */ - else { /* input buffer done ! */ - m[line] |= (mbit - 1) & 0xfe; /* set remaining bits in line to 1 */ - break; - } - } else /* not the last data bit */ - dbit <<= 1; /* then go to next data bit */ - mbit >>= 1; /* go to next bit of matrix */ - goto next_bit; - - } - /* if necessary, generate remaining lines of the matrix... */ - if ((line) && ((line + 10) < mlen)) - switch (++line % 10) { - case 1: - m[line++] = 0xfe; - /* fall through */ - case 2: - m[line++] = 0xfe; - /* fall through */ - case 3: - m[line++] = 0xfe; - /* fall through */ - case 4: - m[line++] = 0xfe; - /* fall through */ - case 5: - m[line++] = 0xbf; - /* fall through */ - case 6: - m[line++] = 0xfe; - /* fall through */ - case 7: - m[line++] = 0xfe; - /* fall through */ - case 8: - m[line++] = 0xfe; - /* fall through */ - case 9: - m[line++] = 0xfe; - } - return line; /* that's how many lines we have */ -} - -/* - * Build a sync frame. - */ -static struct sk_buff * -isdn_v110_sync(isdn_v110_stream *v) -{ - struct sk_buff *skb; - - if (v == NULL) { - /* invalid handle, no chance to proceed */ - printk(KERN_WARNING "isdn_v110_sync called with NULL stream!\n"); - return NULL; - } - if ((skb = dev_alloc_skb(v->framelen + v->skbres))) { - skb_reserve(skb, v->skbres); - skb_put_data(skb, v->OfflineFrame, v->framelen); - } - return skb; -} - -/* - * Build an idle frame. - */ -static struct sk_buff * -isdn_v110_idle(isdn_v110_stream *v) -{ - struct sk_buff *skb; - - if (v == NULL) { - /* invalid handle, no chance to proceed */ - printk(KERN_WARNING "isdn_v110_sync called with NULL stream!\n"); - return NULL; - } - if ((skb = dev_alloc_skb(v->framelen + v->skbres))) { - skb_reserve(skb, v->skbres); - skb_put_data(skb, v->OnlineFrame, v->framelen); - } - return skb; -} - -struct sk_buff * -isdn_v110_encode(isdn_v110_stream *v, struct sk_buff *skb) -{ - int i; - int j; - int rlen; - int mlen; - int olen; - int size; - int sval1; - int sval2; - int nframes; - unsigned char *v110buf; - unsigned char *rbuf; - struct sk_buff *nskb; - - if (v == NULL) { - /* invalid handle, no chance to proceed */ - printk(KERN_WARNING "isdn_v110_encode called with NULL stream!\n"); - return NULL; - } - if (!skb) { - /* invalid skb, no chance to proceed */ - printk(KERN_WARNING "isdn_v110_encode called with NULL skb!\n"); - return NULL; - } - rlen = skb->len; - nframes = (rlen + 3) / 4; - v110buf = v->encodebuf; - if ((nframes * 40) > v->maxsize) { - size = v->maxsize; - rlen = v->maxsize / 40; - } else - size = nframes * 40; - if (!(nskb = dev_alloc_skb(size + v->skbres + sizeof(int)))) { - printk(KERN_WARNING "isdn_v110_encode: Couldn't alloc skb\n"); - return NULL; - } - skb_reserve(nskb, v->skbres + sizeof(int)); - if (skb->len == 0) { - skb_put_data(nskb, v->OnlineFrame, v->framelen); - *((int *)skb_push(nskb, sizeof(int))) = 0; - return nskb; - } - mlen = EncodeMatrix(skb->data, rlen, v110buf, size); - /* now distribute 2 or 4 bits each to the output stream! */ - rbuf = skb_put(nskb, size); - olen = 0; - sval1 = 8 - v->nbits; - sval2 = v->key << sval1; - for (i = 0; i < mlen; i++) { - v110buf[i] = FlipBits(v110buf[i], v->nbits); - for (j = 0; j < v->nbytes; j++) { - if (size--) - *rbuf++ = ~v->key | (((v110buf[i] << (j * v->nbits)) & sval2) >> sval1); - else { - printk(KERN_WARNING "isdn_v110_encode: buffers full!\n"); - goto buffer_full; - } - olen++; - } - } -buffer_full: - skb_trim(nskb, olen); - *((int *)skb_push(nskb, sizeof(int))) = rlen; - return nskb; -} - -int -isdn_v110_stat_callback(int idx, isdn_ctrl *c) -{ - isdn_v110_stream *v = NULL; - int i; - int ret = 0; - - if (idx < 0) - return 0; - switch (c->command) { - case ISDN_STAT_BSENT: - /* Keep the send-queue of the driver filled - * with frames: - * If number of outstanding frames < 3, - * send down an Idle-Frame (or an Sync-Frame, if - * v->SyncInit != 0). - */ - if (!(v = dev->v110[idx])) - return 0; - atomic_inc(&dev->v110use[idx]); - for (i = 0; i * v->framelen < c->parm.length; i++) { - if (v->skbidle > 0) { - v->skbidle--; - ret = 1; - } else { - if (v->skbuser > 0) - v->skbuser--; - ret = 0; - } - } - for (i = v->skbuser + v->skbidle; i < 2; i++) { - struct sk_buff *skb; - if (v->SyncInit > 0) - skb = isdn_v110_sync(v); - else - skb = isdn_v110_idle(v); - if (skb) { - if (dev->drv[c->driver]->interface->writebuf_skb(c->driver, c->arg, 1, skb) <= 0) { - dev_kfree_skb(skb); - break; - } else { - if (v->SyncInit) - v->SyncInit--; - v->skbidle++; - } - } else - break; - } - atomic_dec(&dev->v110use[idx]); - return ret; - case ISDN_STAT_DHUP: - case ISDN_STAT_BHUP: - while (1) { - atomic_inc(&dev->v110use[idx]); - if (atomic_dec_and_test(&dev->v110use[idx])) { - isdn_v110_close(dev->v110[idx]); - dev->v110[idx] = NULL; - break; - } - mdelay(1); - } - break; - case ISDN_STAT_BCONN: - if (dev->v110emu[idx] && (dev->v110[idx] == NULL)) { - int hdrlen = dev->drv[c->driver]->interface->hl_hdrlen; - int maxsize = dev->drv[c->driver]->interface->maxbufsize; - atomic_inc(&dev->v110use[idx]); - switch (dev->v110emu[idx]) { - case ISDN_PROTO_L2_V11096: - dev->v110[idx] = isdn_v110_open(V110_9600, hdrlen, maxsize); - break; - case ISDN_PROTO_L2_V11019: - dev->v110[idx] = isdn_v110_open(V110_19200, hdrlen, maxsize); - break; - case ISDN_PROTO_L2_V11038: - dev->v110[idx] = isdn_v110_open(V110_38400, hdrlen, maxsize); - break; - default:; - } - if ((v = dev->v110[idx])) { - while (v->SyncInit) { - struct sk_buff *skb = isdn_v110_sync(v); - if (dev->drv[c->driver]->interface->writebuf_skb(c->driver, c->arg, 1, skb) <= 0) { - dev_kfree_skb(skb); - /* Unable to send, try later */ - break; - } - v->SyncInit--; - v->skbidle++; - } - } else - printk(KERN_WARNING "isdn_v110: Couldn't open stream for chan %d\n", idx); - atomic_dec(&dev->v110use[idx]); - } - break; - default: - return 0; - } - return 0; -} diff --git a/drivers/isdn/i4l/isdn_v110.h b/drivers/isdn/i4l/isdn_v110.h deleted file mode 100644 index de774ab598c9..000000000000 --- a/drivers/isdn/i4l/isdn_v110.h +++ /dev/null @@ -1,29 +0,0 @@ -/* $Id: isdn_v110.h,v 1.1.2.2 2004/01/12 22:37:19 keil Exp $ - * - * Linux ISDN subsystem, V.110 related functions (linklevel). - * - * Copyright by Thomas Pfeiffer (pfeiffer@pds.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#ifndef _isdn_v110_h_ -#define _isdn_v110_h_ - -/* - * isdn_v110_encode will take raw data and encode it using V.110 - */ -extern struct sk_buff *isdn_v110_encode(isdn_v110_stream *, struct sk_buff *); - -/* - * isdn_v110_decode receives V.110 coded data from the stream and rebuilds - * frames from them. The source stream doesn't need to be framed. - */ -extern struct sk_buff *isdn_v110_decode(isdn_v110_stream *, struct sk_buff *); - -extern int isdn_v110_stat_callback(int, isdn_ctrl *); -extern void isdn_v110_close(isdn_v110_stream *v); - -#endif diff --git a/drivers/isdn/i4l/isdn_x25iface.c b/drivers/isdn/i4l/isdn_x25iface.c deleted file mode 100644 index 48bfbcb4a09d..000000000000 --- a/drivers/isdn/i4l/isdn_x25iface.c +++ /dev/null @@ -1,332 +0,0 @@ -/* $Id: isdn_x25iface.c,v 1.1.2.2 2004/01/12 22:37:19 keil Exp $ - * - * Linux ISDN subsystem, X.25 related functions - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - * stuff needed to support the Linux X.25 PLP code on top of devices that - * can provide a lab_b service using the concap_proto mechanism. - * This module supports a network interface which provides lapb_sematics - * -- as defined in Documentation/networking/x25-iface.txt -- to - * the upper layer and assumes that the lower layer provides a reliable - * data link service by means of the concap_device_ops callbacks. - * - * Only protocol specific stuff goes here. Device specific stuff - * goes to another -- device related -- concap_proto support source file. - * - */ - -/* #include */ -#include -#include -#include -#include -#include -#include "isdn_x25iface.h" - -/* for debugging messages not to cause an oops when device pointer is NULL*/ -#define MY_DEVNAME(dev) ((dev) ? (dev)->name : "DEVICE UNSPECIFIED") - - -typedef struct isdn_x25iface_proto_data { - int magic; - enum wan_states state; - /* Private stuff, not to be accessed via proto_data. We provide the - other storage for the concap_proto instance here as well, - enabling us to allocate both with just one kmalloc(): */ - struct concap_proto priv; -} ix25_pdata_t; - - - -/* is now in header file (extern): struct concap_proto * isdn_x25iface_proto_new(void); */ -static void isdn_x25iface_proto_del(struct concap_proto *); -static int isdn_x25iface_proto_close(struct concap_proto *); -static int isdn_x25iface_proto_restart(struct concap_proto *, - struct net_device *, - struct concap_device_ops *); -static int isdn_x25iface_xmit(struct concap_proto *, struct sk_buff *); -static int isdn_x25iface_receive(struct concap_proto *, struct sk_buff *); -static int isdn_x25iface_connect_ind(struct concap_proto *); -static int isdn_x25iface_disconn_ind(struct concap_proto *); - - -static struct concap_proto_ops ix25_pops = { - .proto_new = &isdn_x25iface_proto_new, - .proto_del = &isdn_x25iface_proto_del, - .restart = &isdn_x25iface_proto_restart, - .close = &isdn_x25iface_proto_close, - .encap_and_xmit = &isdn_x25iface_xmit, - .data_ind = &isdn_x25iface_receive, - .connect_ind = &isdn_x25iface_connect_ind, - .disconn_ind = &isdn_x25iface_disconn_ind -}; - -/* error message helper function */ -static void illegal_state_warn(unsigned state, unsigned char firstbyte) -{ - printk(KERN_WARNING "isdn_x25iface: firstbyte %x illegal in" - "current state %d\n", firstbyte, state); -} - -/* check protocol data field for consistency */ -static int pdata_is_bad(ix25_pdata_t *pda) { - - if (pda && pda->magic == ISDN_X25IFACE_MAGIC) return 0; - printk(KERN_WARNING - "isdn_x25iface_xxx: illegal pointer to proto data\n"); - return 1; -} - -/* create a new x25 interface protocol instance - */ -struct concap_proto *isdn_x25iface_proto_new(void) -{ - ix25_pdata_t *tmp = kmalloc(sizeof(ix25_pdata_t), GFP_KERNEL); - IX25DEBUG("isdn_x25iface_proto_new\n"); - if (tmp) { - tmp->magic = ISDN_X25IFACE_MAGIC; - tmp->state = WAN_UNCONFIGURED; - /* private data space used to hold the concap_proto data. - Only to be accessed via the returned pointer */ - spin_lock_init(&tmp->priv.lock); - tmp->priv.dops = NULL; - tmp->priv.net_dev = NULL; - tmp->priv.pops = &ix25_pops; - tmp->priv.flags = 0; - tmp->priv.proto_data = tmp; - return (&(tmp->priv)); - } - return NULL; -}; - -/* close the x25iface encapsulation protocol - */ -static int isdn_x25iface_proto_close(struct concap_proto *cprot) { - - ix25_pdata_t *tmp; - int ret = 0; - ulong flags; - - if (!cprot) { - printk(KERN_ERR "isdn_x25iface_proto_close: " - "invalid concap_proto pointer\n"); - return -1; - } - IX25DEBUG("isdn_x25iface_proto_close %s \n", MY_DEVNAME(cprot->net_dev)); - spin_lock_irqsave(&cprot->lock, flags); - cprot->dops = NULL; - cprot->net_dev = NULL; - tmp = cprot->proto_data; - if (pdata_is_bad(tmp)) { - ret = -1; - } else { - tmp->state = WAN_UNCONFIGURED; - } - spin_unlock_irqrestore(&cprot->lock, flags); - return ret; -} - -/* Delete the x25iface encapsulation protocol instance - */ -static void isdn_x25iface_proto_del(struct concap_proto *cprot) { - - ix25_pdata_t *tmp; - - IX25DEBUG("isdn_x25iface_proto_del \n"); - if (!cprot) { - printk(KERN_ERR "isdn_x25iface_proto_del: " - "concap_proto pointer is NULL\n"); - return; - } - tmp = cprot->proto_data; - if (tmp == NULL) { - printk(KERN_ERR "isdn_x25iface_proto_del: inconsistent " - "proto_data pointer (maybe already deleted?)\n"); - return; - } - /* close if the protocol is still open */ - if (cprot->dops) isdn_x25iface_proto_close(cprot); - /* freeing the storage should be sufficient now. But some additional - settings might help to catch wild pointer bugs */ - tmp->magic = 0; - cprot->proto_data = NULL; - - kfree(tmp); - return; -} - -/* (re-)initialize the data structures for x25iface encapsulation - */ -static int isdn_x25iface_proto_restart(struct concap_proto *cprot, - struct net_device *ndev, - struct concap_device_ops *dops) -{ - ix25_pdata_t *pda = cprot->proto_data; - ulong flags; - - IX25DEBUG("isdn_x25iface_proto_restart %s \n", MY_DEVNAME(ndev)); - - if (pdata_is_bad(pda)) return -1; - - if (!(dops && dops->data_req && dops->connect_req - && dops->disconn_req)) { - printk(KERN_WARNING "isdn_x25iface_restart: required dops" - " missing\n"); - isdn_x25iface_proto_close(cprot); - return -1; - } - spin_lock_irqsave(&cprot->lock, flags); - cprot->net_dev = ndev; - cprot->pops = &ix25_pops; - cprot->dops = dops; - pda->state = WAN_DISCONNECTED; - spin_unlock_irqrestore(&cprot->lock, flags); - return 0; -} - -/* deliver a dl_data frame received from i4l HL driver to the network layer - */ -static int isdn_x25iface_receive(struct concap_proto *cprot, struct sk_buff *skb) -{ - IX25DEBUG("isdn_x25iface_receive %s \n", MY_DEVNAME(cprot->net_dev)); - if (((ix25_pdata_t *)(cprot->proto_data)) - ->state == WAN_CONNECTED) { - if (skb_push(skb, 1)) { - skb->data[0] = X25_IFACE_DATA; - skb->protocol = x25_type_trans(skb, cprot->net_dev); - netif_rx(skb); - return 0; - } - } - printk(KERN_WARNING "isdn_x25iface_receive %s: not connected, skb dropped\n", MY_DEVNAME(cprot->net_dev)); - dev_kfree_skb(skb); - return -1; -} - -/* a connection set up is indicated by lower layer - */ -static int isdn_x25iface_connect_ind(struct concap_proto *cprot) -{ - struct sk_buff *skb; - enum wan_states *state_p - = &(((ix25_pdata_t *)(cprot->proto_data))->state); - IX25DEBUG("isdn_x25iface_connect_ind %s \n" - , MY_DEVNAME(cprot->net_dev)); - if (*state_p == WAN_UNCONFIGURED) { - printk(KERN_WARNING - "isdn_x25iface_connect_ind while unconfigured %s\n" - , MY_DEVNAME(cprot->net_dev)); - return -1; - } - *state_p = WAN_CONNECTED; - - skb = dev_alloc_skb(1); - if (skb) { - skb_put_u8(skb, X25_IFACE_CONNECT); - skb->protocol = x25_type_trans(skb, cprot->net_dev); - netif_rx(skb); - return 0; - } else { - printk(KERN_WARNING "isdn_x25iface_connect_ind: " - " out of memory -- disconnecting\n"); - cprot->dops->disconn_req(cprot); - return -1; - } -} - -/* a disconnect is indicated by lower layer - */ -static int isdn_x25iface_disconn_ind(struct concap_proto *cprot) -{ - struct sk_buff *skb; - enum wan_states *state_p - = &(((ix25_pdata_t *)(cprot->proto_data))->state); - IX25DEBUG("isdn_x25iface_disconn_ind %s \n", MY_DEVNAME(cprot->net_dev)); - if (*state_p == WAN_UNCONFIGURED) { - printk(KERN_WARNING - "isdn_x25iface_disconn_ind while unconfigured\n"); - return -1; - } - if (!cprot->net_dev) return -1; - *state_p = WAN_DISCONNECTED; - skb = dev_alloc_skb(1); - if (skb) { - skb_put_u8(skb, X25_IFACE_DISCONNECT); - skb->protocol = x25_type_trans(skb, cprot->net_dev); - netif_rx(skb); - return 0; - } else { - printk(KERN_WARNING "isdn_x25iface_disconn_ind:" - " out of memory\n"); - return -1; - } -} - -/* process a frame handed over to us from linux network layer. First byte - semantics as defined in Documentation/networking/x25-iface.txt -*/ -static int isdn_x25iface_xmit(struct concap_proto *cprot, struct sk_buff *skb) -{ - unsigned char firstbyte = skb->data[0]; - enum wan_states *state = &((ix25_pdata_t *)cprot->proto_data)->state; - int ret = 0; - IX25DEBUG("isdn_x25iface_xmit: %s first=%x state=%d\n", - MY_DEVNAME(cprot->net_dev), firstbyte, *state); - switch (firstbyte) { - case X25_IFACE_DATA: - if (*state == WAN_CONNECTED) { - skb_pull(skb, 1); - netif_trans_update(cprot->net_dev); - ret = (cprot->dops->data_req(cprot, skb)); - /* prepare for future retransmissions */ - if (ret) skb_push(skb, 1); - return ret; - } - illegal_state_warn(*state, firstbyte); - break; - case X25_IFACE_CONNECT: - if (*state == WAN_DISCONNECTED) { - *state = WAN_CONNECTING; - ret = cprot->dops->connect_req(cprot); - if (ret) { - /* reset state and notify upper layer about - * immidiatly failed attempts */ - isdn_x25iface_disconn_ind(cprot); - } - } else { - illegal_state_warn(*state, firstbyte); - } - break; - case X25_IFACE_DISCONNECT: - switch (*state) { - case WAN_DISCONNECTED: - /* Should not happen. However, give upper layer a - chance to recover from inconstistency but don't - trust the lower layer sending the disconn_confirm - when already disconnected */ - printk(KERN_WARNING "isdn_x25iface_xmit: disconnect " - " requested while disconnected\n"); - isdn_x25iface_disconn_ind(cprot); - break; /* prevent infinite loops */ - case WAN_CONNECTING: - case WAN_CONNECTED: - *state = WAN_DISCONNECTED; - cprot->dops->disconn_req(cprot); - break; - default: - illegal_state_warn(*state, firstbyte); - } - break; - case X25_IFACE_PARAMS: - printk(KERN_WARNING "isdn_x25iface_xmit: setting of lapb" - " options not yet supported\n"); - break; - default: - printk(KERN_WARNING "isdn_x25iface_xmit: frame with illegal" - " first byte %x ignored:\n", firstbyte); - } - dev_kfree_skb(skb); - return 0; -} diff --git a/drivers/isdn/i4l/isdn_x25iface.h b/drivers/isdn/i4l/isdn_x25iface.h deleted file mode 100644 index ca08e082cf7c..000000000000 --- a/drivers/isdn/i4l/isdn_x25iface.h +++ /dev/null @@ -1,30 +0,0 @@ -/* $Id: isdn_x25iface.h,v 1.1.2.2 2004/01/12 22:37:19 keil Exp $ - * - * header for Linux ISDN subsystem, x.25 related functions - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#ifndef _LINUX_ISDN_X25IFACE_H -#define _LINUX_ISDN_X25IFACE_H - -#define ISDN_X25IFACE_MAGIC 0x1e75a2b9 -/* #define DEBUG_ISDN_X25 if you want isdn_x25 debugging messages */ -#ifdef DEBUG_ISDN_X25 -# define IX25DEBUG(fmt, args...) printk(KERN_DEBUG fmt, ##args) -#else -# define IX25DEBUG(fmt, args...) -#endif - -#include -#include -#include - -extern struct concap_proto_ops *isdn_x25iface_concap_proto_ops_pt; -extern struct concap_proto *isdn_x25iface_proto_new(void); - - - -#endif diff --git a/drivers/isdn/isdnloop/Makefile b/drivers/isdn/isdnloop/Makefile deleted file mode 100644 index 5ff4c0e09768..000000000000 --- a/drivers/isdn/isdnloop/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# Makefile for the isdnloop ISDN device driver - -# Each configuration option enables a list of files. - -obj-$(CONFIG_ISDN_DRV_LOOP) += isdnloop.o diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c deleted file mode 100644 index 755c6bbc9553..000000000000 --- a/drivers/isdn/isdnloop/isdnloop.c +++ /dev/null @@ -1,1528 +0,0 @@ -/* $Id: isdnloop.c,v 1.11.6.7 2001/11/11 19:54:31 kai Exp $ - * - * ISDN low-level module implementing a dummy loop driver. - * - * Copyright 1997 by Fritz Elfert (fritz@isdn4linux.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#include -#include -#include -#include -#include -#include "isdnloop.h" - -static char *isdnloop_id = "loop0"; - -MODULE_DESCRIPTION("ISDN4Linux: Pseudo Driver that simulates an ISDN card"); -MODULE_AUTHOR("Fritz Elfert"); -MODULE_LICENSE("GPL"); -module_param(isdnloop_id, charp, 0); -MODULE_PARM_DESC(isdnloop_id, "ID-String of first card"); - -static int isdnloop_addcard(char *); - -/* - * Free queue completely. - * - * Parameter: - * card = pointer to card struct - * channel = channel number - */ -static void -isdnloop_free_queue(isdnloop_card *card, int channel) -{ - struct sk_buff_head *queue = &card->bqueue[channel]; - - skb_queue_purge(queue); - card->sndcount[channel] = 0; -} - -/* - * Send B-Channel data to another virtual card. - * This routine is called via timer-callback from isdnloop_pollbchan(). - * - * Parameter: - * card = pointer to card struct. - * ch = channel number (0-based) - */ -static void -isdnloop_bchan_send(isdnloop_card *card, int ch) -{ - isdnloop_card *rcard = card->rcard[ch]; - int rch = card->rch[ch], len, ack; - struct sk_buff *skb; - isdn_ctrl cmd; - - while (card->sndcount[ch]) { - skb = skb_dequeue(&card->bqueue[ch]); - if (skb) { - len = skb->len; - card->sndcount[ch] -= len; - ack = *(skb->head); /* used as scratch area */ - cmd.driver = card->myid; - cmd.arg = ch; - if (rcard) { - rcard->interface.rcvcallb_skb(rcard->myid, rch, skb); - } else { - printk(KERN_WARNING "isdnloop: no rcard, skb dropped\n"); - dev_kfree_skb(skb); - - } - cmd.command = ISDN_STAT_BSENT; - cmd.parm.length = len; - card->interface.statcallb(&cmd); - } else - card->sndcount[ch] = 0; - } -} - -/* - * Send/Receive Data to/from the B-Channel. - * This routine is called via timer-callback. - * It schedules itself while any B-Channel is open. - * - * Parameter: - * data = pointer to card struct, set by kernel timer.data - */ -static void -isdnloop_pollbchan(struct timer_list *t) -{ - isdnloop_card *card = from_timer(card, t, rb_timer); - unsigned long flags; - - if (card->flags & ISDNLOOP_FLAGS_B1ACTIVE) - isdnloop_bchan_send(card, 0); - if (card->flags & ISDNLOOP_FLAGS_B2ACTIVE) - isdnloop_bchan_send(card, 1); - if (card->flags & (ISDNLOOP_FLAGS_B1ACTIVE | ISDNLOOP_FLAGS_B2ACTIVE)) { - /* schedule b-channel polling again */ - spin_lock_irqsave(&card->isdnloop_lock, flags); - card->rb_timer.expires = jiffies + ISDNLOOP_TIMER_BCREAD; - add_timer(&card->rb_timer); - card->flags |= ISDNLOOP_FLAGS_RBTIMER; - spin_unlock_irqrestore(&card->isdnloop_lock, flags); - } else - card->flags &= ~ISDNLOOP_FLAGS_RBTIMER; -} - -/* - * Parse ICN-type setup string and fill fields of setup-struct - * with parsed data. - * - * Parameter: - * setup = setup string, format: [caller-id],si1,si2,[called-id] - * cmd = pointer to struct to be filled. - */ -static void -isdnloop_parse_setup(char *setup, isdn_ctrl *cmd) -{ - char *t = setup; - char *s = strchr(t, ','); - - *s++ = '\0'; - strlcpy(cmd->parm.setup.phone, t, sizeof(cmd->parm.setup.phone)); - s = strchr(t = s, ','); - *s++ = '\0'; - if (!strlen(t)) - cmd->parm.setup.si1 = 0; - else - cmd->parm.setup.si1 = simple_strtoul(t, NULL, 10); - s = strchr(t = s, ','); - *s++ = '\0'; - if (!strlen(t)) - cmd->parm.setup.si2 = 0; - else - cmd->parm.setup.si2 = - simple_strtoul(t, NULL, 10); - strlcpy(cmd->parm.setup.eazmsn, s, sizeof(cmd->parm.setup.eazmsn)); - cmd->parm.setup.plan = 0; - cmd->parm.setup.screen = 0; -} - -typedef struct isdnloop_stat { - char *statstr; - int command; - int action; -} isdnloop_stat; -/* *INDENT-OFF* */ -static isdnloop_stat isdnloop_stat_table[] = { - {"BCON_", ISDN_STAT_BCONN, 1}, /* B-Channel connected */ - {"BDIS_", ISDN_STAT_BHUP, 2}, /* B-Channel disconnected */ - {"DCON_", ISDN_STAT_DCONN, 0}, /* D-Channel connected */ - {"DDIS_", ISDN_STAT_DHUP, 0}, /* D-Channel disconnected */ - {"DCAL_I", ISDN_STAT_ICALL, 3}, /* Incoming call dialup-line */ - {"DSCA_I", ISDN_STAT_ICALL, 3}, /* Incoming call 1TR6-SPV */ - {"FCALL", ISDN_STAT_ICALL, 4}, /* Leased line connection up */ - {"CIF", ISDN_STAT_CINF, 5}, /* Charge-info, 1TR6-type */ - {"AOC", ISDN_STAT_CINF, 6}, /* Charge-info, DSS1-type */ - {"CAU", ISDN_STAT_CAUSE, 7}, /* Cause code */ - {"TEI OK", ISDN_STAT_RUN, 0}, /* Card connected to wallplug */ - {"E_L1: ACT FAIL", ISDN_STAT_BHUP, 8}, /* Layer-1 activation failed */ - {"E_L2: DATA LIN", ISDN_STAT_BHUP, 8}, /* Layer-2 data link lost */ - {"E_L1: ACTIVATION FAILED", - ISDN_STAT_BHUP, 8}, /* Layer-1 activation failed */ - {NULL, 0, -1} -}; -/* *INDENT-ON* */ - - -/* - * Parse Status message-strings from virtual card. - * Depending on status, call statcallb for sending messages to upper - * levels. Also set/reset B-Channel active-flags. - * - * Parameter: - * status = status string to parse. - * channel = channel where message comes from. - * card = card where message comes from. - */ -static void -isdnloop_parse_status(u_char *status, int channel, isdnloop_card *card) -{ - isdnloop_stat *s = isdnloop_stat_table; - int action = -1; - isdn_ctrl cmd; - - while (s->statstr) { - if (!strncmp(status, s->statstr, strlen(s->statstr))) { - cmd.command = s->command; - action = s->action; - break; - } - s++; - } - if (action == -1) - return; - cmd.driver = card->myid; - cmd.arg = channel; - switch (action) { - case 1: - /* BCON_x */ - card->flags |= (channel) ? - ISDNLOOP_FLAGS_B2ACTIVE : ISDNLOOP_FLAGS_B1ACTIVE; - break; - case 2: - /* BDIS_x */ - card->flags &= ~((channel) ? - ISDNLOOP_FLAGS_B2ACTIVE : ISDNLOOP_FLAGS_B1ACTIVE); - isdnloop_free_queue(card, channel); - break; - case 3: - /* DCAL_I and DSCA_I */ - isdnloop_parse_setup(status + 6, &cmd); - break; - case 4: - /* FCALL */ - sprintf(cmd.parm.setup.phone, "LEASED%d", card->myid); - sprintf(cmd.parm.setup.eazmsn, "%d", channel + 1); - cmd.parm.setup.si1 = 7; - cmd.parm.setup.si2 = 0; - cmd.parm.setup.plan = 0; - cmd.parm.setup.screen = 0; - break; - case 5: - /* CIF */ - strlcpy(cmd.parm.num, status + 3, sizeof(cmd.parm.num)); - break; - case 6: - /* AOC */ - snprintf(cmd.parm.num, sizeof(cmd.parm.num), "%d", - (int) simple_strtoul(status + 7, NULL, 16)); - break; - case 7: - /* CAU */ - status += 3; - if (strlen(status) == 4) - snprintf(cmd.parm.num, sizeof(cmd.parm.num), "%s%c%c", - status + 2, *status, *(status + 1)); - else - strlcpy(cmd.parm.num, status + 1, sizeof(cmd.parm.num)); - break; - case 8: - /* Misc Errors on L1 and L2 */ - card->flags &= ~ISDNLOOP_FLAGS_B1ACTIVE; - isdnloop_free_queue(card, 0); - cmd.arg = 0; - cmd.driver = card->myid; - card->interface.statcallb(&cmd); - cmd.command = ISDN_STAT_DHUP; - cmd.arg = 0; - cmd.driver = card->myid; - card->interface.statcallb(&cmd); - cmd.command = ISDN_STAT_BHUP; - card->flags &= ~ISDNLOOP_FLAGS_B2ACTIVE; - isdnloop_free_queue(card, 1); - cmd.arg = 1; - cmd.driver = card->myid; - card->interface.statcallb(&cmd); - cmd.command = ISDN_STAT_DHUP; - cmd.arg = 1; - cmd.driver = card->myid; - break; - } - card->interface.statcallb(&cmd); -} - -/* - * Store a cwcharacter into ringbuffer for reading from /dev/isdnctrl - * - * Parameter: - * card = pointer to card struct. - * c = char to store. - */ -static void -isdnloop_putmsg(isdnloop_card *card, unsigned char c) -{ - ulong flags; - - spin_lock_irqsave(&card->isdnloop_lock, flags); - *card->msg_buf_write++ = (c == 0xff) ? '\n' : c; - if (card->msg_buf_write == card->msg_buf_read) { - if (++card->msg_buf_read > card->msg_buf_end) - card->msg_buf_read = card->msg_buf; - } - if (card->msg_buf_write > card->msg_buf_end) - card->msg_buf_write = card->msg_buf; - spin_unlock_irqrestore(&card->isdnloop_lock, flags); -} - -/* - * Poll a virtual cards message queue. - * If there are new status-replies from the card, copy them to - * ringbuffer for reading on /dev/isdnctrl and call - * isdnloop_parse_status() for processing them. Watch for special - * Firmware bootmessage and parse it, to get the D-Channel protocol. - * If there are B-Channels open, initiate a timer-callback to - * isdnloop_pollbchan(). - * This routine is called periodically via timer interrupt. - * - * Parameter: - * data = pointer to card struct - */ -static void -isdnloop_polldchan(struct timer_list *t) -{ - isdnloop_card *card = from_timer(card, t, st_timer); - struct sk_buff *skb; - int avail; - int left; - u_char c; - int ch; - unsigned long flags; - u_char *p; - isdn_ctrl cmd; - - skb = skb_dequeue(&card->dqueue); - if (skb) - avail = skb->len; - else - avail = 0; - for (left = avail; left > 0; left--) { - c = *skb->data; - skb_pull(skb, 1); - isdnloop_putmsg(card, c); - card->imsg[card->iptr] = c; - if (card->iptr < 59) - card->iptr++; - if (!skb->len) { - avail++; - isdnloop_putmsg(card, '\n'); - card->imsg[card->iptr] = 0; - card->iptr = 0; - if (card->imsg[0] == '0' && card->imsg[1] >= '0' && - card->imsg[1] <= '2' && card->imsg[2] == ';') { - ch = (card->imsg[1] - '0') - 1; - p = &card->imsg[3]; - isdnloop_parse_status(p, ch, card); - } else { - p = card->imsg; - if (!strncmp(p, "DRV1.", 5)) { - printk(KERN_INFO "isdnloop: (%s) %s\n", CID, p); - if (!strncmp(p + 7, "TC", 2)) { - card->ptype = ISDN_PTYPE_1TR6; - card->interface.features |= ISDN_FEATURE_P_1TR6; - printk(KERN_INFO - "isdnloop: (%s) 1TR6-Protocol loaded and running\n", CID); - } - if (!strncmp(p + 7, "EC", 2)) { - card->ptype = ISDN_PTYPE_EURO; - card->interface.features |= ISDN_FEATURE_P_EURO; - printk(KERN_INFO - "isdnloop: (%s) Euro-Protocol loaded and running\n", CID); - } - continue; - - } - } - } - } - if (avail) { - cmd.command = ISDN_STAT_STAVAIL; - cmd.driver = card->myid; - cmd.arg = avail; - card->interface.statcallb(&cmd); - } - if (card->flags & (ISDNLOOP_FLAGS_B1ACTIVE | ISDNLOOP_FLAGS_B2ACTIVE)) - if (!(card->flags & ISDNLOOP_FLAGS_RBTIMER)) { - /* schedule b-channel polling */ - card->flags |= ISDNLOOP_FLAGS_RBTIMER; - spin_lock_irqsave(&card->isdnloop_lock, flags); - del_timer(&card->rb_timer); - card->rb_timer.expires = jiffies + ISDNLOOP_TIMER_BCREAD; - add_timer(&card->rb_timer); - spin_unlock_irqrestore(&card->isdnloop_lock, flags); - } - /* schedule again */ - spin_lock_irqsave(&card->isdnloop_lock, flags); - card->st_timer.expires = jiffies + ISDNLOOP_TIMER_DCREAD; - add_timer(&card->st_timer); - spin_unlock_irqrestore(&card->isdnloop_lock, flags); -} - -/* - * Append a packet to the transmit buffer-queue. - * - * Parameter: - * channel = Number of B-channel - * skb = packet to send. - * card = pointer to card-struct - * Return: - * Number of bytes transferred, -E??? on error - */ -static int -isdnloop_sendbuf(int channel, struct sk_buff *skb, isdnloop_card *card) -{ - int len = skb->len; - unsigned long flags; - struct sk_buff *nskb; - - if (len > 4000) { - printk(KERN_WARNING - "isdnloop: Send packet too large\n"); - return -EINVAL; - } - if (len) { - if (!(card->flags & (channel ? ISDNLOOP_FLAGS_B2ACTIVE : ISDNLOOP_FLAGS_B1ACTIVE))) - return 0; - if (card->sndcount[channel] > ISDNLOOP_MAX_SQUEUE) - return 0; - spin_lock_irqsave(&card->isdnloop_lock, flags); - nskb = dev_alloc_skb(skb->len); - if (nskb) { - skb_copy_from_linear_data(skb, - skb_put(nskb, len), len); - skb_queue_tail(&card->bqueue[channel], nskb); - dev_kfree_skb(skb); - } else - len = 0; - card->sndcount[channel] += len; - spin_unlock_irqrestore(&card->isdnloop_lock, flags); - } - return len; -} - -/* - * Read the messages from the card's ringbuffer - * - * Parameter: - * buf = pointer to buffer. - * len = number of bytes to read. - * user = flag, 1: called from userlevel 0: called from kernel. - * card = pointer to card struct. - * Return: - * number of bytes actually transferred. - */ -static int -isdnloop_readstatus(u_char __user *buf, int len, isdnloop_card *card) -{ - int count; - u_char __user *p; - - for (p = buf, count = 0; count < len; p++, count++) { - if (card->msg_buf_read == card->msg_buf_write) - return count; - if (put_user(*card->msg_buf_read++, p)) - return -EFAULT; - if (card->msg_buf_read > card->msg_buf_end) - card->msg_buf_read = card->msg_buf; - } - return count; -} - -/* - * Simulate a card's response by appending it to the cards - * message queue. - * - * Parameter: - * card = pointer to card struct. - * s = pointer to message-string. - * ch = channel: 0 = generic messages, 1 and 2 = D-channel messages. - * Return: - * 0 on success, 1 on memory squeeze. - */ -static int -isdnloop_fake(isdnloop_card *card, char *s, int ch) -{ - struct sk_buff *skb; - int len = strlen(s) + ((ch >= 0) ? 3 : 0); - skb = dev_alloc_skb(len); - if (!skb) { - printk(KERN_WARNING "isdnloop: Out of memory in isdnloop_fake\n"); - return 1; - } - if (ch >= 0) - sprintf(skb_put(skb, 3), "%02d;", ch); - skb_put_data(skb, s, strlen(s)); - skb_queue_tail(&card->dqueue, skb); - return 0; -} -/* *INDENT-OFF* */ -static isdnloop_stat isdnloop_cmd_table[] = { - {"BCON_R", 0, 1}, /* B-Channel connect */ - {"BCON_I", 0, 17}, /* B-Channel connect ind */ - {"BDIS_R", 0, 2}, /* B-Channel disconnect */ - {"DDIS_R", 0, 3}, /* D-Channel disconnect */ - {"DCON_R", 0, 16}, /* D-Channel connect */ - {"DSCA_R", 0, 4}, /* Dial 1TR6-SPV */ - {"DCAL_R", 0, 5}, /* Dial */ - {"EAZC", 0, 6}, /* Clear EAZ listener */ - {"EAZ", 0, 7}, /* Set EAZ listener */ - {"SEEAZ", 0, 8}, /* Get EAZ listener */ - {"MSN", 0, 9}, /* Set/Clear MSN listener */ - {"MSALL", 0, 10}, /* Set multi MSN listeners */ - {"SETSIL", 0, 11}, /* Set SI list */ - {"SEESIL", 0, 12}, /* Get SI list */ - {"SILC", 0, 13}, /* Clear SI list */ - {"LOCK", 0, -1}, /* LOCK channel */ - {"UNLOCK", 0, -1}, /* UNLOCK channel */ - {"FV2ON", 1, 14}, /* Leased mode on */ - {"FV2OFF", 1, 15}, /* Leased mode off */ - {NULL, 0, -1} -}; -/* *INDENT-ON* */ - - -/* - * Simulate an error-response from a card. - * - * Parameter: - * card = pointer to card struct. - */ -static void -isdnloop_fake_err(isdnloop_card *card) -{ - char buf[64]; - - snprintf(buf, sizeof(buf), "E%s", card->omsg); - isdnloop_fake(card, buf, -1); - isdnloop_fake(card, "NAK", -1); -} - -static u_char ctable_eu[] = {0x00, 0x11, 0x01, 0x12}; -static u_char ctable_1t[] = {0x00, 0x3b, 0x01, 0x3a}; - -/* - * Assemble a simplified cause message depending on the - * D-channel protocol used. - * - * Parameter: - * card = pointer to card struct. - * loc = location: 0 = local, 1 = remote. - * cau = cause: 1 = busy, 2 = nonexistent callerid, 3 = no user responding. - * Return: - * Pointer to buffer containing the assembled message. - */ -static char * -isdnloop_unicause(isdnloop_card *card, int loc, int cau) -{ - static char buf[6]; - - switch (card->ptype) { - case ISDN_PTYPE_EURO: - sprintf(buf, "E%02X%02X", (loc) ? 4 : 2, ctable_eu[cau]); - break; - case ISDN_PTYPE_1TR6: - sprintf(buf, "%02X44", ctable_1t[cau]); - break; - default: - return "0000"; - } - return buf; -} - -/* - * Release a virtual connection. Called from timer interrupt, when - * called party did not respond. - * - * Parameter: - * card = pointer to card struct. - * ch = channel (0-based) - */ -static void -isdnloop_atimeout(isdnloop_card *card, int ch) -{ - unsigned long flags; - char buf[60]; - - spin_lock_irqsave(&card->isdnloop_lock, flags); - if (card->rcard[ch]) { - isdnloop_fake(card->rcard[ch], "DDIS_I", card->rch[ch] + 1); - card->rcard[ch]->rcard[card->rch[ch]] = NULL; - card->rcard[ch] = NULL; - } - isdnloop_fake(card, "DDIS_I", ch + 1); - /* No user responding */ - sprintf(buf, "CAU%s", isdnloop_unicause(card, 1, 3)); - isdnloop_fake(card, buf, ch + 1); - spin_unlock_irqrestore(&card->isdnloop_lock, flags); -} - -/* - * Wrapper for isdnloop_atimeout(). - */ -static void -isdnloop_atimeout0(struct timer_list *t) -{ - isdnloop_card *card = from_timer(card, t, c_timer[0]); - - isdnloop_atimeout(card, 0); -} - -/* - * Wrapper for isdnloop_atimeout(). - */ -static void -isdnloop_atimeout1(struct timer_list *t) -{ - isdnloop_card *card = from_timer(card, t, c_timer[1]); - - isdnloop_atimeout(card, 1); -} - -/* - * Install a watchdog for a user, not responding. - * - * Parameter: - * card = pointer to card struct. - * ch = channel to watch for. - */ -static void -isdnloop_start_ctimer(isdnloop_card *card, int ch) -{ - unsigned long flags; - - spin_lock_irqsave(&card->isdnloop_lock, flags); - timer_setup(&card->c_timer[ch], ch ? isdnloop_atimeout1 - : isdnloop_atimeout0, 0); - card->c_timer[ch].expires = jiffies + ISDNLOOP_TIMER_ALERTWAIT; - add_timer(&card->c_timer[ch]); - spin_unlock_irqrestore(&card->isdnloop_lock, flags); -} - -/* - * Kill a pending channel watchdog. - * - * Parameter: - * card = pointer to card struct. - * ch = channel (0-based). - */ -static void -isdnloop_kill_ctimer(isdnloop_card *card, int ch) -{ - unsigned long flags; - - spin_lock_irqsave(&card->isdnloop_lock, flags); - del_timer(&card->c_timer[ch]); - spin_unlock_irqrestore(&card->isdnloop_lock, flags); -} - -static u_char si2bit[] = {0, 1, 0, 0, 0, 2, 0, 4, 0, 0}; -static u_char bit2si[] = {1, 5, 7}; - -/* - * Try finding a listener for an outgoing call. - * - * Parameter: - * card = pointer to calling card. - * p = pointer to ICN-type setup-string. - * lch = channel of calling card. - * cmd = pointer to struct to be filled when parsing setup. - * Return: - * 0 = found match, alerting should happen. - * 1 = found matching number but it is busy. - * 2 = no matching listener. - * 3 = found matching number but SI does not match. - */ -static int -isdnloop_try_call(isdnloop_card *card, char *p, int lch, isdn_ctrl *cmd) -{ - isdnloop_card *cc = cards; - unsigned long flags; - int ch; - int num_match; - int i; - char *e; - char nbuf[32]; - - isdnloop_parse_setup(p, cmd); - while (cc) { - for (ch = 0; ch < 2; ch++) { - /* Exclude ourself */ - if ((cc == card) && (ch == lch)) - continue; - num_match = 0; - switch (cc->ptype) { - case ISDN_PTYPE_EURO: - for (i = 0; i < 3; i++) - if (!(strcmp(cc->s0num[i], cmd->parm.setup.phone))) - num_match = 1; - break; - case ISDN_PTYPE_1TR6: - e = cc->eazlist[ch]; - while (*e) { - sprintf(nbuf, "%s%c", cc->s0num[0], *e); - if (!(strcmp(nbuf, cmd->parm.setup.phone))) - num_match = 1; - e++; - } - } - if (num_match) { - spin_lock_irqsave(&card->isdnloop_lock, flags); - /* channel idle? */ - if (!(cc->rcard[ch])) { - /* Check SI */ - if (!(si2bit[cmd->parm.setup.si1] & cc->sil[ch])) { - spin_unlock_irqrestore(&card->isdnloop_lock, flags); - return 3; - } - /* ch is idle, si and number matches */ - cc->rcard[ch] = card; - cc->rch[ch] = lch; - card->rcard[lch] = cc; - card->rch[lch] = ch; - spin_unlock_irqrestore(&card->isdnloop_lock, flags); - return 0; - } else { - spin_unlock_irqrestore(&card->isdnloop_lock, flags); - /* num matches, but busy */ - if (ch == 1) - return 1; - } - } - } - cc = cc->next; - } - return 2; -} - -/* - * Depending on D-channel protocol and caller/called, modify - * phone number. - * - * Parameter: - * card = pointer to card struct. - * phone = pointer phone number. - * caller = flag: 1 = caller, 0 = called. - * Return: - * pointer to new phone number. - */ -static char * -isdnloop_vstphone(isdnloop_card *card, char *phone, int caller) -{ - int i; - static char nphone[30]; - - if (!card) { - printk("BUG!!!\n"); - return ""; - } - switch (card->ptype) { - case ISDN_PTYPE_EURO: - if (caller) { - for (i = 0; i < 2; i++) - if (!(strcmp(card->s0num[i], phone))) - return phone; - return card->s0num[0]; - } - return phone; - break; - case ISDN_PTYPE_1TR6: - if (caller) { - sprintf(nphone, "%s%c", card->s0num[0], phone[0]); - return nphone; - } else - return &phone[strlen(phone) - 1]; - break; - } - return ""; -} - -/* - * Parse an ICN-type command string sent to the 'card'. - * Perform misc. actions depending on the command. - * - * Parameter: - * card = pointer to card struct. - */ -static void -isdnloop_parse_cmd(isdnloop_card *card) -{ - char *p = card->omsg; - isdn_ctrl cmd; - char buf[60]; - isdnloop_stat *s = isdnloop_cmd_table; - int action = -1; - int i; - int ch; - - if ((card->omsg[0] != '0') && (card->omsg[2] != ';')) { - isdnloop_fake_err(card); - return; - } - ch = card->omsg[1] - '0'; - if ((ch < 0) || (ch > 2)) { - isdnloop_fake_err(card); - return; - } - p += 3; - while (s->statstr) { - if (!strncmp(p, s->statstr, strlen(s->statstr))) { - action = s->action; - if (s->command && (ch != 0)) { - isdnloop_fake_err(card); - return; - } - break; - } - s++; - } - if (action == -1) - return; - switch (action) { - case 1: - /* 0x;BCON_R */ - if (card->rcard[ch - 1]) { - isdnloop_fake(card->rcard[ch - 1], "BCON_I", - card->rch[ch - 1] + 1); - isdnloop_fake(card, "BCON_C", ch); - } - break; - case 17: - /* 0x;BCON_I */ - if (card->rcard[ch - 1]) { - isdnloop_fake(card->rcard[ch - 1], "BCON_C", - card->rch[ch - 1] + 1); - } - break; - case 2: - /* 0x;BDIS_R */ - isdnloop_fake(card, "BDIS_C", ch); - if (card->rcard[ch - 1]) { - isdnloop_fake(card->rcard[ch - 1], "BDIS_I", - card->rch[ch - 1] + 1); - } - break; - case 16: - /* 0x;DCON_R */ - isdnloop_kill_ctimer(card, ch - 1); - if (card->rcard[ch - 1]) { - isdnloop_kill_ctimer(card->rcard[ch - 1], card->rch[ch - 1]); - isdnloop_fake(card->rcard[ch - 1], "DCON_C", - card->rch[ch - 1] + 1); - isdnloop_fake(card, "DCON_C", ch); - } - break; - case 3: - /* 0x;DDIS_R */ - isdnloop_kill_ctimer(card, ch - 1); - if (card->rcard[ch - 1]) { - isdnloop_kill_ctimer(card->rcard[ch - 1], card->rch[ch - 1]); - isdnloop_fake(card->rcard[ch - 1], "DDIS_I", - card->rch[ch - 1] + 1); - card->rcard[ch - 1] = NULL; - } - isdnloop_fake(card, "DDIS_C", ch); - break; - case 4: - /* 0x;DSCA_Rdd,yy,zz,oo */ - if (card->ptype != ISDN_PTYPE_1TR6) { - isdnloop_fake_err(card); - return; - } - /* Fall through */ - case 5: - /* 0x;DCAL_Rdd,yy,zz,oo */ - p += 6; - switch (isdnloop_try_call(card, p, ch - 1, &cmd)) { - case 0: - /* Alerting */ - sprintf(buf, "D%s_I%s,%02d,%02d,%s", - (action == 4) ? "SCA" : "CAL", - isdnloop_vstphone(card, cmd.parm.setup.eazmsn, 1), - cmd.parm.setup.si1, - cmd.parm.setup.si2, - isdnloop_vstphone(card->rcard[ch - 1], - cmd.parm.setup.phone, 0)); - isdnloop_fake(card->rcard[ch - 1], buf, card->rch[ch - 1] + 1); - /* Fall through */ - case 3: - /* si1 does not match, don't alert but start timer */ - isdnloop_start_ctimer(card, ch - 1); - break; - case 1: - /* Remote busy */ - isdnloop_fake(card, "DDIS_I", ch); - sprintf(buf, "CAU%s", isdnloop_unicause(card, 1, 1)); - isdnloop_fake(card, buf, ch); - break; - case 2: - /* No such user */ - isdnloop_fake(card, "DDIS_I", ch); - sprintf(buf, "CAU%s", isdnloop_unicause(card, 1, 2)); - isdnloop_fake(card, buf, ch); - break; - } - break; - case 6: - /* 0x;EAZC */ - card->eazlist[ch - 1][0] = '\0'; - break; - case 7: - /* 0x;EAZ */ - p += 3; - if (strlen(p) >= sizeof(card->eazlist[0])) - break; - strcpy(card->eazlist[ch - 1], p); - break; - case 8: - /* 0x;SEEAZ */ - sprintf(buf, "EAZ-LIST: %s", card->eazlist[ch - 1]); - isdnloop_fake(card, buf, ch + 1); - break; - case 9: - /* 0x;MSN */ - break; - case 10: - /* 0x;MSNALL */ - break; - case 11: - /* 0x;SETSIL */ - p += 6; - i = 0; - while (strchr("0157", *p)) { - if (i) - card->sil[ch - 1] |= si2bit[*p - '0']; - i = (*p++ == '0'); - } - if (*p) - isdnloop_fake_err(card); - break; - case 12: - /* 0x;SEESIL */ - sprintf(buf, "SIN-LIST: "); - p = buf + 10; - for (i = 0; i < 3; i++) - if (card->sil[ch - 1] & (1 << i)) - p += sprintf(p, "%02d", bit2si[i]); - isdnloop_fake(card, buf, ch + 1); - break; - case 13: - /* 0x;SILC */ - card->sil[ch - 1] = 0; - break; - case 14: - /* 00;FV2ON */ - break; - case 15: - /* 00;FV2OFF */ - break; - } -} - -/* - * Put command-strings into the of the 'card'. In reality, execute them - * right in place by calling isdnloop_parse_cmd(). Also copy every - * command to the read message ringbuffer, preceding it with a '>'. - * These mesagges can be read at /dev/isdnctrl. - * - * Parameter: - * buf = pointer to command buffer. - * len = length of buffer data. - * user = flag: 1 = called form userlevel, 0 called from kernel. - * card = pointer to card struct. - * Return: - * number of bytes transferred (currently always equals len). - */ -static int -isdnloop_writecmd(const u_char *buf, int len, int user, isdnloop_card *card) -{ - int xcount = 0; - int ocount = 1; - isdn_ctrl cmd; - - while (len) { - int count = len; - u_char *p; - u_char msg[0x100]; - - if (count > 255) - count = 255; - if (user) { - if (copy_from_user(msg, buf, count)) - return -EFAULT; - } else - memcpy(msg, buf, count); - isdnloop_putmsg(card, '>'); - for (p = msg; count > 0; count--, p++) { - len--; - xcount++; - isdnloop_putmsg(card, *p); - card->omsg[card->optr] = *p; - if (*p == '\n') { - card->omsg[card->optr] = '\0'; - card->optr = 0; - isdnloop_parse_cmd(card); - if (len) { - isdnloop_putmsg(card, '>'); - ocount++; - } - } else { - if (card->optr < 59) - card->optr++; - } - ocount++; - } - } - cmd.command = ISDN_STAT_STAVAIL; - cmd.driver = card->myid; - cmd.arg = ocount; - card->interface.statcallb(&cmd); - return xcount; -} - -/* - * Delete card's pending timers, send STOP to linklevel - */ -static void -isdnloop_stopcard(isdnloop_card *card) -{ - unsigned long flags; - isdn_ctrl cmd; - - spin_lock_irqsave(&card->isdnloop_lock, flags); - if (card->flags & ISDNLOOP_FLAGS_RUNNING) { - card->flags &= ~ISDNLOOP_FLAGS_RUNNING; - del_timer(&card->st_timer); - del_timer(&card->rb_timer); - del_timer(&card->c_timer[0]); - del_timer(&card->c_timer[1]); - cmd.command = ISDN_STAT_STOP; - cmd.driver = card->myid; - card->interface.statcallb(&cmd); - } - spin_unlock_irqrestore(&card->isdnloop_lock, flags); -} - -/* - * Stop all cards before unload. - */ -static void -isdnloop_stopallcards(void) -{ - isdnloop_card *p = cards; - - while (p) { - isdnloop_stopcard(p); - p = p->next; - } -} - -/* - * Start a 'card'. Simulate card's boot message and set the phone - * number(s) of the virtual 'S0-Interface'. Install D-channel - * poll timer. - * - * Parameter: - * card = pointer to card struct. - * sdefp = pointer to struct holding ioctl parameters. - * Return: - * 0 on success, -E??? otherwise. - */ -static int -isdnloop_start(isdnloop_card *card, isdnloop_sdef *sdefp) -{ - unsigned long flags; - isdnloop_sdef sdef; - int i; - - if (card->flags & ISDNLOOP_FLAGS_RUNNING) - return -EBUSY; - if (copy_from_user((char *) &sdef, (char *) sdefp, sizeof(sdef))) - return -EFAULT; - - for (i = 0; i < 3; i++) { - if (!memchr(sdef.num[i], 0, sizeof(sdef.num[i]))) - return -EINVAL; - } - - spin_lock_irqsave(&card->isdnloop_lock, flags); - switch (sdef.ptype) { - case ISDN_PTYPE_EURO: - if (isdnloop_fake(card, "DRV1.23EC-Q.931-CAPI-CNS-BASIS-20.02.96", - -1)) { - spin_unlock_irqrestore(&card->isdnloop_lock, flags); - return -ENOMEM; - } - card->sil[0] = card->sil[1] = 4; - if (isdnloop_fake(card, "TEI OK", 0)) { - spin_unlock_irqrestore(&card->isdnloop_lock, flags); - return -ENOMEM; - } - for (i = 0; i < 3; i++) { - strlcpy(card->s0num[i], sdef.num[i], - sizeof(card->s0num[0])); - } - break; - case ISDN_PTYPE_1TR6: - if (isdnloop_fake(card, "DRV1.04TC-1TR6-CAPI-CNS-BASIS-29.11.95", - -1)) { - spin_unlock_irqrestore(&card->isdnloop_lock, flags); - return -ENOMEM; - } - card->sil[0] = card->sil[1] = 4; - if (isdnloop_fake(card, "TEI OK", 0)) { - spin_unlock_irqrestore(&card->isdnloop_lock, flags); - return -ENOMEM; - } - strlcpy(card->s0num[0], sdef.num[0], sizeof(card->s0num[0])); - card->s0num[1][0] = '\0'; - card->s0num[2][0] = '\0'; - break; - default: - spin_unlock_irqrestore(&card->isdnloop_lock, flags); - printk(KERN_WARNING "isdnloop: Illegal D-channel protocol %d\n", - sdef.ptype); - return -EINVAL; - } - timer_setup(&card->rb_timer, isdnloop_pollbchan, 0); - timer_setup(&card->st_timer, isdnloop_polldchan, 0); - card->st_timer.expires = jiffies + ISDNLOOP_TIMER_DCREAD; - add_timer(&card->st_timer); - card->flags |= ISDNLOOP_FLAGS_RUNNING; - spin_unlock_irqrestore(&card->isdnloop_lock, flags); - return 0; -} - -/* - * Main handler for commands sent by linklevel. - */ -static int -isdnloop_command(isdn_ctrl *c, isdnloop_card *card) -{ - ulong a; - int i; - char cbuf[80]; - isdn_ctrl cmd; - isdnloop_cdef cdef; - - switch (c->command) { - case ISDN_CMD_IOCTL: - memcpy(&a, c->parm.num, sizeof(ulong)); - switch (c->arg) { - case ISDNLOOP_IOCTL_DEBUGVAR: - return (ulong) card; - case ISDNLOOP_IOCTL_STARTUP: - return isdnloop_start(card, (isdnloop_sdef *) a); - break; - case ISDNLOOP_IOCTL_ADDCARD: - if (copy_from_user((char *)&cdef, - (char *)a, - sizeof(cdef))) - return -EFAULT; - return isdnloop_addcard(cdef.id1); - break; - case ISDNLOOP_IOCTL_LEASEDCFG: - if (a) { - if (!card->leased) { - card->leased = 1; - while (card->ptype == ISDN_PTYPE_UNKNOWN) - schedule_timeout_interruptible(10); - schedule_timeout_interruptible(10); - sprintf(cbuf, "00;FV2ON\n01;EAZ1\n02;EAZ2\n"); - i = isdnloop_writecmd(cbuf, strlen(cbuf), 0, card); - printk(KERN_INFO - "isdnloop: (%s) Leased-line mode enabled\n", - CID); - cmd.command = ISDN_STAT_RUN; - cmd.driver = card->myid; - cmd.arg = 0; - card->interface.statcallb(&cmd); - } - } else { - if (card->leased) { - card->leased = 0; - sprintf(cbuf, "00;FV2OFF\n"); - i = isdnloop_writecmd(cbuf, strlen(cbuf), 0, card); - printk(KERN_INFO - "isdnloop: (%s) Leased-line mode disabled\n", - CID); - cmd.command = ISDN_STAT_RUN; - cmd.driver = card->myid; - cmd.arg = 0; - card->interface.statcallb(&cmd); - } - } - return 0; - default: - return -EINVAL; - } - break; - case ISDN_CMD_DIAL: - if (!(card->flags & ISDNLOOP_FLAGS_RUNNING)) - return -ENODEV; - if (card->leased) - break; - if ((c->arg & 255) < ISDNLOOP_BCH) { - char *p; - char dcode[4]; - - a = c->arg; - p = c->parm.setup.phone; - if (*p == 's' || *p == 'S') { - /* Dial for SPV */ - p++; - strcpy(dcode, "SCA"); - } else - /* Normal Dial */ - strcpy(dcode, "CAL"); - snprintf(cbuf, sizeof(cbuf), - "%02d;D%s_R%s,%02d,%02d,%s\n", (int) (a + 1), - dcode, p, c->parm.setup.si1, - c->parm.setup.si2, c->parm.setup.eazmsn); - i = isdnloop_writecmd(cbuf, strlen(cbuf), 0, card); - } - break; - case ISDN_CMD_ACCEPTD: - if (!(card->flags & ISDNLOOP_FLAGS_RUNNING)) - return -ENODEV; - if (c->arg < ISDNLOOP_BCH) { - a = c->arg + 1; - cbuf[0] = 0; - switch (card->l2_proto[a - 1]) { - case ISDN_PROTO_L2_X75I: - sprintf(cbuf, "%02d;BX75\n", (int) a); - break; -#ifdef CONFIG_ISDN_X25 - case ISDN_PROTO_L2_X25DTE: - sprintf(cbuf, "%02d;BX2T\n", (int) a); - break; - case ISDN_PROTO_L2_X25DCE: - sprintf(cbuf, "%02d;BX2C\n", (int) a); - break; -#endif - case ISDN_PROTO_L2_HDLC: - sprintf(cbuf, "%02d;BTRA\n", (int) a); - break; - } - if (strlen(cbuf)) - i = isdnloop_writecmd(cbuf, strlen(cbuf), 0, card); - sprintf(cbuf, "%02d;DCON_R\n", (int) a); - i = isdnloop_writecmd(cbuf, strlen(cbuf), 0, card); - } - break; - case ISDN_CMD_ACCEPTB: - if (!(card->flags & ISDNLOOP_FLAGS_RUNNING)) - return -ENODEV; - if (c->arg < ISDNLOOP_BCH) { - a = c->arg + 1; - switch (card->l2_proto[a - 1]) { - case ISDN_PROTO_L2_X75I: - sprintf(cbuf, "%02d;BCON_R,BX75\n", (int) a); - break; -#ifdef CONFIG_ISDN_X25 - case ISDN_PROTO_L2_X25DTE: - sprintf(cbuf, "%02d;BCON_R,BX2T\n", (int) a); - break; - case ISDN_PROTO_L2_X25DCE: - sprintf(cbuf, "%02d;BCON_R,BX2C\n", (int) a); - break; -#endif - case ISDN_PROTO_L2_HDLC: - sprintf(cbuf, "%02d;BCON_R,BTRA\n", (int) a); - break; - default: - sprintf(cbuf, "%02d;BCON_R\n", (int) a); - } - printk(KERN_DEBUG "isdnloop writecmd '%s'\n", cbuf); - i = isdnloop_writecmd(cbuf, strlen(cbuf), 0, card); - break; - case ISDN_CMD_HANGUP: - if (!(card->flags & ISDNLOOP_FLAGS_RUNNING)) - return -ENODEV; - if (c->arg < ISDNLOOP_BCH) { - a = c->arg + 1; - sprintf(cbuf, "%02d;BDIS_R\n%02d;DDIS_R\n", (int) a, (int) a); - i = isdnloop_writecmd(cbuf, strlen(cbuf), 0, card); - } - break; - case ISDN_CMD_SETEAZ: - if (!(card->flags & ISDNLOOP_FLAGS_RUNNING)) - return -ENODEV; - if (card->leased) - break; - if (c->arg < ISDNLOOP_BCH) { - a = c->arg + 1; - if (card->ptype == ISDN_PTYPE_EURO) { - sprintf(cbuf, "%02d;MS%s%s\n", (int) a, - c->parm.num[0] ? "N" : "ALL", c->parm.num); - } else - sprintf(cbuf, "%02d;EAZ%s\n", (int) a, - c->parm.num[0] ? c->parm.num : (u_char *) "0123456789"); - i = isdnloop_writecmd(cbuf, strlen(cbuf), 0, card); - } - break; - case ISDN_CMD_CLREAZ: - if (!(card->flags & ISDNLOOP_FLAGS_RUNNING)) - return -ENODEV; - if (card->leased) - break; - if (c->arg < ISDNLOOP_BCH) { - a = c->arg + 1; - if (card->ptype == ISDN_PTYPE_EURO) - sprintf(cbuf, "%02d;MSNC\n", (int) a); - else - sprintf(cbuf, "%02d;EAZC\n", (int) a); - i = isdnloop_writecmd(cbuf, strlen(cbuf), 0, card); - } - break; - case ISDN_CMD_SETL2: - if (!(card->flags & ISDNLOOP_FLAGS_RUNNING)) - return -ENODEV; - if ((c->arg & 255) < ISDNLOOP_BCH) { - a = c->arg; - switch (a >> 8) { - case ISDN_PROTO_L2_X75I: - sprintf(cbuf, "%02d;BX75\n", (int) (a & 255) + 1); - break; -#ifdef CONFIG_ISDN_X25 - case ISDN_PROTO_L2_X25DTE: - sprintf(cbuf, "%02d;BX2T\n", (int) (a & 255) + 1); - break; - case ISDN_PROTO_L2_X25DCE: - sprintf(cbuf, "%02d;BX2C\n", (int) (a & 255) + 1); - break; -#endif - case ISDN_PROTO_L2_HDLC: - sprintf(cbuf, "%02d;BTRA\n", (int) (a & 255) + 1); - break; - case ISDN_PROTO_L2_TRANS: - sprintf(cbuf, "%02d;BTRA\n", (int) (a & 255) + 1); - break; - default: - return -EINVAL; - } - i = isdnloop_writecmd(cbuf, strlen(cbuf), 0, card); - card->l2_proto[a & 255] = (a >> 8); - } - break; - case ISDN_CMD_SETL3: - if (!(card->flags & ISDNLOOP_FLAGS_RUNNING)) - return -ENODEV; - return 0; - default: - return -EINVAL; - } - } - return 0; -} - -/* - * Find card with given driverId - */ -static inline isdnloop_card * -isdnloop_findcard(int driverid) -{ - isdnloop_card *p = cards; - - while (p) { - if (p->myid == driverid) - return p; - p = p->next; - } - return (isdnloop_card *) 0; -} - -/* - * Wrapper functions for interface to linklevel - */ -static int -if_command(isdn_ctrl *c) -{ - isdnloop_card *card = isdnloop_findcard(c->driver); - - if (card) - return isdnloop_command(c, card); - printk(KERN_ERR - "isdnloop: if_command called with invalid driverId!\n"); - return -ENODEV; -} - -static int -if_writecmd(const u_char __user *buf, int len, int id, int channel) -{ - isdnloop_card *card = isdnloop_findcard(id); - - if (card) { - if (!(card->flags & ISDNLOOP_FLAGS_RUNNING)) - return -ENODEV; - return isdnloop_writecmd(buf, len, 1, card); - } - printk(KERN_ERR - "isdnloop: if_writecmd called with invalid driverId!\n"); - return -ENODEV; -} - -static int -if_readstatus(u_char __user *buf, int len, int id, int channel) -{ - isdnloop_card *card = isdnloop_findcard(id); - - if (card) { - if (!(card->flags & ISDNLOOP_FLAGS_RUNNING)) - return -ENODEV; - return isdnloop_readstatus(buf, len, card); - } - printk(KERN_ERR - "isdnloop: if_readstatus called with invalid driverId!\n"); - return -ENODEV; -} - -static int -if_sendbuf(int id, int channel, int ack, struct sk_buff *skb) -{ - isdnloop_card *card = isdnloop_findcard(id); - - if (card) { - if (!(card->flags & ISDNLOOP_FLAGS_RUNNING)) - return -ENODEV; - /* ack request stored in skb scratch area */ - *(skb->head) = ack; - return isdnloop_sendbuf(channel, skb, card); - } - printk(KERN_ERR - "isdnloop: if_sendbuf called with invalid driverId!\n"); - return -ENODEV; -} - -/* - * Allocate a new card-struct, initialize it - * link it into cards-list and register it at linklevel. - */ -static isdnloop_card * -isdnloop_initcard(char *id) -{ - isdnloop_card *card; - int i; - card = kzalloc(sizeof(isdnloop_card), GFP_KERNEL); - if (!card) { - printk(KERN_WARNING - "isdnloop: (%s) Could not allocate card-struct.\n", id); - return (isdnloop_card *) 0; - } - card->interface.owner = THIS_MODULE; - card->interface.channels = ISDNLOOP_BCH; - card->interface.hl_hdrlen = 1; /* scratch area for storing ack flag*/ - card->interface.maxbufsize = 4000; - card->interface.command = if_command; - card->interface.writebuf_skb = if_sendbuf; - card->interface.writecmd = if_writecmd; - card->interface.readstat = if_readstatus; - card->interface.features = ISDN_FEATURE_L2_X75I | -#ifdef CONFIG_ISDN_X25 - ISDN_FEATURE_L2_X25DTE | - ISDN_FEATURE_L2_X25DCE | -#endif - ISDN_FEATURE_L2_HDLC | - ISDN_FEATURE_L3_TRANS | - ISDN_FEATURE_P_UNKNOWN; - card->ptype = ISDN_PTYPE_UNKNOWN; - strlcpy(card->interface.id, id, sizeof(card->interface.id)); - card->msg_buf_write = card->msg_buf; - card->msg_buf_read = card->msg_buf; - card->msg_buf_end = &card->msg_buf[sizeof(card->msg_buf) - 1]; - for (i = 0; i < ISDNLOOP_BCH; i++) { - card->l2_proto[i] = ISDN_PROTO_L2_X75I; - skb_queue_head_init(&card->bqueue[i]); - } - skb_queue_head_init(&card->dqueue); - spin_lock_init(&card->isdnloop_lock); - card->next = cards; - cards = card; - if (!register_isdn(&card->interface)) { - cards = cards->next; - printk(KERN_WARNING - "isdnloop: Unable to register %s\n", id); - kfree(card); - return (isdnloop_card *) 0; - } - card->myid = card->interface.channels; - return card; -} - -static int -isdnloop_addcard(char *id1) -{ - isdnloop_card *card; - card = isdnloop_initcard(id1); - if (!card) { - return -EIO; - } - printk(KERN_INFO - "isdnloop: (%s) virtual card added\n", - card->interface.id); - return 0; -} - -static int __init -isdnloop_init(void) -{ - if (isdnloop_id) - return isdnloop_addcard(isdnloop_id); - - return 0; -} - -static void __exit -isdnloop_exit(void) -{ - isdn_ctrl cmd; - isdnloop_card *card = cards; - isdnloop_card *last; - int i; - - isdnloop_stopallcards(); - while (card) { - cmd.command = ISDN_STAT_UNLOAD; - cmd.driver = card->myid; - card->interface.statcallb(&cmd); - for (i = 0; i < ISDNLOOP_BCH; i++) - isdnloop_free_queue(card, i); - card = card->next; - } - card = cards; - while (card) { - last = card; - skb_queue_purge(&card->dqueue); - card = card->next; - kfree(last); - } - printk(KERN_NOTICE "isdnloop-ISDN-driver unloaded\n"); -} - -module_init(isdnloop_init); -module_exit(isdnloop_exit); diff --git a/drivers/isdn/isdnloop/isdnloop.h b/drivers/isdn/isdnloop/isdnloop.h deleted file mode 100644 index e9e035552bb4..000000000000 --- a/drivers/isdn/isdnloop/isdnloop.h +++ /dev/null @@ -1,112 +0,0 @@ -/* $Id: isdnloop.h,v 1.5.6.3 2001/09/23 22:24:56 kai Exp $ - * - * Loopback lowlevel module for testing of linklevel. - * - * Copyright 1997 by Fritz Elfert (fritz@isdn4linux.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#ifndef isdnloop_h -#define isdnloop_h - -#define ISDNLOOP_IOCTL_DEBUGVAR 0 -#define ISDNLOOP_IOCTL_ADDCARD 1 -#define ISDNLOOP_IOCTL_LEASEDCFG 2 -#define ISDNLOOP_IOCTL_STARTUP 3 - -/* Struct for adding new cards */ -typedef struct isdnloop_cdef { - char id1[10]; -} isdnloop_cdef; - -/* Struct for configuring cards */ -typedef struct isdnloop_sdef { - int ptype; - char num[3][20]; -} isdnloop_sdef; - -#if defined(__KERNEL__) || defined(__DEBUGVAR__) - -#ifdef __KERNEL__ -/* Kernel includes */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#endif /* __KERNEL__ */ - -#define ISDNLOOP_FLAGS_B1ACTIVE 1 /* B-Channel-1 is open */ -#define ISDNLOOP_FLAGS_B2ACTIVE 2 /* B-Channel-2 is open */ -#define ISDNLOOP_FLAGS_RUNNING 4 /* Cards driver activated */ -#define ISDNLOOP_FLAGS_RBTIMER 8 /* scheduling of B-Channel-poll */ -#define ISDNLOOP_TIMER_BCREAD 1 /* B-Channel poll-cycle */ -#define ISDNLOOP_TIMER_DCREAD (HZ/2) /* D-Channel poll-cycle */ -#define ISDNLOOP_TIMER_ALERTWAIT (10 * HZ) /* Alert timeout */ -#define ISDNLOOP_MAX_SQUEUE 65536 /* Max. outstanding send-data */ -#define ISDNLOOP_BCH 2 /* channels per card */ - -/* - * Per card driver data - */ -typedef struct isdnloop_card { - struct isdnloop_card *next; /* Pointer to next device struct */ - struct isdnloop_card - *rcard[ISDNLOOP_BCH]; /* Pointer to 'remote' card */ - int rch[ISDNLOOP_BCH]; /* 'remote' channel */ - int myid; /* Driver-Nr. assigned by linklevel */ - int leased; /* Flag: This Adapter is connected */ - /* to a leased line */ - int sil[ISDNLOOP_BCH]; /* SI's to listen for */ - char eazlist[ISDNLOOP_BCH][11]; - /* EAZ's to listen for */ - char s0num[3][20]; /* 1TR6 base-number or MSN's */ - unsigned short flags; /* Statusflags */ - int ptype; /* Protocol type (1TR6 or Euro) */ - struct timer_list st_timer; /* Timer for Status-Polls */ - struct timer_list rb_timer; /* Timer for B-Channel-Polls */ - struct timer_list - c_timer[ISDNLOOP_BCH]; /* Timer for Alerting */ - int l2_proto[ISDNLOOP_BCH]; /* Current layer-2-protocol */ - isdn_if interface; /* Interface to upper layer */ - int iptr; /* Index to imsg-buffer */ - char imsg[60]; /* Internal buf for status-parsing */ - int optr; /* Index to omsg-buffer */ - char omsg[60]; /* Internal buf for cmd-parsing */ - char msg_buf[2048]; /* Buffer for status-messages */ - char *msg_buf_write; /* Writepointer for statusbuffer */ - char *msg_buf_read; /* Readpointer for statusbuffer */ - char *msg_buf_end; /* Pointer to end of statusbuffer */ - int sndcount[ISDNLOOP_BCH]; /* Byte-counters for B-Ch.-send */ - struct sk_buff_head - bqueue[ISDNLOOP_BCH]; /* B-Channel queues */ - struct sk_buff_head dqueue; /* D-Channel queue */ - spinlock_t isdnloop_lock; -} isdnloop_card; - -/* - * Main driver data - */ -#ifdef __KERNEL__ -static isdnloop_card *cards = (isdnloop_card *) 0; -#endif /* __KERNEL__ */ - -/* Utility-Macros */ - -#define CID (card->interface.id) - -#endif /* defined(__KERNEL__) || defined(__DEBUGVAR__) */ -#endif /* isdnloop_h */ diff --git a/include/linux/concap.h b/include/linux/concap.h deleted file mode 100644 index 977acb3d1fb2..000000000000 --- a/include/linux/concap.h +++ /dev/null @@ -1,112 +0,0 @@ -/* $Id: concap.h,v 1.3.2.2 2004/01/12 23:08:35 keil Exp $ - * - * Copyright 1997 by Henner Eisen - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - */ - -#ifndef _LINUX_CONCAP_H -#define _LINUX_CONCAP_H - -#include -#include - -/* Stuff to support encapsulation protocols genericly. The encapsulation - protocol is processed at the uppermost layer of the network interface. - - Based on a ideas developed in a 'synchronous device' thread in the - linux-x25 mailing list contributed by Alan Cox, Thomasz Motylewski - and Jonathan Naylor. - - For more documetation on this refer to Documentation/isdn/README.concap -*/ - -struct concap_proto_ops; -struct concap_device_ops; - -/* this manages all data needed by the encapsulation protocol - */ -struct concap_proto{ - struct net_device *net_dev; /* net device using our service */ - struct concap_device_ops *dops; /* callbacks provided by device */ - struct concap_proto_ops *pops; /* callbacks provided by us */ - spinlock_t lock; - int flags; - void *proto_data; /* protocol specific private data, to - be accessed via *pops methods only*/ - /* - : - whatever - : - */ -}; - -/* Operations to be supported by the net device. Called by the encapsulation - * protocol entity. No receive method is offered because the encapsulation - * protocol directly calls netif_rx(). - */ -struct concap_device_ops{ - - /* to request data is submitted by device*/ - int (*data_req)(struct concap_proto *, struct sk_buff *); - - /* Control methods must be set to NULL by devices which do not - support connection control.*/ - /* to request a connection is set up */ - int (*connect_req)(struct concap_proto *); - - /* to request a connection is released */ - int (*disconn_req)(struct concap_proto *); -}; - -/* Operations to be supported by the encapsulation protocol. Called by - * device driver. - */ -struct concap_proto_ops{ - - /* create a new encapsulation protocol instance of same type */ - struct concap_proto * (*proto_new) (void); - - /* delete encapsulation protocol instance and free all its resources. - cprot may no loger be referenced after calling this */ - void (*proto_del)(struct concap_proto *cprot); - - /* initialize the protocol's data. To be called at interface startup - or when the device driver resets the interface. All services of the - encapsulation protocol may be used after this*/ - int (*restart)(struct concap_proto *cprot, - struct net_device *ndev, - struct concap_device_ops *dops); - - /* inactivate an encapsulation protocol instance. The encapsulation - protocol may not call any *dops methods after this. */ - int (*close)(struct concap_proto *cprot); - - /* process a frame handed down to us by upper layer */ - int (*encap_and_xmit)(struct concap_proto *cprot, struct sk_buff *skb); - - /* to be called for each data entity received from lower layer*/ - int (*data_ind)(struct concap_proto *cprot, struct sk_buff *skb); - - /* to be called when a connection was set up/down. - Protocols that don't process these primitives might fill in - dummy methods here */ - int (*connect_ind)(struct concap_proto *cprot); - int (*disconn_ind)(struct concap_proto *cprot); - /* - Some network device support functions, like net_header(), rebuild_header(), - and others, that depend solely on the encapsulation protocol, might - be provided here, too. The net device would just fill them in its - corresponding fields when it is opened. - */ -}; - -/* dummy restart/close/connect/reset/disconn methods - */ -extern int concap_nop(struct concap_proto *cprot); - -/* dummy submit method - */ -extern int concap_drop_skb(struct concap_proto *cprot, struct sk_buff *skb); -#endif diff --git a/include/linux/isdn.h b/include/linux/isdn.h deleted file mode 100644 index df97c8444f5d..000000000000 --- a/include/linux/isdn.h +++ /dev/null @@ -1,473 +0,0 @@ -/* $Id: isdn.h,v 1.125.2.3 2004/02/10 01:07:14 keil Exp $ - * - * Main header for the Linux ISDN subsystem (linklevel). - * - * Copyright 1994,95,96 by Fritz Elfert (fritz@isdn4linux.de) - * Copyright 1995,96 by Thinking Objects Software GmbH Wuerzburg - * Copyright 1995,96 by Michael Hipp (Michael.Hipp@student.uni-tuebingen.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ -#ifndef __ISDN_H__ -#define __ISDN_H__ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define ISDN_TTY_MAJOR 43 -#define ISDN_TTYAUX_MAJOR 44 -#define ISDN_MAJOR 45 - -/* The minor-devicenumbers for Channel 0 and 1 are used as arguments for - * physical Channel-Mapping, so they MUST NOT be changed without changing - * the correspondent code in isdn.c - */ - -#define ISDN_MINOR_B 0 -#define ISDN_MINOR_BMAX (ISDN_MAX_CHANNELS-1) -#define ISDN_MINOR_CTRL 64 -#define ISDN_MINOR_CTRLMAX (64 + (ISDN_MAX_CHANNELS-1)) -#define ISDN_MINOR_PPP 128 -#define ISDN_MINOR_PPPMAX (128 + (ISDN_MAX_CHANNELS-1)) -#define ISDN_MINOR_STATUS 255 - -#ifdef CONFIG_ISDN_PPP - -#ifdef CONFIG_ISDN_PPP_VJ -# include -#endif - -#include -#include - -#include -#endif - -#ifdef CONFIG_ISDN_X25 -# include -#endif - -#include - -#define ISDN_DRVIOCTL_MASK 0x7f /* Mask for Device-ioctl */ - -/* Until now unused */ -#define ISDN_SERVICE_VOICE 1 -#define ISDN_SERVICE_AB 1<<1 -#define ISDN_SERVICE_X21 1<<2 -#define ISDN_SERVICE_G4 1<<3 -#define ISDN_SERVICE_BTX 1<<4 -#define ISDN_SERVICE_DFUE 1<<5 -#define ISDN_SERVICE_X25 1<<6 -#define ISDN_SERVICE_TTX 1<<7 -#define ISDN_SERVICE_MIXED 1<<8 -#define ISDN_SERVICE_FW 1<<9 -#define ISDN_SERVICE_GTEL 1<<10 -#define ISDN_SERVICE_BTXN 1<<11 -#define ISDN_SERVICE_BTEL 1<<12 - -/* Macros checking plain usage */ -#define USG_NONE(x) ((x & ISDN_USAGE_MASK)==ISDN_USAGE_NONE) -#define USG_RAW(x) ((x & ISDN_USAGE_MASK)==ISDN_USAGE_RAW) -#define USG_MODEM(x) ((x & ISDN_USAGE_MASK)==ISDN_USAGE_MODEM) -#define USG_VOICE(x) ((x & ISDN_USAGE_MASK)==ISDN_USAGE_VOICE) -#define USG_NET(x) ((x & ISDN_USAGE_MASK)==ISDN_USAGE_NET) -#define USG_FAX(x) ((x & ISDN_USAGE_MASK)==ISDN_USAGE_FAX) -#define USG_OUTGOING(x) ((x & ISDN_USAGE_OUTGOING)==ISDN_USAGE_OUTGOING) -#define USG_MODEMORVOICE(x) (((x & ISDN_USAGE_MASK)==ISDN_USAGE_MODEM) || \ - ((x & ISDN_USAGE_MASK)==ISDN_USAGE_VOICE) ) - -/* Timer-delays and scheduling-flags */ -#define ISDN_TIMER_RES 4 /* Main Timer-Resolution */ -#define ISDN_TIMER_02SEC (HZ/ISDN_TIMER_RES/5) /* Slow-Timer1 .2 sec */ -#define ISDN_TIMER_1SEC (HZ/ISDN_TIMER_RES) /* Slow-Timer2 1 sec */ -#define ISDN_TIMER_RINGING 5 /* tty RINGs = ISDN_TIMER_1SEC * this factor */ -#define ISDN_TIMER_KEEPINT 10 /* Cisco-Keepalive = ISDN_TIMER_1SEC * this factor */ -#define ISDN_TIMER_MODEMREAD 1 -#define ISDN_TIMER_MODEMPLUS 2 -#define ISDN_TIMER_MODEMRING 4 -#define ISDN_TIMER_MODEMXMIT 8 -#define ISDN_TIMER_NETDIAL 16 -#define ISDN_TIMER_NETHANGUP 32 -#define ISDN_TIMER_CARRIER 256 /* Wait for Carrier */ -#define ISDN_TIMER_FAST (ISDN_TIMER_MODEMREAD | ISDN_TIMER_MODEMPLUS | \ - ISDN_TIMER_MODEMXMIT) -#define ISDN_TIMER_SLOW (ISDN_TIMER_MODEMRING | ISDN_TIMER_NETHANGUP | \ - ISDN_TIMER_NETDIAL | ISDN_TIMER_CARRIER) - -/* Timeout-Values for isdn_net_dial() */ -#define ISDN_TIMER_DTIMEOUT10 (10*HZ/(ISDN_TIMER_02SEC*(ISDN_TIMER_RES+1))) -#define ISDN_TIMER_DTIMEOUT15 (15*HZ/(ISDN_TIMER_02SEC*(ISDN_TIMER_RES+1))) -#define ISDN_TIMER_DTIMEOUT60 (60*HZ/(ISDN_TIMER_02SEC*(ISDN_TIMER_RES+1))) - -/* GLOBAL_FLAGS */ -#define ISDN_GLOBAL_STOPPED 1 - -/*=================== Start of ip-over-ISDN stuff =========================*/ - -/* Feature- and status-flags for a net-interface */ -#define ISDN_NET_CONNECTED 0x01 /* Bound to ISDN-Channel */ -#define ISDN_NET_SECURE 0x02 /* Accept calls from phonelist only */ -#define ISDN_NET_CALLBACK 0x04 /* activate callback */ -#define ISDN_NET_CBHUP 0x08 /* hangup before callback */ -#define ISDN_NET_CBOUT 0x10 /* remote machine does callback */ - -#define ISDN_NET_MAGIC 0x49344C02 /* for paranoia-checking */ - -/* Phone-list-element */ -typedef struct { - void *next; - char num[ISDN_MSNLEN]; -} isdn_net_phone; - -/* - Principles when extending structures for generic encapsulation protocol - ("concap") support: - - Stuff which is hardware specific (here i4l-specific) goes in - the netdev -> local structure (here: isdn_net_local) - - Stuff which is encapsulation protocol specific goes in the structure - which holds the linux device structure (here: isdn_net_device) -*/ - -/* Local interface-data */ -typedef struct isdn_net_local_s { - ulong magic; - struct net_device_stats stats; /* Ethernet Statistics */ - int isdn_device; /* Index to isdn-device */ - int isdn_channel; /* Index to isdn-channel */ - int ppp_slot; /* PPPD device slot number */ - int pre_device; /* Preselected isdn-device */ - int pre_channel; /* Preselected isdn-channel */ - int exclusive; /* If non-zero idx to reserved chan.*/ - int flags; /* Connection-flags */ - int dialretry; /* Counter for Dialout-retries */ - int dialmax; /* Max. Number of Dial-retries */ - int cbdelay; /* Delay before Callback starts */ - int dtimer; /* Timeout-counter for dialing */ - char msn[ISDN_MSNLEN]; /* MSNs/EAZs for this interface */ - u_char cbhup; /* Flag: Reject Call before Callback*/ - u_char dialstate; /* State for dialing */ - u_char p_encap; /* Packet encapsulation */ - /* 0 = Ethernet over ISDN */ - /* 1 = RAW-IP */ - /* 2 = IP with type field */ - u_char l2_proto; /* Layer-2-protocol */ - /* See ISDN_PROTO_L2..-constants in */ - /* isdnif.h */ - /* 0 = X75/LAPB with I-Frames */ - /* 1 = X75/LAPB with UI-Frames */ - /* 2 = X75/LAPB with BUI-Frames */ - /* 3 = HDLC */ - u_char l3_proto; /* Layer-3-protocol */ - /* See ISDN_PROTO_L3..-constants in */ - /* isdnif.h */ - /* 0 = Transparent */ - int huptimer; /* Timeout-counter for auto-hangup */ - int charge; /* Counter for charging units */ - ulong chargetime; /* Timer for Charging info */ - int hupflags; /* Flags for charge-unit-hangup: */ - /* bit0: chargeint is invalid */ - /* bit1: Getting charge-interval */ - /* bit2: Do charge-unit-hangup */ - /* bit3: Do hangup even on incoming */ - int outgoing; /* Flag: outgoing call */ - int onhtime; /* Time to keep link up */ - int chargeint; /* Interval between charge-infos */ - int onum; /* Flag: at least 1 outgoing number */ - int cps; /* current speed of this interface */ - int transcount; /* byte-counter for cps-calculation */ - int sqfull; /* Flag: netdev-queue overloaded */ - ulong sqfull_stamp; /* Start-Time of overload */ - ulong slavedelay; /* Dynamic bundling delaytime */ - int triggercps; /* BogoCPS needed for trigger slave */ - isdn_net_phone *phone[2]; /* List of remote-phonenumbers */ - /* phone[0] = Incoming Numbers */ - /* phone[1] = Outgoing Numbers */ - isdn_net_phone *dial; /* Pointer to dialed number */ - struct net_device *master; /* Ptr to Master device for slaves */ - struct net_device *slave; /* Ptr to Slave device for masters */ - struct isdn_net_local_s *next; /* Ptr to next link in bundle */ - struct isdn_net_local_s *last; /* Ptr to last link in bundle */ - struct isdn_net_dev_s *netdev; /* Ptr to netdev */ - struct sk_buff_head super_tx_queue; /* List of supervisory frames to */ - /* be transmitted asap */ - atomic_t frame_cnt; /* number of frames currently */ - /* queued in HL driver */ - /* Ptr to orig. hard_header_cache */ - spinlock_t xmit_lock; /* used to protect the xmit path of */ - /* a particular channel (including */ - /* the frame_cnt */ - - int pppbind; /* ippp device for bindings */ - int dialtimeout; /* How long shall we try on dialing? (jiffies) */ - int dialwait; /* How long shall we wait after failed attempt? (jiffies) */ - ulong dialstarted; /* jiffies of first dialing-attempt */ - ulong dialwait_timer; /* jiffies of earliest next dialing-attempt */ - int huptimeout; /* How long will the connection be up? (seconds) */ -#ifdef CONFIG_ISDN_X25 - struct concap_device_ops *dops; /* callbacks used by encapsulator */ -#endif - /* use an own struct for that in later versions */ - ulong cisco_myseq; /* Local keepalive seq. for Cisco */ - ulong cisco_mineseen; /* returned keepalive seq. from remote */ - ulong cisco_yourseq; /* Remote keepalive seq. for Cisco */ - int cisco_keepalive_period; /* keepalive period */ - ulong cisco_last_slarp_in; /* jiffie of last keepalive packet we received */ - char cisco_line_state; /* state of line according to keepalive packets */ - char cisco_debserint; /* debugging flag of cisco hdlc with slarp */ - struct timer_list cisco_timer; - struct work_struct tqueue; -} isdn_net_local; - -/* the interface itself */ -typedef struct isdn_net_dev_s { - isdn_net_local *local; - isdn_net_local *queue; /* circular list of all bundled - channels, which are currently - online */ - spinlock_t queue_lock; /* lock to protect queue */ - void *next; /* Pointer to next isdn-interface */ - struct net_device *dev; /* interface to upper levels */ -#ifdef CONFIG_ISDN_PPP - ippp_bundle * pb; /* pointer to the common bundle structure - * with the per-bundle data */ -#endif -#ifdef CONFIG_ISDN_X25 - struct concap_proto *cprot; /* connection oriented encapsulation protocol */ -#endif - -} isdn_net_dev; - -/*===================== End of ip-over-ISDN stuff ===========================*/ - -/*======================= Start of ISDN-tty stuff ===========================*/ - -#define ISDN_ASYNC_MAGIC 0x49344C01 /* for paranoia-checking */ -#define ISDN_SERIAL_XMIT_SIZE 1024 /* Default bufsize for write */ -#define ISDN_SERIAL_XMIT_MAX 4000 /* Maximum bufsize for write */ - -#ifdef CONFIG_ISDN_AUDIO -/* For using sk_buffs with audio we need some private variables - * within each sk_buff. For this purpose, we declare a struct here, - * and put it always at the private skb->cb data array. A few macros help - * accessing the variables. - */ -typedef struct _isdn_audio_data { - unsigned short dle_count; - unsigned char lock; -} isdn_audio_data_t; - -#define ISDN_AUDIO_SKB_DLECOUNT(skb) (((isdn_audio_data_t *)&skb->cb[0])->dle_count) -#define ISDN_AUDIO_SKB_LOCK(skb) (((isdn_audio_data_t *)&skb->cb[0])->lock) -#endif - -/* Private data of AT-command-interpreter */ -typedef struct atemu { - u_char profile[ISDN_MODEM_NUMREG]; /* Modem-Regs. Profile 0 */ - u_char mdmreg[ISDN_MODEM_NUMREG]; /* Modem-Registers */ - char pmsn[ISDN_MSNLEN]; /* EAZ/MSNs Profile 0 */ - char msn[ISDN_MSNLEN]; /* EAZ/MSN */ - char plmsn[ISDN_LMSNLEN]; /* Listening MSNs Profile 0 */ - char lmsn[ISDN_LMSNLEN]; /* Listening MSNs */ - char cpn[ISDN_MSNLEN]; /* CalledPartyNumber on incoming call */ - char connmsg[ISDN_CMSGLEN]; /* CONNECT-Msg from HL-Driver */ -#ifdef CONFIG_ISDN_AUDIO - u_char vpar[10]; /* Voice-parameters */ - int lastDLE; /* Flag for voice-coding: DLE seen */ -#endif - int mdmcmdl; /* Length of Modem-Commandbuffer */ - int pluscount; /* Counter for +++ sequence */ - u_long lastplus; /* Timestamp of last + */ - int carrierwait; /* Seconds of carrier waiting */ - char mdmcmd[255]; /* Modem-Commandbuffer */ - unsigned int charge; /* Charge units of current connection */ -} atemu; - -/* Private data (similar to async_struct in ) */ -typedef struct modem_info { - int magic; - struct tty_port port; - int x_char; /* xon/xoff character */ - int mcr; /* Modem control register */ - int msr; /* Modem status register */ - int lsr; /* Line status register */ - int line; - int online; /* 1 = B-Channel is up, drop data */ - /* 2 = B-Channel is up, deliver d.*/ - int dialing; /* Dial in progress or ATA */ - int closing; - int rcvsched; /* Receive needs schedule */ - int isdn_driver; /* Index to isdn-driver */ - int isdn_channel; /* Index to isdn-channel */ - int drv_index; /* Index to dev->usage */ - int ncarrier; /* Flag: schedule NO CARRIER */ - unsigned char last_cause[8]; /* Last cause message */ - unsigned char last_num[ISDN_MSNLEN]; - /* Last phone-number */ - unsigned char last_l2; /* Last layer-2 protocol */ - unsigned char last_si; /* Last service */ - unsigned char last_lhup; /* Last hangup local? */ - unsigned char last_dir; /* Last direction (in or out) */ - struct timer_list nc_timer; /* Timer for delayed NO CARRIER */ - int send_outstanding;/* # of outstanding send-requests */ - int xmit_size; /* max. # of chars in xmit_buf */ - int xmit_count; /* # of chars in xmit_buf */ - struct sk_buff_head xmit_queue; /* transmit queue */ - atomic_t xmit_lock; /* Semaphore for isdn_tty_write */ -#ifdef CONFIG_ISDN_AUDIO - int vonline; /* Voice-channel status */ - /* Bit 0 = recording */ - /* Bit 1 = playback */ - /* Bit 2 = playback, DLE-ETX seen */ - struct sk_buff_head dtmf_queue; /* queue for dtmf results */ - void *adpcms; /* state for adpcm decompression */ - void *adpcmr; /* state for adpcm compression */ - void *dtmf_state; /* state for dtmf decoder */ - void *silence_state; /* state for silence detection */ -#endif -#ifdef CONFIG_ISDN_TTY_FAX - struct T30_s *fax; /* T30 Fax Group 3 data/interface */ - int faxonline; /* Fax-channel status */ -#endif - atemu emu; /* AT-emulator data */ - spinlock_t readlock; -} modem_info; - -#define ISDN_MODEM_WINSIZE 8 - -/* Description of one ISDN-tty */ -typedef struct _isdn_modem { - int refcount; /* Number of opens */ - struct tty_driver *tty_modem; /* tty-device */ - struct tty_struct *modem_table[ISDN_MAX_CHANNELS]; /* ?? copied from Orig */ - struct ktermios *modem_termios[ISDN_MAX_CHANNELS]; - struct ktermios *modem_termios_locked[ISDN_MAX_CHANNELS]; - modem_info info[ISDN_MAX_CHANNELS]; /* Private data */ -} isdn_modem_t; - -/*======================= End of ISDN-tty stuff ============================*/ - -/*======================== Start of V.110 stuff ============================*/ -#define V110_BUFSIZE 1024 - -typedef struct { - int nbytes; /* 1 Matrixbyte -> nbytes in stream */ - int nbits; /* Number of used bits in streambyte */ - unsigned char key; /* Bitmask in stream eg. 11 (nbits=2) */ - int decodelen; /* Amount of data in decodebuf */ - int SyncInit; /* Number of sync frames to send */ - unsigned char *OnlineFrame; /* Precalculated V110 idle frame */ - unsigned char *OfflineFrame; /* Precalculated V110 sync Frame */ - int framelen; /* Length of frames */ - int skbuser; /* Number of unacked userdata skbs */ - int skbidle; /* Number of unacked idle/sync skbs */ - int introducer; /* Local vars for decoder */ - int dbit; - unsigned char b; - int skbres; /* space to reserve in outgoing skb */ - int maxsize; /* maxbufsize of lowlevel driver */ - unsigned char *encodebuf; /* temporary buffer for encoding */ - unsigned char decodebuf[V110_BUFSIZE]; /* incomplete V110 matrices */ -} isdn_v110_stream; - -/*========================= End of V.110 stuff =============================*/ - -/*======================= Start of general stuff ===========================*/ - -typedef struct { - char *next; - char *private; -} infostruct; - -#define DRV_FLAG_RUNNING 1 -#define DRV_FLAG_REJBUS 2 -#define DRV_FLAG_LOADED 4 - -/* Description of hardware-level-driver */ -typedef struct _isdn_driver { - ulong online; /* Channel-Online flags */ - ulong flags; /* Misc driver Flags */ - int locks; /* Number of locks for this driver */ - int channels; /* Number of channels */ - wait_queue_head_t st_waitq; /* Wait-Queue for status-read's */ - int maxbufsize; /* Maximum Buffersize supported */ - unsigned long pktcount; /* Until now: unused */ - int stavail; /* Chars avail on Status-device */ - isdn_if *interface; /* Interface to driver */ - int *rcverr; /* Error-counters for B-Ch.-receive */ - int *rcvcount; /* Byte-counters for B-Ch.-receive */ -#ifdef CONFIG_ISDN_AUDIO - unsigned long DLEflag; /* Flags: Insert DLE at next read */ -#endif - struct sk_buff_head *rpqueue; /* Pointers to start of Rcv-Queue */ - wait_queue_head_t *rcv_waitq; /* Wait-Queues for B-Channel-Reads */ - wait_queue_head_t *snd_waitq; /* Wait-Queue for B-Channel-Send's */ - char msn2eaz[10][ISDN_MSNLEN]; /* Mapping-Table MSN->EAZ */ -} isdn_driver_t; - -/* Main driver-data */ -typedef struct isdn_devt { - struct module *owner; - spinlock_t lock; - unsigned short flags; /* Bitmapped Flags: */ - int drivers; /* Current number of drivers */ - int channels; /* Current number of channels */ - int net_verbose; /* Verbose-Flag */ - int modempoll; /* Flag: tty-read active */ - spinlock_t timerlock; - int tflags; /* Timer-Flags: */ - /* see ISDN_TIMER_..defines */ - int global_flags; - infostruct *infochain; /* List of open info-devs. */ - wait_queue_head_t info_waitq; /* Wait-Queue for isdninfo */ - struct timer_list timer; /* Misc.-function Timer */ - int chanmap[ISDN_MAX_CHANNELS]; /* Map minor->device-channel */ - int drvmap[ISDN_MAX_CHANNELS]; /* Map minor->driver-index */ - int usage[ISDN_MAX_CHANNELS]; /* Used by tty/ip/voice */ - char num[ISDN_MAX_CHANNELS][ISDN_MSNLEN]; - /* Remote number of active ch.*/ - int m_idx[ISDN_MAX_CHANNELS]; /* Index for mdm.... */ - isdn_driver_t *drv[ISDN_MAX_DRIVERS]; /* Array of drivers */ - isdn_net_dev *netdev; /* Linked list of net-if's */ - char drvid[ISDN_MAX_DRIVERS][20];/* Driver-ID */ - struct task_struct *profd; /* For iprofd */ - isdn_modem_t mdm; /* tty-driver-data */ - isdn_net_dev *rx_netdev[ISDN_MAX_CHANNELS]; /* rx netdev-pointers */ - isdn_net_dev *st_netdev[ISDN_MAX_CHANNELS]; /* stat netdev-pointers */ - ulong ibytes[ISDN_MAX_CHANNELS]; /* Statistics incoming bytes */ - ulong obytes[ISDN_MAX_CHANNELS]; /* Statistics outgoing bytes */ - int v110emu[ISDN_MAX_CHANNELS]; /* V.110 emulator-mode 0=none */ - atomic_t v110use[ISDN_MAX_CHANNELS]; /* Usage-Semaphore for stream */ - isdn_v110_stream *v110[ISDN_MAX_CHANNELS]; /* V.110 private data */ - struct mutex mtx; /* serialize list access*/ - unsigned long global_features; -} isdn_dev; - -extern isdn_dev *dev; - - -#endif /* __ISDN_H__ */ diff --git a/include/linux/isdn_divertif.h b/include/linux/isdn_divertif.h deleted file mode 100644 index 19ab361f9f07..000000000000 --- a/include/linux/isdn_divertif.h +++ /dev/null @@ -1,35 +0,0 @@ -/* $Id: isdn_divertif.h,v 1.4.6.1 2001/09/23 22:25:05 kai Exp $ - * - * Header for the diversion supplementary interface for i4l. - * - * Author Werner Cornelius (werner@titro.de) - * Copyright by Werner Cornelius (werner@titro.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ -#ifndef _LINUX_ISDN_DIVERTIF_H -#define _LINUX_ISDN_DIVERTIF_H - -#include -#include -#include - -/***************************************************************/ -/* structure exchanging data between isdn hl and divert module */ -/***************************************************************/ -typedef struct - { ulong if_magic; /* magic info and version */ - int cmd; /* command */ - int (*stat_callback)(isdn_ctrl *); /* supplied by divert module when calling */ - int (*ll_cmd)(isdn_ctrl *); /* supplied by hl on return */ - char * (*drv_to_name)(int); /* map a driver id to name, supplied by hl */ - int (*name_to_drv)(char *); /* map a driver id to name, supplied by hl */ - } isdn_divert_if; - -/*********************/ -/* function register */ -/*********************/ -extern int DIVERT_REG_NAME(isdn_divert_if *); -#endif /* _LINUX_ISDN_DIVERTIF_H */ diff --git a/include/linux/isdn_ppp.h b/include/linux/isdn_ppp.h deleted file mode 100644 index a0070c6dfaf8..000000000000 --- a/include/linux/isdn_ppp.h +++ /dev/null @@ -1,194 +0,0 @@ -/* Linux ISDN subsystem, sync PPP, interface to ipppd - * - * Copyright 1994-1999 by Fritz Elfert (fritz@isdn4linux.de) - * Copyright 1995,96 Thinking Objects Software GmbH Wuerzburg - * Copyright 1995,96 by Michael Hipp (Michael.Hipp@student.uni-tuebingen.de) - * Copyright 2000-2002 by Kai Germaschewski (kai@germaschewski.name) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ -#ifndef _LINUX_ISDN_PPP_H -#define _LINUX_ISDN_PPP_H - - - - -#ifdef CONFIG_IPPP_FILTER -#include -#endif -#include - -#define DECOMP_ERR_NOMEM (-10) - -#define MP_END_FRAG 0x40 -#define MP_BEGIN_FRAG 0x80 - -#define MP_MAX_QUEUE_LEN 16 - -/* - * We need a way for the decompressor to influence the generation of CCP - * Reset-Requests in a variety of ways. The decompressor is already returning - * a lot of information (generated skb length, error conditions) so we use - * another parameter. This parameter is a pointer to a structure which is - * to be marked valid by the decompressor and only in this case is ever used. - * Furthermore, the only case where this data is used is when the decom- - * pressor returns DECOMP_ERROR. - * - * We use this same struct for the reset entry of the compressor to commu- - * nicate to its caller how to deal with sending of a Reset Ack. In this - * case, expra is not used, but other options still apply (suppressing - * sending with rsend, appending arbitrary data, etc). - */ - -#define IPPP_RESET_MAXDATABYTES 32 - -struct isdn_ppp_resetparams { - unsigned char valid:1; /* rw Is this structure filled at all ? */ - unsigned char rsend:1; /* rw Should we send one at all ? */ - unsigned char idval:1; /* rw Is the id field valid ? */ - unsigned char dtval:1; /* rw Is the data field valid ? */ - unsigned char expra:1; /* rw Is an Ack expected for this Req ? */ - unsigned char id; /* wo Send CCP ResetReq with this id */ - unsigned short maxdlen; /* ro Max bytes to be stored in data field */ - unsigned short dlen; /* rw Bytes stored in data field */ - unsigned char *data; /* wo Data for ResetReq info field */ -}; - -/* - * this is an 'old friend' from ppp-comp.h under a new name - * check the original include for more information - */ -struct isdn_ppp_compressor { - struct isdn_ppp_compressor *next, *prev; - struct module *owner; - int num; /* CCP compression protocol number */ - - void *(*alloc) (struct isdn_ppp_comp_data *); - void (*free) (void *state); - int (*init) (void *state, struct isdn_ppp_comp_data *, - int unit,int debug); - - /* The reset entry needs to get more exact information about the - ResetReq or ResetAck it was called with. The parameters are - obvious. If reset is called without a Req or Ack frame which - could be handed into it, code MUST be set to 0. Using rsparm, - the reset entry can control if and how a ResetAck is returned. */ - - void (*reset) (void *state, unsigned char code, unsigned char id, - unsigned char *data, unsigned len, - struct isdn_ppp_resetparams *rsparm); - - int (*compress) (void *state, struct sk_buff *in, - struct sk_buff *skb_out, int proto); - - int (*decompress) (void *state,struct sk_buff *in, - struct sk_buff *skb_out, - struct isdn_ppp_resetparams *rsparm); - - void (*incomp) (void *state, struct sk_buff *in,int proto); - void (*stat) (void *state, struct compstat *stats); -}; - -extern int isdn_ppp_register_compressor(struct isdn_ppp_compressor *); -extern int isdn_ppp_unregister_compressor(struct isdn_ppp_compressor *); -extern int isdn_ppp_dial_slave(char *); -extern int isdn_ppp_hangup_slave(char *); - -typedef struct { - unsigned long seqerrs; - unsigned long frame_drops; - unsigned long overflows; - unsigned long max_queue_len; -} isdn_mppp_stats; - -typedef struct { - int mp_mrru; /* unused */ - struct sk_buff * frags; /* fragments sl list -- use skb->next */ - long frames; /* number of frames in the frame list */ - unsigned int seq; /* last processed packet seq #: any packets - * with smaller seq # will be dropped - * unconditionally */ - spinlock_t lock; - int ref_ct; - /* statistics */ - isdn_mppp_stats stats; -} ippp_bundle; - -#define NUM_RCV_BUFFS 64 - -struct ippp_buf_queue { - struct ippp_buf_queue *next; - struct ippp_buf_queue *last; - char *buf; /* NULL here indicates end of queue */ - int len; -}; - -/* The data structure for one CCP reset transaction */ -enum ippp_ccp_reset_states { - CCPResetIdle, - CCPResetSentReq, - CCPResetRcvdReq, - CCPResetSentAck, - CCPResetRcvdAck -}; - -struct ippp_ccp_reset_state { - enum ippp_ccp_reset_states state; /* State of this transaction */ - struct ippp_struct *is; /* Backlink to device stuff */ - unsigned char id; /* Backlink id index */ - unsigned char ta:1; /* The timer is active (flag) */ - unsigned char expra:1; /* We expect a ResetAck at all */ - int dlen; /* Databytes stored in data */ - struct timer_list timer; /* For timeouts/retries */ - /* This is a hack but seems sufficient for the moment. We do not want - to have this be yet another allocation for some bytes, it is more - memory management overhead than the whole mess is worth. */ - unsigned char data[IPPP_RESET_MAXDATABYTES]; -}; - -/* The data structure keeping track of the currently outstanding CCP Reset - transactions. */ -struct ippp_ccp_reset { - struct ippp_ccp_reset_state *rs[256]; /* One per possible id */ - unsigned char lastid; /* Last id allocated by the engine */ -}; - -struct ippp_struct { - struct ippp_struct *next_link; - int state; - spinlock_t buflock; - struct ippp_buf_queue rq[NUM_RCV_BUFFS]; /* packet queue for isdn_ppp_read() */ - struct ippp_buf_queue *first; /* pointer to (current) first packet */ - struct ippp_buf_queue *last; /* pointer to (current) last used packet in queue */ - wait_queue_head_t wq; - struct task_struct *tk; - unsigned int mpppcfg; - unsigned int pppcfg; - unsigned int mru; - unsigned int mpmru; - unsigned int mpmtu; - unsigned int maxcid; - struct isdn_net_local_s *lp; - int unit; - int minor; - unsigned int last_link_seqno; - long mp_seqno; -#ifdef CONFIG_ISDN_PPP_VJ - unsigned char *cbuf; - struct slcompress *slcomp; -#endif -#ifdef CONFIG_IPPP_FILTER - struct bpf_prog *pass_filter; /* filter for packets to pass */ - struct bpf_prog *active_filter; /* filter for pkts to reset idle */ -#endif - unsigned long debug; - struct isdn_ppp_compressor *compressor,*decompressor; - struct isdn_ppp_compressor *link_compressor,*link_decompressor; - void *decomp_stat,*comp_stat,*link_decomp_stat,*link_comp_stat; - struct ippp_ccp_reset *reset; /* Allocated on demand, may never be needed */ - unsigned long compflags; -}; - -#endif /* _LINUX_ISDN_PPP_H */ diff --git a/include/linux/isdnif.h b/include/linux/isdnif.h deleted file mode 100644 index 8d80fdc68647..000000000000 --- a/include/linux/isdnif.h +++ /dev/null @@ -1,505 +0,0 @@ -/* $Id: isdnif.h,v 1.43.2.2 2004/01/12 23:08:35 keil Exp $ - * - * Linux ISDN subsystem - * Definition of the interface between the subsystem and its low-level drivers. - * - * Copyright 1994,95,96 by Fritz Elfert (fritz@isdn4linux.de) - * Copyright 1995,96 Thinking Objects Software GmbH Wuerzburg - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ -#ifndef __ISDNIF_H__ -#define __ISDNIF_H__ - - -#include -#include - -/***************************************************************************/ -/* Extensions made by Werner Cornelius (werner@ikt.de) */ -/* */ -/* The proceed command holds a incoming call in a state to leave processes */ -/* enough time to check whether ist should be accepted. */ -/* The PROT_IO Command extends the interface to make protocol dependent */ -/* features available (call diversion, call waiting...). */ -/* */ -/* The PROT_IO Command is executed with the desired driver id and the arg */ -/* parameter coded as follows: */ -/* The lower 8 bits of arg contain the desired protocol from ISDN_PTYPE */ -/* definitions. The upper 24 bits represent the protocol specific cmd/stat.*/ -/* Any additional data is protocol and command specific. */ -/* This mechanism also applies to the statcallb callback STAT_PROT. */ -/* */ -/* This suggested extension permits an easy expansion of protocol specific */ -/* handling. Extensions may be added at any time without changing the HL */ -/* driver code and not getting conflicts without certifications. */ -/* The well known CAPI 2.0 interface handles such extensions in a similar */ -/* way. Perhaps a protocol specific module may be added and separately */ -/* loaded and linked to the basic isdn module for handling. */ -/***************************************************************************/ - -/*****************/ -/* DSS1 commands */ -/*****************/ -#define DSS1_CMD_INVOKE ((0x00 << 8) | ISDN_PTYPE_EURO) /* invoke a supplementary service */ -#define DSS1_CMD_INVOKE_ABORT ((0x01 << 8) | ISDN_PTYPE_EURO) /* abort a invoke cmd */ - -/*******************************/ -/* DSS1 Status callback values */ -/*******************************/ -#define DSS1_STAT_INVOKE_RES ((0x80 << 8) | ISDN_PTYPE_EURO) /* Result for invocation */ -#define DSS1_STAT_INVOKE_ERR ((0x81 << 8) | ISDN_PTYPE_EURO) /* Error Return for invocation */ -#define DSS1_STAT_INVOKE_BRD ((0x82 << 8) | ISDN_PTYPE_EURO) /* Deliver invoke broadcast info */ - - -/*********************************************************************/ -/* structures for DSS1 commands and callback */ -/* */ -/* An action is invoked by sending a DSS1_CMD_INVOKE. The ll_id, proc*/ -/* timeout, datalen and data fields must be set before calling. */ -/* */ -/* The return value is a positive hl_id value also delivered in the */ -/* hl_id field. A value of zero signals no more left hl_id capacitys.*/ -/* A negative return value signals errors in LL. So if the return */ -/* value is <= 0 no action in LL will be taken -> request ignored */ -/* */ -/* The timeout field must be filled with a positive value specifying */ -/* the amount of time the INVOKED process waits for a reaction from */ -/* the network. */ -/* If a response (either error or result) is received during this */ -/* intervall, a reporting callback is initiated and the process will */ -/* be deleted, the hl identifier will be freed. */ -/* If no response is received during the specified intervall, a error*/ -/* callback is initiated with timeout set to -1 and a datalen set */ -/* to 0. */ -/* If timeout is set to a value <= 0 during INVOCATION the process is*/ -/* immediately deleted after sending the data. No callback occurs ! */ -/* */ -/* A currently waiting process may be aborted with INVOKE_ABORT. No */ -/* callback will occur when a process has been aborted. */ -/* */ -/* Broadcast invoke frames from the network are reported via the */ -/* STAT_INVOKE_BRD callback. The ll_id is set to 0, the other fields */ -/* are supplied by the network and not by the HL. */ -/*********************************************************************/ - -/*****************/ -/* NI1 commands */ -/*****************/ -#define NI1_CMD_INVOKE ((0x00 << 8) | ISDN_PTYPE_NI1) /* invoke a supplementary service */ -#define NI1_CMD_INVOKE_ABORT ((0x01 << 8) | ISDN_PTYPE_NI1) /* abort a invoke cmd */ - -/*******************************/ -/* NI1 Status callback values */ -/*******************************/ -#define NI1_STAT_INVOKE_RES ((0x80 << 8) | ISDN_PTYPE_NI1) /* Result for invocation */ -#define NI1_STAT_INVOKE_ERR ((0x81 << 8) | ISDN_PTYPE_NI1) /* Error Return for invocation */ -#define NI1_STAT_INVOKE_BRD ((0x82 << 8) | ISDN_PTYPE_NI1) /* Deliver invoke broadcast info */ - -typedef struct - { ulong ll_id; /* ID supplied by LL when executing */ - /* a command and returned by HL for */ - /* INVOKE_RES and INVOKE_ERR */ - int hl_id; /* ID supplied by HL when called */ - /* for executing a cmd and delivered */ - /* for results and errors */ - /* must be supplied by LL when aborting*/ - int proc; /* invoke procedure used by CMD_INVOKE */ - /* returned by callback and broadcast */ - int timeout; /* timeout for INVOKE CMD in ms */ - /* -1 in stat callback when timed out */ - /* error value when error callback */ - int datalen; /* length of cmd or stat data */ - u_char *data;/* pointer to data delivered or send */ - } isdn_cmd_stat; - -/* - * Commands from linklevel to lowlevel - * - */ -#define ISDN_CMD_IOCTL 0 /* Perform ioctl */ -#define ISDN_CMD_DIAL 1 /* Dial out */ -#define ISDN_CMD_ACCEPTD 2 /* Accept an incoming call on D-Chan. */ -#define ISDN_CMD_ACCEPTB 3 /* Request B-Channel connect. */ -#define ISDN_CMD_HANGUP 4 /* Hangup */ -#define ISDN_CMD_CLREAZ 5 /* Clear EAZ(s) of channel */ -#define ISDN_CMD_SETEAZ 6 /* Set EAZ(s) of channel */ -#define ISDN_CMD_GETEAZ 7 /* Get EAZ(s) of channel */ -#define ISDN_CMD_SETSIL 8 /* Set Service-Indicator-List of channel */ -#define ISDN_CMD_GETSIL 9 /* Get Service-Indicator-List of channel */ -#define ISDN_CMD_SETL2 10 /* Set B-Chan. Layer2-Parameter */ -#define ISDN_CMD_GETL2 11 /* Get B-Chan. Layer2-Parameter */ -#define ISDN_CMD_SETL3 12 /* Set B-Chan. Layer3-Parameter */ -#define ISDN_CMD_GETL3 13 /* Get B-Chan. Layer3-Parameter */ -// #define ISDN_CMD_LOCK 14 /* Signal usage by upper levels */ -// #define ISDN_CMD_UNLOCK 15 /* Release usage-lock */ -#define ISDN_CMD_SUSPEND 16 /* Suspend connection */ -#define ISDN_CMD_RESUME 17 /* Resume connection */ -#define ISDN_CMD_PROCEED 18 /* Proceed with call establishment */ -#define ISDN_CMD_ALERT 19 /* Alert after Proceeding */ -#define ISDN_CMD_REDIR 20 /* Redir a incoming call */ -#define ISDN_CMD_PROT_IO 21 /* Protocol specific commands */ -#define CAPI_PUT_MESSAGE 22 /* CAPI message send down or up */ -#define ISDN_CMD_FAXCMD 23 /* FAX commands to HL-driver */ -#define ISDN_CMD_AUDIO 24 /* DSP, DTMF, ... settings */ - -/* - * Status-Values delivered from lowlevel to linklevel via - * statcallb(). - * - */ -#define ISDN_STAT_STAVAIL 256 /* Raw status-data available */ -#define ISDN_STAT_ICALL 257 /* Incoming call detected */ -#define ISDN_STAT_RUN 258 /* Signal protocol-code is running */ -#define ISDN_STAT_STOP 259 /* Signal halt of protocol-code */ -#define ISDN_STAT_DCONN 260 /* Signal D-Channel connect */ -#define ISDN_STAT_BCONN 261 /* Signal B-Channel connect */ -#define ISDN_STAT_DHUP 262 /* Signal D-Channel disconnect */ -#define ISDN_STAT_BHUP 263 /* Signal B-Channel disconnect */ -#define ISDN_STAT_CINF 264 /* Charge-Info */ -#define ISDN_STAT_LOAD 265 /* Signal new lowlevel-driver is loaded */ -#define ISDN_STAT_UNLOAD 266 /* Signal unload of lowlevel-driver */ -#define ISDN_STAT_BSENT 267 /* Signal packet sent */ -#define ISDN_STAT_NODCH 268 /* Signal no D-Channel */ -#define ISDN_STAT_ADDCH 269 /* Add more Channels */ -#define ISDN_STAT_CAUSE 270 /* Cause-Message */ -#define ISDN_STAT_ICALLW 271 /* Incoming call without B-chan waiting */ -#define ISDN_STAT_REDIR 272 /* Redir result */ -#define ISDN_STAT_PROT 273 /* protocol IO specific callback */ -#define ISDN_STAT_DISPLAY 274 /* deliver a received display message */ -#define ISDN_STAT_L1ERR 275 /* Signal Layer-1 Error */ -#define ISDN_STAT_FAXIND 276 /* FAX indications from HL-driver */ -#define ISDN_STAT_AUDIO 277 /* DTMF, DSP indications */ -#define ISDN_STAT_DISCH 278 /* Disable/Enable channel usage */ - -/* - * Audio commands - */ -#define ISDN_AUDIO_SETDD 0 /* Set DTMF detection */ -#define ISDN_AUDIO_DTMF 1 /* Rx/Tx DTMF */ - -/* - * Values for errcode field - */ -#define ISDN_STAT_L1ERR_SEND 1 -#define ISDN_STAT_L1ERR_RECV 2 - -/* - * Values for feature-field of interface-struct. - */ -/* Layer 2 */ -#define ISDN_FEATURE_L2_X75I (0x0001 << ISDN_PROTO_L2_X75I) -#define ISDN_FEATURE_L2_X75UI (0x0001 << ISDN_PROTO_L2_X75UI) -#define ISDN_FEATURE_L2_X75BUI (0x0001 << ISDN_PROTO_L2_X75BUI) -#define ISDN_FEATURE_L2_HDLC (0x0001 << ISDN_PROTO_L2_HDLC) -#define ISDN_FEATURE_L2_TRANS (0x0001 << ISDN_PROTO_L2_TRANS) -#define ISDN_FEATURE_L2_X25DTE (0x0001 << ISDN_PROTO_L2_X25DTE) -#define ISDN_FEATURE_L2_X25DCE (0x0001 << ISDN_PROTO_L2_X25DCE) -#define ISDN_FEATURE_L2_V11096 (0x0001 << ISDN_PROTO_L2_V11096) -#define ISDN_FEATURE_L2_V11019 (0x0001 << ISDN_PROTO_L2_V11019) -#define ISDN_FEATURE_L2_V11038 (0x0001 << ISDN_PROTO_L2_V11038) -#define ISDN_FEATURE_L2_MODEM (0x0001 << ISDN_PROTO_L2_MODEM) -#define ISDN_FEATURE_L2_FAX (0x0001 << ISDN_PROTO_L2_FAX) -#define ISDN_FEATURE_L2_HDLC_56K (0x0001 << ISDN_PROTO_L2_HDLC_56K) - -#define ISDN_FEATURE_L2_MASK (0x0FFFF) /* Max. 16 protocols */ -#define ISDN_FEATURE_L2_SHIFT (0) - -/* Layer 3 */ -#define ISDN_FEATURE_L3_TRANS (0x10000 << ISDN_PROTO_L3_TRANS) -#define ISDN_FEATURE_L3_TRANSDSP (0x10000 << ISDN_PROTO_L3_TRANSDSP) -#define ISDN_FEATURE_L3_FCLASS2 (0x10000 << ISDN_PROTO_L3_FCLASS2) -#define ISDN_FEATURE_L3_FCLASS1 (0x10000 << ISDN_PROTO_L3_FCLASS1) - -#define ISDN_FEATURE_L3_MASK (0x0FF0000) /* Max. 8 Protocols */ -#define ISDN_FEATURE_L3_SHIFT (16) - -/* Signaling */ -#define ISDN_FEATURE_P_UNKNOWN (0x1000000 << ISDN_PTYPE_UNKNOWN) -#define ISDN_FEATURE_P_1TR6 (0x1000000 << ISDN_PTYPE_1TR6) -#define ISDN_FEATURE_P_EURO (0x1000000 << ISDN_PTYPE_EURO) -#define ISDN_FEATURE_P_NI1 (0x1000000 << ISDN_PTYPE_NI1) - -#define ISDN_FEATURE_P_MASK (0x0FF000000) /* Max. 8 Protocols */ -#define ISDN_FEATURE_P_SHIFT (24) - -typedef struct setup_parm { - unsigned char phone[32]; /* Remote Phone-Number */ - unsigned char eazmsn[32]; /* Local EAZ or MSN */ - unsigned char si1; /* Service Indicator 1 */ - unsigned char si2; /* Service Indicator 2 */ - unsigned char plan; /* Numbering plan */ - unsigned char screen; /* Screening info */ -} setup_parm; - - -#ifdef CONFIG_ISDN_TTY_FAX -/* T.30 Fax G3 */ - -#define FAXIDLEN 21 - -typedef struct T30_s { - /* session parameters */ - __u8 resolution; - __u8 rate; - __u8 width; - __u8 length; - __u8 compression; - __u8 ecm; - __u8 binary; - __u8 scantime; - __u8 id[FAXIDLEN]; - /* additional parameters */ - __u8 phase; - __u8 direction; - __u8 code; - __u8 badlin; - __u8 badmul; - __u8 bor; - __u8 fet; - __u8 pollid[FAXIDLEN]; - __u8 cq; - __u8 cr; - __u8 ctcrty; - __u8 minsp; - __u8 phcto; - __u8 rel; - __u8 nbc; - /* remote station parameters */ - __u8 r_resolution; - __u8 r_rate; - __u8 r_width; - __u8 r_length; - __u8 r_compression; - __u8 r_ecm; - __u8 r_binary; - __u8 r_scantime; - __u8 r_id[FAXIDLEN]; - __u8 r_code; -} __packed T30_s; - -#define ISDN_TTY_FAX_CONN_IN 0 -#define ISDN_TTY_FAX_CONN_OUT 1 - -#define ISDN_TTY_FAX_FCON 0 -#define ISDN_TTY_FAX_DIS 1 -#define ISDN_TTY_FAX_FTT 2 -#define ISDN_TTY_FAX_MCF 3 -#define ISDN_TTY_FAX_DCS 4 -#define ISDN_TTY_FAX_TRAIN_OK 5 -#define ISDN_TTY_FAX_EOP 6 -#define ISDN_TTY_FAX_EOM 7 -#define ISDN_TTY_FAX_MPS 8 -#define ISDN_TTY_FAX_DTC 9 -#define ISDN_TTY_FAX_RID 10 -#define ISDN_TTY_FAX_HNG 11 -#define ISDN_TTY_FAX_DT 12 -#define ISDN_TTY_FAX_FCON_I 13 -#define ISDN_TTY_FAX_DR 14 -#define ISDN_TTY_FAX_ET 15 -#define ISDN_TTY_FAX_CFR 16 -#define ISDN_TTY_FAX_PTS 17 -#define ISDN_TTY_FAX_SENT 18 - -#define ISDN_FAX_PHASE_IDLE 0 -#define ISDN_FAX_PHASE_A 1 -#define ISDN_FAX_PHASE_B 2 -#define ISDN_FAX_PHASE_C 3 -#define ISDN_FAX_PHASE_D 4 -#define ISDN_FAX_PHASE_E 5 - -#endif /* TTY_FAX */ - -#define ISDN_FAX_CLASS1_FAE 0 -#define ISDN_FAX_CLASS1_FTS 1 -#define ISDN_FAX_CLASS1_FRS 2 -#define ISDN_FAX_CLASS1_FTM 3 -#define ISDN_FAX_CLASS1_FRM 4 -#define ISDN_FAX_CLASS1_FTH 5 -#define ISDN_FAX_CLASS1_FRH 6 -#define ISDN_FAX_CLASS1_CTRL 7 - -#define ISDN_FAX_CLASS1_OK 0 -#define ISDN_FAX_CLASS1_CONNECT 1 -#define ISDN_FAX_CLASS1_NOCARR 2 -#define ISDN_FAX_CLASS1_ERROR 3 -#define ISDN_FAX_CLASS1_FCERROR 4 -#define ISDN_FAX_CLASS1_QUERY 5 - -typedef struct { - __u8 cmd; - __u8 subcmd; - __u8 para[50]; -} aux_s; - -#define AT_COMMAND 0 -#define AT_EQ_VALUE 1 -#define AT_QUERY 2 -#define AT_EQ_QUERY 3 - -/* CAPI structs */ - -/* this is compatible to the old union size */ -#define MAX_CAPI_PARA_LEN 50 - -typedef struct { - /* Header */ - __u16 Length; - __u16 ApplId; - __u8 Command; - __u8 Subcommand; - __u16 Messagenumber; - - /* Parameter */ - union { - __u32 Controller; - __u32 PLCI; - __u32 NCCI; - } adr; - __u8 para[MAX_CAPI_PARA_LEN]; -} capi_msg; - -/* - * Structure for exchanging above infos - * - */ -typedef struct { - int driver; /* Lowlevel-Driver-ID */ - int command; /* Command or Status (see above) */ - ulong arg; /* Additional Data */ - union { - ulong errcode; /* Type of error with STAT_L1ERR */ - int length; /* Amount of bytes sent with STAT_BSENT */ - u_char num[50]; /* Additional Data */ - setup_parm setup;/* For SETUP msg */ - capi_msg cmsg; /* For CAPI like messages */ - char display[85];/* display message data */ - isdn_cmd_stat isdn_io; /* ISDN IO-parameter/result */ - aux_s aux; /* for modem commands/indications */ -#ifdef CONFIG_ISDN_TTY_FAX - T30_s *fax; /* Pointer to ttys fax struct */ -#endif - ulong userdata; /* User Data */ - } parm; -} isdn_ctrl; - -#define dss1_io isdn_io -#define ni1_io isdn_io - -/* - * The interface-struct itself (initialized at load-time of lowlevel-driver) - * - * See Documentation/isdn/INTERFACE for a description, how the communication - * between the ISDN subsystem and its drivers is done. - * - */ -typedef struct { - struct module *owner; - - /* Number of channels supported by this driver - */ - int channels; - - /* - * Maximum Size of transmit/receive-buffer this driver supports. - */ - int maxbufsize; - - /* Feature-Flags for this driver. - * See defines ISDN_FEATURE_... for Values - */ - unsigned long features; - - /* - * Needed for calculating - * dev->hard_header_len = linklayer header + hl_hdrlen; - * Drivers, not supporting sk_buff's should set this to 0. - */ - unsigned short hl_hdrlen; - - /* - * Receive-Callback using sk_buff's - * Parameters: - * int Driver-ID - * int local channel-number (0 ...) - * struct sk_buff *skb received Data - */ - void (*rcvcallb_skb)(int, int, struct sk_buff *); - - /* Status-Callback - * Parameters: - * isdn_ctrl* - * driver = Driver ID. - * command = One of above ISDN_STAT_... constants. - * arg = depending on status-type. - * num = depending on status-type. - */ - int (*statcallb)(isdn_ctrl*); - - /* Send command - * Parameters: - * isdn_ctrl* - * driver = Driver ID. - * command = One of above ISDN_CMD_... constants. - * arg = depending on command. - * num = depending on command. - */ - int (*command)(isdn_ctrl*); - - /* - * Send data using sk_buff's - * Parameters: - * int driverId - * int local channel-number (0...) - * int Flag: Need ACK for this packet. - * struct sk_buff *skb Data to send - */ - int (*writebuf_skb) (int, int, int, struct sk_buff *); - - /* Send raw D-Channel-Commands - * Parameters: - * u_char pointer data - * int length of data - * int driverId - * int local channel-number (0 ...) - */ - int (*writecmd)(const u_char __user *, int, int, int); - - /* Read raw Status replies - * u_char pointer data (volatile) - * int length of buffer - * int driverId - * int local channel-number (0 ...) - */ - int (*readstat)(u_char __user *, int, int, int); - - char id[20]; -} isdn_if; - -/* - * Function which must be called by lowlevel-driver at loadtime with - * the following fields of above struct set: - * - * channels Number of channels that will be supported. - * hl_hdrlen Space to preserve in sk_buff's when sending. Drivers, not - * supporting sk_buff's should set this to 0. - * command Address of Command-Handler. - * features Bitwise coded Features of this driver. (use ISDN_FEATURE_...) - * writebuf_skb Address of Skbuff-Send-Handler. - * writecmd " " D-Channel " which accepts raw D-Ch-Commands. - * readstat " " D-Channel " which delivers raw Status-Data. - * - * The linklevel-driver fills the following fields: - * - * channels Driver-ID assigned to this driver. (Must be used on all - * subsequent callbacks. - * rcvcallb_skb Address of handler for received Skbuff's. - * statcallb " " " for status-changes. - * - */ -extern int register_isdn(isdn_if*); -#include - -#endif /* __ISDNIF_H__ */ diff --git a/include/linux/wanrouter.h b/include/linux/wanrouter.h deleted file mode 100644 index f6358558f9f5..000000000000 --- a/include/linux/wanrouter.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * wanrouter.h Legacy declarations kept around until X25 is removed - */ - -#ifndef _ROUTER_H -#define _ROUTER_H - -#include - -#endif /* _ROUTER_H */ diff --git a/include/uapi/linux/isdn.h b/include/uapi/linux/isdn.h deleted file mode 100644 index f371fd52ed75..000000000000 --- a/include/uapi/linux/isdn.h +++ /dev/null @@ -1,144 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* $Id: isdn.h,v 1.125.2.3 2004/02/10 01:07:14 keil Exp $ - * - * Main header for the Linux ISDN subsystem (linklevel). - * - * Copyright 1994,95,96 by Fritz Elfert (fritz@isdn4linux.de) - * Copyright 1995,96 by Thinking Objects Software GmbH Wuerzburg - * Copyright 1995,96 by Michael Hipp (Michael.Hipp@student.uni-tuebingen.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#ifndef _UAPI__ISDN_H__ -#define _UAPI__ISDN_H__ - -#include -#include - -#define ISDN_MAX_DRIVERS 32 -#define ISDN_MAX_CHANNELS 64 - -/* New ioctl-codes */ -#define IIOCNETAIF _IO('I',1) -#define IIOCNETDIF _IO('I',2) -#define IIOCNETSCF _IO('I',3) -#define IIOCNETGCF _IO('I',4) -#define IIOCNETANM _IO('I',5) -#define IIOCNETDNM _IO('I',6) -#define IIOCNETGNM _IO('I',7) -#define IIOCGETSET _IO('I',8) /* no longer supported */ -#define IIOCSETSET _IO('I',9) /* no longer supported */ -#define IIOCSETVER _IO('I',10) -#define IIOCNETHUP _IO('I',11) -#define IIOCSETGST _IO('I',12) -#define IIOCSETBRJ _IO('I',13) -#define IIOCSIGPRF _IO('I',14) -#define IIOCGETPRF _IO('I',15) -#define IIOCSETPRF _IO('I',16) -#define IIOCGETMAP _IO('I',17) -#define IIOCSETMAP _IO('I',18) -#define IIOCNETASL _IO('I',19) -#define IIOCNETDIL _IO('I',20) -#define IIOCGETCPS _IO('I',21) -#define IIOCGETDVR _IO('I',22) -#define IIOCNETLCR _IO('I',23) /* dwabc ioctl for LCR from isdnlog */ -#define IIOCNETDWRSET _IO('I',24) /* dwabc ioctl to reset abc-values to default on a net-interface */ - -#define IIOCNETALN _IO('I',32) -#define IIOCNETDLN _IO('I',33) - -#define IIOCNETGPN _IO('I',34) - -#define IIOCDBGVAR _IO('I',127) - -#define IIOCDRVCTL _IO('I',128) - -/* cisco hdlck device private ioctls */ -#define SIOCGKEEPPERIOD (SIOCDEVPRIVATE + 0) -#define SIOCSKEEPPERIOD (SIOCDEVPRIVATE + 1) -#define SIOCGDEBSERINT (SIOCDEVPRIVATE + 2) -#define SIOCSDEBSERINT (SIOCDEVPRIVATE + 3) - -/* Packet encapsulations for net-interfaces */ -#define ISDN_NET_ENCAP_ETHER 0 -#define ISDN_NET_ENCAP_RAWIP 1 -#define ISDN_NET_ENCAP_IPTYP 2 -#define ISDN_NET_ENCAP_CISCOHDLC 3 /* Without SLARP and keepalive */ -#define ISDN_NET_ENCAP_SYNCPPP 4 -#define ISDN_NET_ENCAP_UIHDLC 5 -#define ISDN_NET_ENCAP_CISCOHDLCK 6 /* With SLARP and keepalive */ -#define ISDN_NET_ENCAP_X25IFACE 7 /* Documentation/networking/x25-iface.txt */ -#define ISDN_NET_ENCAP_MAX_ENCAP ISDN_NET_ENCAP_X25IFACE - -/* Facility which currently uses an ISDN-channel */ -#define ISDN_USAGE_NONE 0 -#define ISDN_USAGE_RAW 1 -#define ISDN_USAGE_MODEM 2 -#define ISDN_USAGE_NET 3 -#define ISDN_USAGE_VOICE 4 -#define ISDN_USAGE_FAX 5 -#define ISDN_USAGE_MASK 7 /* Mask to get plain usage */ -#define ISDN_USAGE_DISABLED 32 /* This bit is set, if channel is disabled */ -#define ISDN_USAGE_EXCLUSIVE 64 /* This bit is set, if channel is exclusive */ -#define ISDN_USAGE_OUTGOING 128 /* This bit is set, if channel is outgoing */ - -#define ISDN_MODEM_NUMREG 24 /* Number of Modem-Registers */ -#define ISDN_LMSNLEN 255 /* Length of tty's Listen-MSN string */ -#define ISDN_CMSGLEN 50 /* Length of CONNECT-Message to add for Modem */ - -#define ISDN_MSNLEN 32 -#define NET_DV 0x06 /* Data version for isdn_net_ioctl_cfg */ -#define TTY_DV 0x06 /* Data version for iprofd etc. */ - -#define INF_DV 0x01 /* Data version for /dev/isdninfo */ - -typedef struct { - char drvid[25]; - unsigned long arg; -} isdn_ioctl_struct; - -typedef struct { - char name[10]; - char phone[ISDN_MSNLEN]; - int outgoing; -} isdn_net_ioctl_phone; - -typedef struct { - char name[10]; /* Name of interface */ - char master[10]; /* Name of Master for Bundling */ - char slave[10]; /* Name of Slave for Bundling */ - char eaz[256]; /* EAZ/MSN */ - char drvid[25]; /* DriverId for Bindings */ - int onhtime; /* Hangup-Timeout */ - int charge; /* Charge-Units */ - int l2_proto; /* Layer-2 protocol */ - int l3_proto; /* Layer-3 protocol */ - int p_encap; /* Encapsulation */ - int exclusive; /* Channel, if bound exclusive */ - int dialmax; /* Dial Retry-Counter */ - int slavedelay; /* Delay until slave starts up */ - int cbdelay; /* Delay before Callback */ - int chargehup; /* Flag: Charge-Hangup */ - int ihup; /* Flag: Hangup-Timeout on incoming line */ - int secure; /* Flag: Secure */ - int callback; /* Flag: Callback */ - int cbhup; /* Flag: Reject Call before Callback */ - int pppbind; /* ippp device for bindings */ - int chargeint; /* Use fixed charge interval length */ - int triggercps; /* BogoCPS needed for triggering slave */ - int dialtimeout; /* Dial-Timeout */ - int dialwait; /* Time to wait after failed dial */ - int dialmode; /* Flag: off / on / auto */ -} isdn_net_ioctl_cfg; - -#define ISDN_NET_DIALMODE_MASK 0xC0 /* bits for status */ -#define ISDN_NET_DM_OFF 0x00 /* this interface is stopped */ -#define ISDN_NET_DM_MANUAL 0x40 /* this interface is on (manual) */ -#define ISDN_NET_DM_AUTO 0x80 /* this interface is autodial */ -#define ISDN_NET_DIALMODE(x) ((&(x))->flags & ISDN_NET_DIALMODE_MASK) - - -#endif /* _UAPI__ISDN_H__ */ diff --git a/include/uapi/linux/isdn_divertif.h b/include/uapi/linux/isdn_divertif.h deleted file mode 100644 index 0a17bb1bcb1b..000000000000 --- a/include/uapi/linux/isdn_divertif.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* $Id: isdn_divertif.h,v 1.4.6.1 2001/09/23 22:25:05 kai Exp $ - * - * Header for the diversion supplementary interface for i4l. - * - * Author Werner Cornelius (werner@titro.de) - * Copyright by Werner Cornelius (werner@titro.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#ifndef _UAPI_LINUX_ISDN_DIVERTIF_H -#define _UAPI_LINUX_ISDN_DIVERTIF_H - -/***********************************************************/ -/* magic value is also used to control version information */ -/***********************************************************/ -#define DIVERT_IF_MAGIC 0x25873401 -#define DIVERT_CMD_REG 0x00 /* register command */ -#define DIVERT_CMD_REL 0x01 /* release command */ -#define DIVERT_NO_ERR 0x00 /* return value no error */ -#define DIVERT_CMD_ERR 0x01 /* invalid cmd */ -#define DIVERT_VER_ERR 0x02 /* magic/version invalid */ -#define DIVERT_REG_ERR 0x03 /* module already registered */ -#define DIVERT_REL_ERR 0x04 /* module not registered */ -#define DIVERT_REG_NAME isdn_register_divert - - -#endif /* _UAPI_LINUX_ISDN_DIVERTIF_H */ diff --git a/include/uapi/linux/isdn_ppp.h b/include/uapi/linux/isdn_ppp.h deleted file mode 100644 index 0bdc4efaacb2..000000000000 --- a/include/uapi/linux/isdn_ppp.h +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */ -/* Linux ISDN subsystem, sync PPP, interface to ipppd - * - * Copyright 1994-1999 by Fritz Elfert (fritz@isdn4linux.de) - * Copyright 1995,96 Thinking Objects Software GmbH Wuerzburg - * Copyright 1995,96 by Michael Hipp (Michael.Hipp@student.uni-tuebingen.de) - * Copyright 2000-2002 by Kai Germaschewski (kai@germaschewski.name) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#ifndef _UAPI_LINUX_ISDN_PPP_H -#define _UAPI_LINUX_ISDN_PPP_H - -#define CALLTYPE_INCOMING 0x1 -#define CALLTYPE_OUTGOING 0x2 -#define CALLTYPE_CALLBACK 0x4 - -#define IPPP_VERSION "2.2.0" - -struct pppcallinfo -{ - int calltype; - unsigned char local_num[64]; - unsigned char remote_num[64]; - int charge_units; -}; - -#define PPPIOCGCALLINFO _IOWR('t',128,struct pppcallinfo) -#define PPPIOCBUNDLE _IOW('t',129,int) -#define PPPIOCGMPFLAGS _IOR('t',130,int) -#define PPPIOCSMPFLAGS _IOW('t',131,int) -#define PPPIOCSMPMTU _IOW('t',132,int) -#define PPPIOCSMPMRU _IOW('t',133,int) -#define PPPIOCGCOMPRESSORS _IOR('t',134,unsigned long [8]) -#define PPPIOCSCOMPRESSOR _IOW('t',135,int) -#define PPPIOCGIFNAME _IOR('t',136, char [IFNAMSIZ] ) - - -#define SC_MP_PROT 0x00000200 -#define SC_REJ_MP_PROT 0x00000400 -#define SC_OUT_SHORT_SEQ 0x00000800 -#define SC_IN_SHORT_SEQ 0x00004000 - -#define SC_DECOMP_ON 0x01 -#define SC_COMP_ON 0x02 -#define SC_DECOMP_DISCARD 0x04 -#define SC_COMP_DISCARD 0x08 -#define SC_LINK_DECOMP_ON 0x10 -#define SC_LINK_COMP_ON 0x20 -#define SC_LINK_DECOMP_DISCARD 0x40 -#define SC_LINK_COMP_DISCARD 0x80 - -#define ISDN_PPP_COMP_MAX_OPTIONS 16 - -#define IPPP_COMP_FLAG_XMIT 0x1 -#define IPPP_COMP_FLAG_LINK 0x2 - -struct isdn_ppp_comp_data { - int num; - unsigned char options[ISDN_PPP_COMP_MAX_OPTIONS]; - int optlen; - int flags; -}; - -#endif /* _UAPI_LINUX_ISDN_PPP_H */ diff --git a/include/uapi/linux/isdnif.h b/include/uapi/linux/isdnif.h deleted file mode 100644 index 611a69196738..000000000000 --- a/include/uapi/linux/isdnif.h +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */ -/* $Id: isdnif.h,v 1.43.2.2 2004/01/12 23:08:35 keil Exp $ - * - * Linux ISDN subsystem - * Definition of the interface between the subsystem and its low-level drivers. - * - * Copyright 1994,95,96 by Fritz Elfert (fritz@isdn4linux.de) - * Copyright 1995,96 Thinking Objects Software GmbH Wuerzburg - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#ifndef _UAPI__ISDNIF_H__ -#define _UAPI__ISDNIF_H__ - - -/* - * Values for general protocol-selection - */ -#define ISDN_PTYPE_UNKNOWN 0 /* Protocol undefined */ -#define ISDN_PTYPE_1TR6 1 /* german 1TR6-protocol */ -#define ISDN_PTYPE_EURO 2 /* EDSS1-protocol */ -#define ISDN_PTYPE_LEASED 3 /* for leased lines */ -#define ISDN_PTYPE_NI1 4 /* US NI-1 protocol */ -#define ISDN_PTYPE_MAX 7 /* Max. 8 Protocols */ - -/* - * Values for Layer-2-protocol-selection - */ -#define ISDN_PROTO_L2_X75I 0 /* X75/LAPB with I-Frames */ -#define ISDN_PROTO_L2_X75UI 1 /* X75/LAPB with UI-Frames */ -#define ISDN_PROTO_L2_X75BUI 2 /* X75/LAPB with UI-Frames */ -#define ISDN_PROTO_L2_HDLC 3 /* HDLC */ -#define ISDN_PROTO_L2_TRANS 4 /* Transparent (Voice) */ -#define ISDN_PROTO_L2_X25DTE 5 /* X25/LAPB DTE mode */ -#define ISDN_PROTO_L2_X25DCE 6 /* X25/LAPB DCE mode */ -#define ISDN_PROTO_L2_V11096 7 /* V.110 bitrate adaption 9600 Baud */ -#define ISDN_PROTO_L2_V11019 8 /* V.110 bitrate adaption 19200 Baud */ -#define ISDN_PROTO_L2_V11038 9 /* V.110 bitrate adaption 38400 Baud */ -#define ISDN_PROTO_L2_MODEM 10 /* Analog Modem on Board */ -#define ISDN_PROTO_L2_FAX 11 /* Fax Group 2/3 */ -#define ISDN_PROTO_L2_HDLC_56K 12 /* HDLC 56k */ -#define ISDN_PROTO_L2_MAX 15 /* Max. 16 Protocols */ - -/* - * Values for Layer-3-protocol-selection - */ -#define ISDN_PROTO_L3_TRANS 0 /* Transparent */ -#define ISDN_PROTO_L3_TRANSDSP 1 /* Transparent with DSP */ -#define ISDN_PROTO_L3_FCLASS2 2 /* Fax Group 2/3 CLASS 2 */ -#define ISDN_PROTO_L3_FCLASS1 3 /* Fax Group 2/3 CLASS 1 */ -#define ISDN_PROTO_L3_MAX 7 /* Max. 8 Protocols */ - - -#endif /* _UAPI__ISDNIF_H__ */ diff --git a/include/uapi/linux/wanrouter.h b/include/uapi/linux/wanrouter.h deleted file mode 100644 index 2f1216d00caa..000000000000 --- a/include/uapi/linux/wanrouter.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * wanrouter.h Legacy declarations kept around until X25 is removed - */ - -#ifndef _UAPI_ROUTER_H -#define _UAPI_ROUTER_H - -/* 'state' defines */ -enum wan_states -{ - WAN_UNCONFIGURED, /* link/channel is not configured */ - WAN_DISCONNECTED, /* link/channel is disconnected */ - WAN_CONNECTING, /* connection is in progress */ - WAN_CONNECTED /* link/channel is operational */ -}; - -#endif /* _UAPI_ROUTER_H */ -- cgit v1.2.3 From 99c2aa151a7182c58f9477a376304c538d9cc5ab Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Apr 2019 22:57:08 +0200 Subject: isdn: hdlc: move into mISDN The last remnant of the isdn4linux interface is now the isdnhdlc support, used by the netjet driver. Move it next to that driver. Signed-off-by: Arnd Bergmann --- drivers/isdn/Makefile | 1 - drivers/isdn/hardware/mISDN/Kconfig | 7 +- drivers/isdn/hardware/mISDN/Makefile | 2 + drivers/isdn/hardware/mISDN/isdnhdlc.c | 630 +++++++++++++++++++++++++++++++++ drivers/isdn/hardware/mISDN/isdnhdlc.h | 82 +++++ drivers/isdn/hardware/mISDN/netjet.c | 2 +- drivers/isdn/i4l/Makefile | 6 - drivers/isdn/i4l/isdnhdlc.c | 630 --------------------------------- include/linux/isdn/hdlc.h | 82 ----- 9 files changed, 720 insertions(+), 722 deletions(-) create mode 100644 drivers/isdn/hardware/mISDN/isdnhdlc.c create mode 100644 drivers/isdn/hardware/mISDN/isdnhdlc.h delete mode 100644 drivers/isdn/i4l/Makefile delete mode 100644 drivers/isdn/i4l/isdnhdlc.c delete mode 100644 include/linux/isdn/hdlc.h (limited to 'include/linux') diff --git a/drivers/isdn/Makefile b/drivers/isdn/Makefile index 379b4a03c321..f2a529c5a511 100644 --- a/drivers/isdn/Makefile +++ b/drivers/isdn/Makefile @@ -3,7 +3,6 @@ # Object files in subdirectories -obj-$(CONFIG_ISDN_I4L) += i4l/ obj-$(CONFIG_ISDN_CAPI) += capi/ obj-$(CONFIG_MISDN) += mISDN/ obj-$(CONFIG_ISDN) += hardware/ diff --git a/drivers/isdn/hardware/mISDN/Kconfig b/drivers/isdn/hardware/mISDN/Kconfig index a7a34a85b970..304f50c08da2 100644 --- a/drivers/isdn/hardware/mISDN/Kconfig +++ b/drivers/isdn/hardware/mISDN/Kconfig @@ -79,11 +79,14 @@ config MISDN_NETJET depends on PCI depends on TTY select MISDN_IPAC - select ISDN_HDLC - select ISDN_I4L + select MISDN_HDLC help Enable support for Traverse Technologies NETJet PCI cards. +config MISDN_HDLC + tristate + select CRC_CCITT + select BITREVERSE config MISDN_IPAC tristate diff --git a/drivers/isdn/hardware/mISDN/Makefile b/drivers/isdn/hardware/mISDN/Makefile index 422f9fd8ab9a..3f50f8c4753f 100644 --- a/drivers/isdn/hardware/mISDN/Makefile +++ b/drivers/isdn/hardware/mISDN/Makefile @@ -15,3 +15,5 @@ obj-$(CONFIG_MISDN_NETJET) += netjet.o # chip modules obj-$(CONFIG_MISDN_IPAC) += mISDNipac.o obj-$(CONFIG_MISDN_ISAR) += mISDNisar.o + +obj-$(CONFIG_MISDN_HDLC) += isdnhdlc.o diff --git a/drivers/isdn/hardware/mISDN/isdnhdlc.c b/drivers/isdn/hardware/mISDN/isdnhdlc.c new file mode 100644 index 000000000000..3a8b562e63b1 --- /dev/null +++ b/drivers/isdn/hardware/mISDN/isdnhdlc.c @@ -0,0 +1,630 @@ +/* + * isdnhdlc.c -- General purpose ISDN HDLC decoder. + * + * Copyright (C) + * 2009 Karsten Keil + * 2002 Wolfgang Mües + * 2001 Frode Isaksen + * 2001 Kai Germaschewski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include "isdnhdlc.h" + +/*-------------------------------------------------------------------*/ + +MODULE_AUTHOR("Wolfgang Mües , " + "Frode Isaksen , " + "Kai Germaschewski "); +MODULE_DESCRIPTION("General purpose ISDN HDLC decoder"); +MODULE_LICENSE("GPL"); + +/*-------------------------------------------------------------------*/ + +enum { + HDLC_FAST_IDLE, HDLC_GET_FLAG_B0, HDLC_GETFLAG_B1A6, HDLC_GETFLAG_B7, + HDLC_GET_DATA, HDLC_FAST_FLAG +}; + +enum { + HDLC_SEND_DATA, HDLC_SEND_CRC1, HDLC_SEND_FAST_FLAG, + HDLC_SEND_FIRST_FLAG, HDLC_SEND_CRC2, HDLC_SEND_CLOSING_FLAG, + HDLC_SEND_IDLE1, HDLC_SEND_FAST_IDLE, HDLC_SENDFLAG_B0, + HDLC_SENDFLAG_B1A6, HDLC_SENDFLAG_B7, STOPPED, HDLC_SENDFLAG_ONE +}; + +void isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, u32 features) +{ + memset(hdlc, 0, sizeof(struct isdnhdlc_vars)); + hdlc->state = HDLC_GET_DATA; + if (features & HDLC_56KBIT) + hdlc->do_adapt56 = 1; + if (features & HDLC_BITREVERSE) + hdlc->do_bitreverse = 1; +} +EXPORT_SYMBOL(isdnhdlc_out_init); + +void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, u32 features) +{ + memset(hdlc, 0, sizeof(struct isdnhdlc_vars)); + if (features & HDLC_DCHANNEL) { + hdlc->dchannel = 1; + hdlc->state = HDLC_SEND_FIRST_FLAG; + } else { + hdlc->dchannel = 0; + hdlc->state = HDLC_SEND_FAST_FLAG; + hdlc->ffvalue = 0x7e; + } + hdlc->cbin = 0x7e; + if (features & HDLC_56KBIT) { + hdlc->do_adapt56 = 1; + hdlc->state = HDLC_SENDFLAG_B0; + } else + hdlc->data_bits = 8; + if (features & HDLC_BITREVERSE) + hdlc->do_bitreverse = 1; +} +EXPORT_SYMBOL(isdnhdlc_rcv_init); + +static int +check_frame(struct isdnhdlc_vars *hdlc) +{ + int status; + + if (hdlc->dstpos < 2) /* too small - framing error */ + status = -HDLC_FRAMING_ERROR; + else if (hdlc->crc != 0xf0b8) /* crc error */ + status = -HDLC_CRC_ERROR; + else { + /* remove CRC */ + hdlc->dstpos -= 2; + /* good frame */ + status = hdlc->dstpos; + } + return status; +} + +/* + isdnhdlc_decode - decodes HDLC frames from a transparent bit stream. + + The source buffer is scanned for valid HDLC frames looking for + flags (01111110) to indicate the start of a frame. If the start of + the frame is found, the bit stuffing is removed (0 after 5 1's). + When a new flag is found, the complete frame has been received + and the CRC is checked. + If a valid frame is found, the function returns the frame length + excluding the CRC with the bit HDLC_END_OF_FRAME set. + If the beginning of a valid frame is found, the function returns + the length. + If a framing error is found (too many 1s and not a flag) the function + returns the length with the bit HDLC_FRAMING_ERROR set. + If a CRC error is found the function returns the length with the + bit HDLC_CRC_ERROR set. + If the frame length exceeds the destination buffer size, the function + returns the length with the bit HDLC_LENGTH_ERROR set. + + src - source buffer + slen - source buffer length + count - number of bytes removed (decoded) from the source buffer + dst _ destination buffer + dsize - destination buffer size + returns - number of decoded bytes in the destination buffer and status + flag. +*/ +int isdnhdlc_decode(struct isdnhdlc_vars *hdlc, const u8 *src, int slen, + int *count, u8 *dst, int dsize) +{ + int status = 0; + + static const unsigned char fast_flag[] = { + 0x00, 0x00, 0x00, 0x20, 0x30, 0x38, 0x3c, 0x3e, 0x3f + }; + + static const unsigned char fast_flag_value[] = { + 0x00, 0x7e, 0xfc, 0xf9, 0xf3, 0xe7, 0xcf, 0x9f, 0x3f + }; + + static const unsigned char fast_abort[] = { + 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff + }; + +#define handle_fast_flag(h) \ + do { \ + if (h->cbin == fast_flag[h->bit_shift]) { \ + h->ffvalue = fast_flag_value[h->bit_shift]; \ + h->state = HDLC_FAST_FLAG; \ + h->ffbit_shift = h->bit_shift; \ + h->bit_shift = 1; \ + } else { \ + h->state = HDLC_GET_DATA; \ + h->data_received = 0; \ + } \ + } while (0) + +#define handle_abort(h) \ + do { \ + h->shift_reg = fast_abort[h->ffbit_shift - 1]; \ + h->hdlc_bits1 = h->ffbit_shift - 2; \ + if (h->hdlc_bits1 < 0) \ + h->hdlc_bits1 = 0; \ + h->data_bits = h->ffbit_shift - 1; \ + h->state = HDLC_GET_DATA; \ + h->data_received = 0; \ + } while (0) + + *count = slen; + + while (slen > 0) { + if (hdlc->bit_shift == 0) { + /* the code is for bitreverse streams */ + if (hdlc->do_bitreverse == 0) + hdlc->cbin = bitrev8(*src++); + else + hdlc->cbin = *src++; + slen--; + hdlc->bit_shift = 8; + if (hdlc->do_adapt56) + hdlc->bit_shift--; + } + + switch (hdlc->state) { + case STOPPED: + return 0; + case HDLC_FAST_IDLE: + if (hdlc->cbin == 0xff) { + hdlc->bit_shift = 0; + break; + } + hdlc->state = HDLC_GET_FLAG_B0; + hdlc->hdlc_bits1 = 0; + hdlc->bit_shift = 8; + break; + case HDLC_GET_FLAG_B0: + if (!(hdlc->cbin & 0x80)) { + hdlc->state = HDLC_GETFLAG_B1A6; + hdlc->hdlc_bits1 = 0; + } else { + if ((!hdlc->do_adapt56) && + (++hdlc->hdlc_bits1 >= 8) && + (hdlc->bit_shift == 1)) + hdlc->state = HDLC_FAST_IDLE; + } + hdlc->cbin <<= 1; + hdlc->bit_shift--; + break; + case HDLC_GETFLAG_B1A6: + if (hdlc->cbin & 0x80) { + hdlc->hdlc_bits1++; + if (hdlc->hdlc_bits1 == 6) + hdlc->state = HDLC_GETFLAG_B7; + } else + hdlc->hdlc_bits1 = 0; + hdlc->cbin <<= 1; + hdlc->bit_shift--; + break; + case HDLC_GETFLAG_B7: + if (hdlc->cbin & 0x80) { + hdlc->state = HDLC_GET_FLAG_B0; + } else { + hdlc->state = HDLC_GET_DATA; + hdlc->crc = 0xffff; + hdlc->shift_reg = 0; + hdlc->hdlc_bits1 = 0; + hdlc->data_bits = 0; + hdlc->data_received = 0; + } + hdlc->cbin <<= 1; + hdlc->bit_shift--; + break; + case HDLC_GET_DATA: + if (hdlc->cbin & 0x80) { + hdlc->hdlc_bits1++; + switch (hdlc->hdlc_bits1) { + case 6: + break; + case 7: + if (hdlc->data_received) + /* bad frame */ + status = -HDLC_FRAMING_ERROR; + if (!hdlc->do_adapt56) { + if (hdlc->cbin == fast_abort + [hdlc->bit_shift + 1]) { + hdlc->state = + HDLC_FAST_IDLE; + hdlc->bit_shift = 1; + break; + } + } else + hdlc->state = HDLC_GET_FLAG_B0; + break; + default: + hdlc->shift_reg >>= 1; + hdlc->shift_reg |= 0x80; + hdlc->data_bits++; + break; + } + } else { + switch (hdlc->hdlc_bits1) { + case 5: + break; + case 6: + if (hdlc->data_received) + status = check_frame(hdlc); + hdlc->crc = 0xffff; + hdlc->shift_reg = 0; + hdlc->data_bits = 0; + if (!hdlc->do_adapt56) + handle_fast_flag(hdlc); + else { + hdlc->state = HDLC_GET_DATA; + hdlc->data_received = 0; + } + break; + default: + hdlc->shift_reg >>= 1; + hdlc->data_bits++; + break; + } + hdlc->hdlc_bits1 = 0; + } + if (status) { + hdlc->dstpos = 0; + *count -= slen; + hdlc->cbin <<= 1; + hdlc->bit_shift--; + return status; + } + if (hdlc->data_bits == 8) { + hdlc->data_bits = 0; + hdlc->data_received = 1; + hdlc->crc = crc_ccitt_byte(hdlc->crc, + hdlc->shift_reg); + + /* good byte received */ + if (hdlc->dstpos < dsize) + dst[hdlc->dstpos++] = hdlc->shift_reg; + else { + /* frame too long */ + status = -HDLC_LENGTH_ERROR; + hdlc->dstpos = 0; + } + } + hdlc->cbin <<= 1; + hdlc->bit_shift--; + break; + case HDLC_FAST_FLAG: + if (hdlc->cbin == hdlc->ffvalue) { + hdlc->bit_shift = 0; + break; + } else { + if (hdlc->cbin == 0xff) { + hdlc->state = HDLC_FAST_IDLE; + hdlc->bit_shift = 0; + } else if (hdlc->ffbit_shift == 8) { + hdlc->state = HDLC_GETFLAG_B7; + break; + } else + handle_abort(hdlc); + } + break; + default: + break; + } + } + *count -= slen; + return 0; +} +EXPORT_SYMBOL(isdnhdlc_decode); +/* + isdnhdlc_encode - encodes HDLC frames to a transparent bit stream. + + The bit stream starts with a beginning flag (01111110). After + that each byte is added to the bit stream with bit stuffing added + (0 after 5 1's). + When the last byte has been removed from the source buffer, the + CRC (2 bytes is added) and the frame terminates with the ending flag. + For the dchannel, the idle character (all 1's) is also added at the end. + If this function is called with empty source buffer (slen=0), flags or + idle character will be generated. + + src - source buffer + slen - source buffer length + count - number of bytes removed (encoded) from source buffer + dst _ destination buffer + dsize - destination buffer size + returns - number of encoded bytes in the destination buffer +*/ +int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src, u16 slen, + int *count, u8 *dst, int dsize) +{ + static const unsigned char xfast_flag_value[] = { + 0x7e, 0x3f, 0x9f, 0xcf, 0xe7, 0xf3, 0xf9, 0xfc, 0x7e + }; + + int len = 0; + + *count = slen; + + /* special handling for one byte frames */ + if ((slen == 1) && (hdlc->state == HDLC_SEND_FAST_FLAG)) + hdlc->state = HDLC_SENDFLAG_ONE; + while (dsize > 0) { + if (hdlc->bit_shift == 0) { + if (slen && !hdlc->do_closing) { + hdlc->shift_reg = *src++; + slen--; + if (slen == 0) + /* closing sequence, CRC + flag(s) */ + hdlc->do_closing = 1; + hdlc->bit_shift = 8; + } else { + if (hdlc->state == HDLC_SEND_DATA) { + if (hdlc->data_received) { + hdlc->state = HDLC_SEND_CRC1; + hdlc->crc ^= 0xffff; + hdlc->bit_shift = 8; + hdlc->shift_reg = + hdlc->crc & 0xff; + } else if (!hdlc->do_adapt56) + hdlc->state = + HDLC_SEND_FAST_FLAG; + else + hdlc->state = + HDLC_SENDFLAG_B0; + } + + } + } + + switch (hdlc->state) { + case STOPPED: + while (dsize--) + *dst++ = 0xff; + return dsize; + case HDLC_SEND_FAST_FLAG: + hdlc->do_closing = 0; + if (slen == 0) { + /* the code is for bitreverse streams */ + if (hdlc->do_bitreverse == 0) + *dst++ = bitrev8(hdlc->ffvalue); + else + *dst++ = hdlc->ffvalue; + len++; + dsize--; + break; + } + /* fall through */ + case HDLC_SENDFLAG_ONE: + if (hdlc->bit_shift == 8) { + hdlc->cbin = hdlc->ffvalue >> + (8 - hdlc->data_bits); + hdlc->state = HDLC_SEND_DATA; + hdlc->crc = 0xffff; + hdlc->hdlc_bits1 = 0; + hdlc->data_received = 1; + } + break; + case HDLC_SENDFLAG_B0: + hdlc->do_closing = 0; + hdlc->cbin <<= 1; + hdlc->data_bits++; + hdlc->hdlc_bits1 = 0; + hdlc->state = HDLC_SENDFLAG_B1A6; + break; + case HDLC_SENDFLAG_B1A6: + hdlc->cbin <<= 1; + hdlc->data_bits++; + hdlc->cbin++; + if (++hdlc->hdlc_bits1 == 6) + hdlc->state = HDLC_SENDFLAG_B7; + break; + case HDLC_SENDFLAG_B7: + hdlc->cbin <<= 1; + hdlc->data_bits++; + if (slen == 0) { + hdlc->state = HDLC_SENDFLAG_B0; + break; + } + if (hdlc->bit_shift == 8) { + hdlc->state = HDLC_SEND_DATA; + hdlc->crc = 0xffff; + hdlc->hdlc_bits1 = 0; + hdlc->data_received = 1; + } + break; + case HDLC_SEND_FIRST_FLAG: + hdlc->data_received = 1; + if (hdlc->data_bits == 8) { + hdlc->state = HDLC_SEND_DATA; + hdlc->crc = 0xffff; + hdlc->hdlc_bits1 = 0; + break; + } + hdlc->cbin <<= 1; + hdlc->data_bits++; + if (hdlc->shift_reg & 0x01) + hdlc->cbin++; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + if (hdlc->bit_shift == 0) { + hdlc->state = HDLC_SEND_DATA; + hdlc->crc = 0xffff; + hdlc->hdlc_bits1 = 0; + } + break; + case HDLC_SEND_DATA: + hdlc->cbin <<= 1; + hdlc->data_bits++; + if (hdlc->hdlc_bits1 == 5) { + hdlc->hdlc_bits1 = 0; + break; + } + if (hdlc->bit_shift == 8) + hdlc->crc = crc_ccitt_byte(hdlc->crc, + hdlc->shift_reg); + if (hdlc->shift_reg & 0x01) { + hdlc->hdlc_bits1++; + hdlc->cbin++; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + } else { + hdlc->hdlc_bits1 = 0; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + } + break; + case HDLC_SEND_CRC1: + hdlc->cbin <<= 1; + hdlc->data_bits++; + if (hdlc->hdlc_bits1 == 5) { + hdlc->hdlc_bits1 = 0; + break; + } + if (hdlc->shift_reg & 0x01) { + hdlc->hdlc_bits1++; + hdlc->cbin++; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + } else { + hdlc->hdlc_bits1 = 0; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + } + if (hdlc->bit_shift == 0) { + hdlc->shift_reg = (hdlc->crc >> 8); + hdlc->state = HDLC_SEND_CRC2; + hdlc->bit_shift = 8; + } + break; + case HDLC_SEND_CRC2: + hdlc->cbin <<= 1; + hdlc->data_bits++; + if (hdlc->hdlc_bits1 == 5) { + hdlc->hdlc_bits1 = 0; + break; + } + if (hdlc->shift_reg & 0x01) { + hdlc->hdlc_bits1++; + hdlc->cbin++; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + } else { + hdlc->hdlc_bits1 = 0; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + } + if (hdlc->bit_shift == 0) { + hdlc->shift_reg = 0x7e; + hdlc->state = HDLC_SEND_CLOSING_FLAG; + hdlc->bit_shift = 8; + } + break; + case HDLC_SEND_CLOSING_FLAG: + hdlc->cbin <<= 1; + hdlc->data_bits++; + if (hdlc->hdlc_bits1 == 5) { + hdlc->hdlc_bits1 = 0; + break; + } + if (hdlc->shift_reg & 0x01) + hdlc->cbin++; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + if (hdlc->bit_shift == 0) { + hdlc->ffvalue = + xfast_flag_value[hdlc->data_bits]; + if (hdlc->dchannel) { + hdlc->ffvalue = 0x7e; + hdlc->state = HDLC_SEND_IDLE1; + hdlc->bit_shift = 8-hdlc->data_bits; + if (hdlc->bit_shift == 0) + hdlc->state = + HDLC_SEND_FAST_IDLE; + } else { + if (!hdlc->do_adapt56) { + hdlc->state = + HDLC_SEND_FAST_FLAG; + hdlc->data_received = 0; + } else { + hdlc->state = HDLC_SENDFLAG_B0; + hdlc->data_received = 0; + } + /* Finished this frame, send flags */ + if (dsize > 1) + dsize = 1; + } + } + break; + case HDLC_SEND_IDLE1: + hdlc->do_closing = 0; + hdlc->cbin <<= 1; + hdlc->cbin++; + hdlc->data_bits++; + hdlc->bit_shift--; + if (hdlc->bit_shift == 0) { + hdlc->state = HDLC_SEND_FAST_IDLE; + hdlc->bit_shift = 0; + } + break; + case HDLC_SEND_FAST_IDLE: + hdlc->do_closing = 0; + hdlc->cbin = 0xff; + hdlc->data_bits = 8; + if (hdlc->bit_shift == 8) { + hdlc->cbin = 0x7e; + hdlc->state = HDLC_SEND_FIRST_FLAG; + } else { + /* the code is for bitreverse streams */ + if (hdlc->do_bitreverse == 0) + *dst++ = bitrev8(hdlc->cbin); + else + *dst++ = hdlc->cbin; + hdlc->bit_shift = 0; + hdlc->data_bits = 0; + len++; + dsize = 0; + } + break; + default: + break; + } + if (hdlc->do_adapt56) { + if (hdlc->data_bits == 7) { + hdlc->cbin <<= 1; + hdlc->cbin++; + hdlc->data_bits++; + } + } + if (hdlc->data_bits == 8) { + /* the code is for bitreverse streams */ + if (hdlc->do_bitreverse == 0) + *dst++ = bitrev8(hdlc->cbin); + else + *dst++ = hdlc->cbin; + hdlc->data_bits = 0; + len++; + dsize--; + } + } + *count -= slen; + + return len; +} +EXPORT_SYMBOL(isdnhdlc_encode); diff --git a/drivers/isdn/hardware/mISDN/isdnhdlc.h b/drivers/isdn/hardware/mISDN/isdnhdlc.h new file mode 100644 index 000000000000..96521370c782 --- /dev/null +++ b/drivers/isdn/hardware/mISDN/isdnhdlc.h @@ -0,0 +1,82 @@ +/* + * hdlc.h -- General purpose ISDN HDLC decoder. + * + * Implementation of a HDLC decoder/encoder in software. + * Necessary because some ISDN devices don't have HDLC + * controllers. + * + * Copyright (C) + * 2009 Karsten Keil + * 2002 Wolfgang Mües + * 2001 Frode Isaksen + * 2001 Kai Germaschewski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __ISDNHDLC_H__ +#define __ISDNHDLC_H__ + +struct isdnhdlc_vars { + int bit_shift; + int hdlc_bits1; + int data_bits; + int ffbit_shift; /* encoding only */ + int state; + int dstpos; + + u16 crc; + + u8 cbin; + u8 shift_reg; + u8 ffvalue; + + /* set if transferring data */ + u32 data_received:1; + /* set if D channel (send idle instead of flags) */ + u32 dchannel:1; + /* set if 56K adaptation */ + u32 do_adapt56:1; + /* set if in closing phase (need to send CRC + flag) */ + u32 do_closing:1; + /* set if data is bitreverse */ + u32 do_bitreverse:1; +}; + +/* Feature Flags */ +#define HDLC_56KBIT 0x01 +#define HDLC_DCHANNEL 0x02 +#define HDLC_BITREVERSE 0x04 + +/* + The return value from isdnhdlc_decode is + the frame length, 0 if no complete frame was decoded, + or a negative error number +*/ +#define HDLC_FRAMING_ERROR 1 +#define HDLC_CRC_ERROR 2 +#define HDLC_LENGTH_ERROR 3 + +extern void isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, u32 features); + +extern int isdnhdlc_decode(struct isdnhdlc_vars *hdlc, const u8 *src, + int slen, int *count, u8 *dst, int dsize); + +extern void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, u32 features); + +extern int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src, + u16 slen, int *count, u8 *dst, int dsize); + +#endif /* __ISDNHDLC_H__ */ diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c index 2b317cb63d06..93a2d361eda5 100644 --- a/drivers/isdn/hardware/mISDN/netjet.c +++ b/drivers/isdn/hardware/mISDN/netjet.c @@ -29,7 +29,7 @@ #include "ipac.h" #include "iohelper.h" #include "netjet.h" -#include +#include "isdnhdlc.h" #define NETJET_REV "2.0" diff --git a/drivers/isdn/i4l/Makefile b/drivers/isdn/i4l/Makefile deleted file mode 100644 index 11fe697739d5..000000000000 --- a/drivers/isdn/i4l/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# Makefile for the kernel ISDN subsystem and device drivers. - -# Each configuration option enables a list of files. - -obj-$(CONFIG_ISDN_HDLC) += isdnhdlc.o diff --git a/drivers/isdn/i4l/isdnhdlc.c b/drivers/isdn/i4l/isdnhdlc.c deleted file mode 100644 index 027d1c590679..000000000000 --- a/drivers/isdn/i4l/isdnhdlc.c +++ /dev/null @@ -1,630 +0,0 @@ -/* - * isdnhdlc.c -- General purpose ISDN HDLC decoder. - * - * Copyright (C) - * 2009 Karsten Keil - * 2002 Wolfgang Mües - * 2001 Frode Isaksen - * 2001 Kai Germaschewski - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include -#include -#include -#include - -/*-------------------------------------------------------------------*/ - -MODULE_AUTHOR("Wolfgang Mües , " - "Frode Isaksen , " - "Kai Germaschewski "); -MODULE_DESCRIPTION("General purpose ISDN HDLC decoder"); -MODULE_LICENSE("GPL"); - -/*-------------------------------------------------------------------*/ - -enum { - HDLC_FAST_IDLE, HDLC_GET_FLAG_B0, HDLC_GETFLAG_B1A6, HDLC_GETFLAG_B7, - HDLC_GET_DATA, HDLC_FAST_FLAG -}; - -enum { - HDLC_SEND_DATA, HDLC_SEND_CRC1, HDLC_SEND_FAST_FLAG, - HDLC_SEND_FIRST_FLAG, HDLC_SEND_CRC2, HDLC_SEND_CLOSING_FLAG, - HDLC_SEND_IDLE1, HDLC_SEND_FAST_IDLE, HDLC_SENDFLAG_B0, - HDLC_SENDFLAG_B1A6, HDLC_SENDFLAG_B7, STOPPED, HDLC_SENDFLAG_ONE -}; - -void isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, u32 features) -{ - memset(hdlc, 0, sizeof(struct isdnhdlc_vars)); - hdlc->state = HDLC_GET_DATA; - if (features & HDLC_56KBIT) - hdlc->do_adapt56 = 1; - if (features & HDLC_BITREVERSE) - hdlc->do_bitreverse = 1; -} -EXPORT_SYMBOL(isdnhdlc_out_init); - -void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, u32 features) -{ - memset(hdlc, 0, sizeof(struct isdnhdlc_vars)); - if (features & HDLC_DCHANNEL) { - hdlc->dchannel = 1; - hdlc->state = HDLC_SEND_FIRST_FLAG; - } else { - hdlc->dchannel = 0; - hdlc->state = HDLC_SEND_FAST_FLAG; - hdlc->ffvalue = 0x7e; - } - hdlc->cbin = 0x7e; - if (features & HDLC_56KBIT) { - hdlc->do_adapt56 = 1; - hdlc->state = HDLC_SENDFLAG_B0; - } else - hdlc->data_bits = 8; - if (features & HDLC_BITREVERSE) - hdlc->do_bitreverse = 1; -} -EXPORT_SYMBOL(isdnhdlc_rcv_init); - -static int -check_frame(struct isdnhdlc_vars *hdlc) -{ - int status; - - if (hdlc->dstpos < 2) /* too small - framing error */ - status = -HDLC_FRAMING_ERROR; - else if (hdlc->crc != 0xf0b8) /* crc error */ - status = -HDLC_CRC_ERROR; - else { - /* remove CRC */ - hdlc->dstpos -= 2; - /* good frame */ - status = hdlc->dstpos; - } - return status; -} - -/* - isdnhdlc_decode - decodes HDLC frames from a transparent bit stream. - - The source buffer is scanned for valid HDLC frames looking for - flags (01111110) to indicate the start of a frame. If the start of - the frame is found, the bit stuffing is removed (0 after 5 1's). - When a new flag is found, the complete frame has been received - and the CRC is checked. - If a valid frame is found, the function returns the frame length - excluding the CRC with the bit HDLC_END_OF_FRAME set. - If the beginning of a valid frame is found, the function returns - the length. - If a framing error is found (too many 1s and not a flag) the function - returns the length with the bit HDLC_FRAMING_ERROR set. - If a CRC error is found the function returns the length with the - bit HDLC_CRC_ERROR set. - If the frame length exceeds the destination buffer size, the function - returns the length with the bit HDLC_LENGTH_ERROR set. - - src - source buffer - slen - source buffer length - count - number of bytes removed (decoded) from the source buffer - dst _ destination buffer - dsize - destination buffer size - returns - number of decoded bytes in the destination buffer and status - flag. -*/ -int isdnhdlc_decode(struct isdnhdlc_vars *hdlc, const u8 *src, int slen, - int *count, u8 *dst, int dsize) -{ - int status = 0; - - static const unsigned char fast_flag[] = { - 0x00, 0x00, 0x00, 0x20, 0x30, 0x38, 0x3c, 0x3e, 0x3f - }; - - static const unsigned char fast_flag_value[] = { - 0x00, 0x7e, 0xfc, 0xf9, 0xf3, 0xe7, 0xcf, 0x9f, 0x3f - }; - - static const unsigned char fast_abort[] = { - 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff - }; - -#define handle_fast_flag(h) \ - do { \ - if (h->cbin == fast_flag[h->bit_shift]) { \ - h->ffvalue = fast_flag_value[h->bit_shift]; \ - h->state = HDLC_FAST_FLAG; \ - h->ffbit_shift = h->bit_shift; \ - h->bit_shift = 1; \ - } else { \ - h->state = HDLC_GET_DATA; \ - h->data_received = 0; \ - } \ - } while (0) - -#define handle_abort(h) \ - do { \ - h->shift_reg = fast_abort[h->ffbit_shift - 1]; \ - h->hdlc_bits1 = h->ffbit_shift - 2; \ - if (h->hdlc_bits1 < 0) \ - h->hdlc_bits1 = 0; \ - h->data_bits = h->ffbit_shift - 1; \ - h->state = HDLC_GET_DATA; \ - h->data_received = 0; \ - } while (0) - - *count = slen; - - while (slen > 0) { - if (hdlc->bit_shift == 0) { - /* the code is for bitreverse streams */ - if (hdlc->do_bitreverse == 0) - hdlc->cbin = bitrev8(*src++); - else - hdlc->cbin = *src++; - slen--; - hdlc->bit_shift = 8; - if (hdlc->do_adapt56) - hdlc->bit_shift--; - } - - switch (hdlc->state) { - case STOPPED: - return 0; - case HDLC_FAST_IDLE: - if (hdlc->cbin == 0xff) { - hdlc->bit_shift = 0; - break; - } - hdlc->state = HDLC_GET_FLAG_B0; - hdlc->hdlc_bits1 = 0; - hdlc->bit_shift = 8; - break; - case HDLC_GET_FLAG_B0: - if (!(hdlc->cbin & 0x80)) { - hdlc->state = HDLC_GETFLAG_B1A6; - hdlc->hdlc_bits1 = 0; - } else { - if ((!hdlc->do_adapt56) && - (++hdlc->hdlc_bits1 >= 8) && - (hdlc->bit_shift == 1)) - hdlc->state = HDLC_FAST_IDLE; - } - hdlc->cbin <<= 1; - hdlc->bit_shift--; - break; - case HDLC_GETFLAG_B1A6: - if (hdlc->cbin & 0x80) { - hdlc->hdlc_bits1++; - if (hdlc->hdlc_bits1 == 6) - hdlc->state = HDLC_GETFLAG_B7; - } else - hdlc->hdlc_bits1 = 0; - hdlc->cbin <<= 1; - hdlc->bit_shift--; - break; - case HDLC_GETFLAG_B7: - if (hdlc->cbin & 0x80) { - hdlc->state = HDLC_GET_FLAG_B0; - } else { - hdlc->state = HDLC_GET_DATA; - hdlc->crc = 0xffff; - hdlc->shift_reg = 0; - hdlc->hdlc_bits1 = 0; - hdlc->data_bits = 0; - hdlc->data_received = 0; - } - hdlc->cbin <<= 1; - hdlc->bit_shift--; - break; - case HDLC_GET_DATA: - if (hdlc->cbin & 0x80) { - hdlc->hdlc_bits1++; - switch (hdlc->hdlc_bits1) { - case 6: - break; - case 7: - if (hdlc->data_received) - /* bad frame */ - status = -HDLC_FRAMING_ERROR; - if (!hdlc->do_adapt56) { - if (hdlc->cbin == fast_abort - [hdlc->bit_shift + 1]) { - hdlc->state = - HDLC_FAST_IDLE; - hdlc->bit_shift = 1; - break; - } - } else - hdlc->state = HDLC_GET_FLAG_B0; - break; - default: - hdlc->shift_reg >>= 1; - hdlc->shift_reg |= 0x80; - hdlc->data_bits++; - break; - } - } else { - switch (hdlc->hdlc_bits1) { - case 5: - break; - case 6: - if (hdlc->data_received) - status = check_frame(hdlc); - hdlc->crc = 0xffff; - hdlc->shift_reg = 0; - hdlc->data_bits = 0; - if (!hdlc->do_adapt56) - handle_fast_flag(hdlc); - else { - hdlc->state = HDLC_GET_DATA; - hdlc->data_received = 0; - } - break; - default: - hdlc->shift_reg >>= 1; - hdlc->data_bits++; - break; - } - hdlc->hdlc_bits1 = 0; - } - if (status) { - hdlc->dstpos = 0; - *count -= slen; - hdlc->cbin <<= 1; - hdlc->bit_shift--; - return status; - } - if (hdlc->data_bits == 8) { - hdlc->data_bits = 0; - hdlc->data_received = 1; - hdlc->crc = crc_ccitt_byte(hdlc->crc, - hdlc->shift_reg); - - /* good byte received */ - if (hdlc->dstpos < dsize) - dst[hdlc->dstpos++] = hdlc->shift_reg; - else { - /* frame too long */ - status = -HDLC_LENGTH_ERROR; - hdlc->dstpos = 0; - } - } - hdlc->cbin <<= 1; - hdlc->bit_shift--; - break; - case HDLC_FAST_FLAG: - if (hdlc->cbin == hdlc->ffvalue) { - hdlc->bit_shift = 0; - break; - } else { - if (hdlc->cbin == 0xff) { - hdlc->state = HDLC_FAST_IDLE; - hdlc->bit_shift = 0; - } else if (hdlc->ffbit_shift == 8) { - hdlc->state = HDLC_GETFLAG_B7; - break; - } else - handle_abort(hdlc); - } - break; - default: - break; - } - } - *count -= slen; - return 0; -} -EXPORT_SYMBOL(isdnhdlc_decode); -/* - isdnhdlc_encode - encodes HDLC frames to a transparent bit stream. - - The bit stream starts with a beginning flag (01111110). After - that each byte is added to the bit stream with bit stuffing added - (0 after 5 1's). - When the last byte has been removed from the source buffer, the - CRC (2 bytes is added) and the frame terminates with the ending flag. - For the dchannel, the idle character (all 1's) is also added at the end. - If this function is called with empty source buffer (slen=0), flags or - idle character will be generated. - - src - source buffer - slen - source buffer length - count - number of bytes removed (encoded) from source buffer - dst _ destination buffer - dsize - destination buffer size - returns - number of encoded bytes in the destination buffer -*/ -int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src, u16 slen, - int *count, u8 *dst, int dsize) -{ - static const unsigned char xfast_flag_value[] = { - 0x7e, 0x3f, 0x9f, 0xcf, 0xe7, 0xf3, 0xf9, 0xfc, 0x7e - }; - - int len = 0; - - *count = slen; - - /* special handling for one byte frames */ - if ((slen == 1) && (hdlc->state == HDLC_SEND_FAST_FLAG)) - hdlc->state = HDLC_SENDFLAG_ONE; - while (dsize > 0) { - if (hdlc->bit_shift == 0) { - if (slen && !hdlc->do_closing) { - hdlc->shift_reg = *src++; - slen--; - if (slen == 0) - /* closing sequence, CRC + flag(s) */ - hdlc->do_closing = 1; - hdlc->bit_shift = 8; - } else { - if (hdlc->state == HDLC_SEND_DATA) { - if (hdlc->data_received) { - hdlc->state = HDLC_SEND_CRC1; - hdlc->crc ^= 0xffff; - hdlc->bit_shift = 8; - hdlc->shift_reg = - hdlc->crc & 0xff; - } else if (!hdlc->do_adapt56) - hdlc->state = - HDLC_SEND_FAST_FLAG; - else - hdlc->state = - HDLC_SENDFLAG_B0; - } - - } - } - - switch (hdlc->state) { - case STOPPED: - while (dsize--) - *dst++ = 0xff; - return dsize; - case HDLC_SEND_FAST_FLAG: - hdlc->do_closing = 0; - if (slen == 0) { - /* the code is for bitreverse streams */ - if (hdlc->do_bitreverse == 0) - *dst++ = bitrev8(hdlc->ffvalue); - else - *dst++ = hdlc->ffvalue; - len++; - dsize--; - break; - } - /* fall through */ - case HDLC_SENDFLAG_ONE: - if (hdlc->bit_shift == 8) { - hdlc->cbin = hdlc->ffvalue >> - (8 - hdlc->data_bits); - hdlc->state = HDLC_SEND_DATA; - hdlc->crc = 0xffff; - hdlc->hdlc_bits1 = 0; - hdlc->data_received = 1; - } - break; - case HDLC_SENDFLAG_B0: - hdlc->do_closing = 0; - hdlc->cbin <<= 1; - hdlc->data_bits++; - hdlc->hdlc_bits1 = 0; - hdlc->state = HDLC_SENDFLAG_B1A6; - break; - case HDLC_SENDFLAG_B1A6: - hdlc->cbin <<= 1; - hdlc->data_bits++; - hdlc->cbin++; - if (++hdlc->hdlc_bits1 == 6) - hdlc->state = HDLC_SENDFLAG_B7; - break; - case HDLC_SENDFLAG_B7: - hdlc->cbin <<= 1; - hdlc->data_bits++; - if (slen == 0) { - hdlc->state = HDLC_SENDFLAG_B0; - break; - } - if (hdlc->bit_shift == 8) { - hdlc->state = HDLC_SEND_DATA; - hdlc->crc = 0xffff; - hdlc->hdlc_bits1 = 0; - hdlc->data_received = 1; - } - break; - case HDLC_SEND_FIRST_FLAG: - hdlc->data_received = 1; - if (hdlc->data_bits == 8) { - hdlc->state = HDLC_SEND_DATA; - hdlc->crc = 0xffff; - hdlc->hdlc_bits1 = 0; - break; - } - hdlc->cbin <<= 1; - hdlc->data_bits++; - if (hdlc->shift_reg & 0x01) - hdlc->cbin++; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - if (hdlc->bit_shift == 0) { - hdlc->state = HDLC_SEND_DATA; - hdlc->crc = 0xffff; - hdlc->hdlc_bits1 = 0; - } - break; - case HDLC_SEND_DATA: - hdlc->cbin <<= 1; - hdlc->data_bits++; - if (hdlc->hdlc_bits1 == 5) { - hdlc->hdlc_bits1 = 0; - break; - } - if (hdlc->bit_shift == 8) - hdlc->crc = crc_ccitt_byte(hdlc->crc, - hdlc->shift_reg); - if (hdlc->shift_reg & 0x01) { - hdlc->hdlc_bits1++; - hdlc->cbin++; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - } else { - hdlc->hdlc_bits1 = 0; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - } - break; - case HDLC_SEND_CRC1: - hdlc->cbin <<= 1; - hdlc->data_bits++; - if (hdlc->hdlc_bits1 == 5) { - hdlc->hdlc_bits1 = 0; - break; - } - if (hdlc->shift_reg & 0x01) { - hdlc->hdlc_bits1++; - hdlc->cbin++; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - } else { - hdlc->hdlc_bits1 = 0; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - } - if (hdlc->bit_shift == 0) { - hdlc->shift_reg = (hdlc->crc >> 8); - hdlc->state = HDLC_SEND_CRC2; - hdlc->bit_shift = 8; - } - break; - case HDLC_SEND_CRC2: - hdlc->cbin <<= 1; - hdlc->data_bits++; - if (hdlc->hdlc_bits1 == 5) { - hdlc->hdlc_bits1 = 0; - break; - } - if (hdlc->shift_reg & 0x01) { - hdlc->hdlc_bits1++; - hdlc->cbin++; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - } else { - hdlc->hdlc_bits1 = 0; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - } - if (hdlc->bit_shift == 0) { - hdlc->shift_reg = 0x7e; - hdlc->state = HDLC_SEND_CLOSING_FLAG; - hdlc->bit_shift = 8; - } - break; - case HDLC_SEND_CLOSING_FLAG: - hdlc->cbin <<= 1; - hdlc->data_bits++; - if (hdlc->hdlc_bits1 == 5) { - hdlc->hdlc_bits1 = 0; - break; - } - if (hdlc->shift_reg & 0x01) - hdlc->cbin++; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - if (hdlc->bit_shift == 0) { - hdlc->ffvalue = - xfast_flag_value[hdlc->data_bits]; - if (hdlc->dchannel) { - hdlc->ffvalue = 0x7e; - hdlc->state = HDLC_SEND_IDLE1; - hdlc->bit_shift = 8-hdlc->data_bits; - if (hdlc->bit_shift == 0) - hdlc->state = - HDLC_SEND_FAST_IDLE; - } else { - if (!hdlc->do_adapt56) { - hdlc->state = - HDLC_SEND_FAST_FLAG; - hdlc->data_received = 0; - } else { - hdlc->state = HDLC_SENDFLAG_B0; - hdlc->data_received = 0; - } - /* Finished this frame, send flags */ - if (dsize > 1) - dsize = 1; - } - } - break; - case HDLC_SEND_IDLE1: - hdlc->do_closing = 0; - hdlc->cbin <<= 1; - hdlc->cbin++; - hdlc->data_bits++; - hdlc->bit_shift--; - if (hdlc->bit_shift == 0) { - hdlc->state = HDLC_SEND_FAST_IDLE; - hdlc->bit_shift = 0; - } - break; - case HDLC_SEND_FAST_IDLE: - hdlc->do_closing = 0; - hdlc->cbin = 0xff; - hdlc->data_bits = 8; - if (hdlc->bit_shift == 8) { - hdlc->cbin = 0x7e; - hdlc->state = HDLC_SEND_FIRST_FLAG; - } else { - /* the code is for bitreverse streams */ - if (hdlc->do_bitreverse == 0) - *dst++ = bitrev8(hdlc->cbin); - else - *dst++ = hdlc->cbin; - hdlc->bit_shift = 0; - hdlc->data_bits = 0; - len++; - dsize = 0; - } - break; - default: - break; - } - if (hdlc->do_adapt56) { - if (hdlc->data_bits == 7) { - hdlc->cbin <<= 1; - hdlc->cbin++; - hdlc->data_bits++; - } - } - if (hdlc->data_bits == 8) { - /* the code is for bitreverse streams */ - if (hdlc->do_bitreverse == 0) - *dst++ = bitrev8(hdlc->cbin); - else - *dst++ = hdlc->cbin; - hdlc->data_bits = 0; - len++; - dsize--; - } - } - *count -= slen; - - return len; -} -EXPORT_SYMBOL(isdnhdlc_encode); diff --git a/include/linux/isdn/hdlc.h b/include/linux/isdn/hdlc.h deleted file mode 100644 index 96521370c782..000000000000 --- a/include/linux/isdn/hdlc.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * hdlc.h -- General purpose ISDN HDLC decoder. - * - * Implementation of a HDLC decoder/encoder in software. - * Necessary because some ISDN devices don't have HDLC - * controllers. - * - * Copyright (C) - * 2009 Karsten Keil - * 2002 Wolfgang Mües - * 2001 Frode Isaksen - * 2001 Kai Germaschewski - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef __ISDNHDLC_H__ -#define __ISDNHDLC_H__ - -struct isdnhdlc_vars { - int bit_shift; - int hdlc_bits1; - int data_bits; - int ffbit_shift; /* encoding only */ - int state; - int dstpos; - - u16 crc; - - u8 cbin; - u8 shift_reg; - u8 ffvalue; - - /* set if transferring data */ - u32 data_received:1; - /* set if D channel (send idle instead of flags) */ - u32 dchannel:1; - /* set if 56K adaptation */ - u32 do_adapt56:1; - /* set if in closing phase (need to send CRC + flag) */ - u32 do_closing:1; - /* set if data is bitreverse */ - u32 do_bitreverse:1; -}; - -/* Feature Flags */ -#define HDLC_56KBIT 0x01 -#define HDLC_DCHANNEL 0x02 -#define HDLC_BITREVERSE 0x04 - -/* - The return value from isdnhdlc_decode is - the frame length, 0 if no complete frame was decoded, - or a negative error number -*/ -#define HDLC_FRAMING_ERROR 1 -#define HDLC_CRC_ERROR 2 -#define HDLC_LENGTH_ERROR 3 - -extern void isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, u32 features); - -extern int isdnhdlc_decode(struct isdnhdlc_vars *hdlc, const u8 *src, - int slen, int *count, u8 *dst, int dsize); - -extern void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, u32 features); - -extern int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src, - u16 slen, int *count, u8 *dst, int dsize); - -#endif /* __ISDNHDLC_H__ */ -- cgit v1.2.3 From 2cf6bffc49dae26edd12af6b57c8c780590380bf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 23 May 2019 15:44:12 +0200 Subject: netfilter: replace skb_make_writable with skb_ensure_writable This converts all remaining users and then removes skb_make_writable. Suggested-by: Daniel Borkmann Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 5 ----- net/netfilter/core.c | 22 ---------------------- net/netfilter/nf_synproxy_core.c | 2 +- net/netfilter/nfnetlink_queue.c | 2 +- net/netfilter/xt_DSCP.c | 8 ++++---- 5 files changed, 6 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 996bc247ef6e..049aeb40fa35 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -336,11 +336,6 @@ int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); #endif -/* Call this before modifying an existing packet: ensures it is - modifiable and linear to the point you care about (writable_len). - Returns true or false. */ -int skb_make_writable(struct sk_buff *skb, unsigned int writable_len); - struct flowi; struct nf_queue_entry; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index b96fd3f54705..817a9e5d16e4 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -536,28 +536,6 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, } EXPORT_SYMBOL(nf_hook_slow); - -int skb_make_writable(struct sk_buff *skb, unsigned int writable_len) -{ - if (writable_len > skb->len) - return 0; - - /* Not exclusive use of packet? Must copy. */ - if (!skb_cloned(skb)) { - if (writable_len <= skb_headlen(skb)) - return 1; - } else if (skb_clone_writable(skb, writable_len)) - return 1; - - if (writable_len <= skb_headlen(skb)) - writable_len = 0; - else - writable_len -= skb_headlen(skb); - - return !!__pskb_pull_tail(skb, writable_len); -} -EXPORT_SYMBOL(skb_make_writable); - /* This needs to be compiled in any case to avoid dependencies between the * nfnetlink_queue code and nf_conntrack. */ diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 8ff4d22f10b2..3d58a9e93e5a 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -196,7 +196,7 @@ unsigned int synproxy_tstamp_adjust(struct sk_buff *skb, optoff = protoff + sizeof(struct tcphdr); optend = protoff + th->doff * 4; - if (!skb_make_writable(skb, optend)) + if (skb_ensure_writable(skb, optend)) return 0; while (optoff < optend) { diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 27dac47b29c2..831f57008d78 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -863,7 +863,7 @@ nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) } skb_put(e->skb, diff); } - if (!skb_make_writable(e->skb, data_len)) + if (skb_ensure_writable(e->skb, data_len)) return -ENOMEM; skb_copy_to_linear_data(e->skb, data, data_len); e->skb->ip_summed = CHECKSUM_NONE; diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 098ed851b7a7..30d554d6c213 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -34,7 +34,7 @@ dscp_tg(struct sk_buff *skb, const struct xt_action_param *par) u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; if (dscp != dinfo->dscp) { - if (!skb_make_writable(skb, sizeof(struct iphdr))) + if (skb_ensure_writable(skb, sizeof(struct iphdr))) return NF_DROP; ipv4_change_dsfield(ip_hdr(skb), @@ -52,7 +52,7 @@ dscp_tg6(struct sk_buff *skb, const struct xt_action_param *par) u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; if (dscp != dinfo->dscp) { - if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) + if (skb_ensure_writable(skb, sizeof(struct ipv6hdr))) return NF_DROP; ipv6_change_dsfield(ipv6_hdr(skb), @@ -82,7 +82,7 @@ tos_tg(struct sk_buff *skb, const struct xt_action_param *par) nv = (orig & ~info->tos_mask) ^ info->tos_value; if (orig != nv) { - if (!skb_make_writable(skb, sizeof(struct iphdr))) + if (skb_ensure_writable(skb, sizeof(struct iphdr))) return NF_DROP; iph = ip_hdr(skb); ipv4_change_dsfield(iph, 0, nv); @@ -102,7 +102,7 @@ tos_tg6(struct sk_buff *skb, const struct xt_action_param *par) nv = (orig & ~info->tos_mask) ^ info->tos_value; if (orig != nv) { - if (!skb_make_writable(skb, sizeof(struct iphdr))) + if (skb_ensure_writable(skb, sizeof(struct iphdr))) return NF_DROP; iph = ipv6_hdr(skb); ipv6_change_dsfield(iph, 0, nv); -- cgit v1.2.3 From c9bb6165a16e6d5498981a6c777b94a78e74462b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 31 May 2019 11:15:26 +0200 Subject: netfilter: nf_conntrack_bridge: fix CONFIG_IPV6=y This patch fixes a few problems with CONFIG_IPV6=y and CONFIG_NF_CONNTRACK_BRIDGE=m: In file included from net/netfilter/utils.c:5: include/linux/netfilter_ipv6.h: In function 'nf_ipv6_br_defrag': include/linux/netfilter_ipv6.h:110:9: error: implicit declaration of function 'nf_ct_frag6_gather'; did you mean 'nf_ct_attach'? [-Werror=implicit-function-declaration] And these too: net/ipv6/netfilter.c:242:2: error: unknown field 'br_defrag' specified in initializer net/ipv6/netfilter.c:243:2: error: unknown field 'br_fragment' specified in initializer This patch includes an original chunk from wenxu. Fixes: 764dd163ac92 ("netfilter: nf_conntrack_bridge: add support for IPv6") Reported-by: Stephen Rothwell Reported-by: Yuehaibing Reported-by: kbuild test robot Reported-by: wenxu Signed-off-by: Pablo Neira Ayuso Signed-off-by: wenxu Signed-off-by: David S. Miller --- include/linux/netfilter_ipv6.h | 2 ++ net/ipv6/netfilter.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index a21b8c9623ee..3a3dc4b1f0e7 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -96,6 +96,8 @@ static inline int nf_ip6_route(struct net *net, struct dst_entry **dst, #endif } +#include + static inline int nf_ipv6_br_defrag(struct net *net, struct sk_buff *skb, u32 user) { diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index c6665382acb5..9530cc280953 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -238,7 +238,7 @@ static const struct nf_ipv6_ops ipv6ops = { .route_input = ip6_route_input, .fragment = ip6_fragment, .reroute = nf_ip6_reroute, -#if IS_MODULE(CONFIG_NF_CONNTRACK_BRIDGE) +#if IS_MODULE(CONFIG_IPV6) .br_defrag = nf_ct_frag6_gather, .br_fragment = br_ip6_fragment, #endif -- cgit v1.2.3 From a5e112e6424adb77d953eac20e6936b952fd6b32 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 13 May 2019 12:37:17 -0700 Subject: cgroup: add cgroup_parse_float() cgroup already uses floating point for percent[ile] numbers and there are several controllers which want to take them as input. Add a generic parse helper to handle inputs. Update the interface convention documentation about the use of percentage numbers. While at it, also clarify the default time unit. Signed-off-by: Tejun Heo --- Documentation/admin-guide/cgroup-v2.rst | 6 +++++ include/linux/cgroup.h | 2 ++ kernel/cgroup/cgroup.c | 43 +++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+) (limited to 'include/linux') diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 88e746074252..73b0c0d8df31 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -696,6 +696,12 @@ Conventions informational files on the root cgroup which end up showing global information available elsewhere shouldn't exist. +- The default time unit is microseconds. If a different unit is ever + used, an explicit unit suffix must be present. + +- A parts-per quantity should use a percentage decimal with at least + two digit fractional part - e.g. 13.40. + - If a controller implements weight based resource distribution, its interface file should be named "weight" and have the range [1, 10000] with 100 as the default. The values are chosen to allow diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 0297f930a56e..3745ecdad925 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -131,6 +131,8 @@ void cgroup_free(struct task_struct *p); int cgroup_init_early(void); int cgroup_init(void); +int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v); + /* * Iteration helpers and macros. */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index a7df319c2e9a..7dffcfe17441 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6387,4 +6387,47 @@ static int __init cgroup_sysfs_init(void) return sysfs_create_group(kernel_kobj, &cgroup_sysfs_attr_group); } subsys_initcall(cgroup_sysfs_init); + +static u64 power_of_ten(int power) +{ + u64 v = 1; + while (power--) + v *= 10; + return v; +} + +/** + * cgroup_parse_float - parse a floating number + * @input: input string + * @dec_shift: number of decimal digits to shift + * @v: output + * + * Parse a decimal floating point number in @input and store the result in + * @v with decimal point right shifted @dec_shift times. For example, if + * @input is "12.3456" and @dec_shift is 3, *@v will be set to 12345. + * Returns 0 on success, -errno otherwise. + * + * There's nothing cgroup specific about this function except that it's + * currently the only user. + */ +int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v) +{ + s64 whole, frac = 0; + int fstart = 0, fend = 0, flen; + + if (!sscanf(input, "%lld.%n%lld%n", &whole, &fstart, &frac, &fend)) + return -EINVAL; + if (frac < 0) + return -EINVAL; + + flen = fend > fstart ? fend - fstart : 0; + if (flen < dec_shift) + frac *= power_of_ten(dec_shift - flen); + else + frac = DIV_ROUND_CLOSEST_ULL(frac, power_of_ten(flen - dec_shift)); + + *v = whole * power_of_ten(dec_shift) + frac; + return 0; +} + #endif /* CONFIG_SYSFS */ -- cgit v1.2.3 From 0b9055a112fd86c07b9d4857b61019485ec6526f Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 29 May 2019 22:50:24 +0000 Subject: net/mlx5: Add core dump register access HW bits Add Firmware core dump registers and HW definitions. Signed-off-by: Moshe Shemesh Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5a27246db883..b5431f7d97cb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -107,6 +107,7 @@ enum { MLX5_REG_FPGA_CAP = 0x4022, MLX5_REG_FPGA_CTRL = 0x4023, MLX5_REG_FPGA_ACCESS_REG = 0x4024, + MLX5_REG_CORE_DUMP = 0x402e, MLX5_REG_PCAP = 0x5001, MLX5_REG_PMTU = 0x5003, MLX5_REG_PTYS = 0x5004, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5e74305e2e57..7ee422e38826 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -715,7 +715,9 @@ struct mlx5_ifc_qos_cap_bits { }; struct mlx5_ifc_debug_cap_bits { - u8 reserved_at_0[0x20]; + u8 core_dump_general[0x1]; + u8 core_dump_qp[0x1]; + u8 reserved_at_2[0x1e]; u8 reserved_at_20[0x2]; u8 stall_detect[0x1]; @@ -2531,6 +2533,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_e_switch_cap_bits e_switch_cap; struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap; struct mlx5_ifc_qos_cap_bits qos_cap; + struct mlx5_ifc_debug_cap_bits debug_cap; struct mlx5_ifc_fpga_cap_bits fpga_cap; u8 reserved_at_0[0x8000]; }; @@ -8546,6 +8549,18 @@ struct mlx5_ifc_qcam_reg_bits { u8 reserved_at_1c0[0x80]; }; +struct mlx5_ifc_core_dump_reg_bits { + u8 reserved_at_0[0x18]; + u8 core_dump_type[0x8]; + + u8 reserved_at_20[0x30]; + u8 vhca_id[0x10]; + + u8 reserved_at_60[0x8]; + u8 qpn[0x18]; + u8 reserved_at_80[0x180]; +}; + struct mlx5_ifc_pcap_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; -- cgit v1.2.3 From c6d4e45d3b44b71227588c2f76615380b3961f96 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Wed, 29 May 2019 22:50:29 +0000 Subject: net/mlx5: Introduce termination table bits Termination table is a flow table with a termination flag. The flag allows the firmware to assume that the the specified actions are the last actions list. This assumption allows the FW to safely perform potential looping logic (e.g. hairpin). Introduce the bits for this attribute. Signed-off-by: Eli Britstein Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 3 +++ include/linux/mlx5/fs.h | 1 + include/linux/mlx5/mlx5_ifc.h | 6 ++++-- 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 013b1ca4a791..bb24c3797218 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -147,6 +147,7 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, { int en_encap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); int en_decap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + int term = !!(ft->flags & MLX5_FLOW_TABLE_TERMINATION); u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0}; u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0}; struct mlx5_core_dev *dev = ns->dev; @@ -167,6 +168,8 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, en_decap); MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en, en_encap); + MLX5_SET(create_flow_table_in, in, flow_table_context.termination_table, + term); switch (ft->op_mod) { case FS_FT_OP_MOD_NORMAL: diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index e690ba0f965c..2ddaa97f2179 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -47,6 +47,7 @@ enum { enum { MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT = BIT(0), MLX5_FLOW_TABLE_TUNNEL_EN_DECAP = BIT(1), + MLX5_FLOW_TABLE_TERMINATION = BIT(2), }; #define LEFTOVERS_RULE_NUM 2 diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 7ee422e38826..feaa909bf14f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -382,7 +382,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reformat_and_modify_action[0x1]; u8 reserved_at_15[0x2]; u8 table_miss_action_domain[0x1]; - u8 reserved_at_18[0x8]; + u8 termination_table[0x1]; + u8 reserved_at_19[0x7]; u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; u8 log_max_modify_header_context[0x8]; @@ -7239,7 +7240,8 @@ struct mlx5_ifc_create_flow_table_out_bits { struct mlx5_ifc_flow_table_context_bits { u8 reformat_en[0x1]; u8 decap_en[0x1]; - u8 reserved_at_2[0x2]; + u8 reserved_at_2[0x1]; + u8 termination_table[0x1]; u8 table_miss_action[0x4]; u8 level[0x8]; u8 reserved_at_10[0x8]; -- cgit v1.2.3 From cd56f929e6a547180f889a4def370bdd6d48d223 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Wed, 29 May 2019 22:50:34 +0000 Subject: net/mlx5: E-Switch, Replace host_params event with functions_changed event To support sriov on a E-Switch manager, num_vfs are queried to the firmware whenever E-Switch manager is notified by esw_functions_changed event. Replace host_params event with esw_functions_changed event that reflects more appropriate naming. While at it, also correct num_vfs type from int to u16 as expected by the function mlx5_esw_query_functions(). Signed-off-by: Vu Pham Reviewed-by: Parav Pandit Reviewed-by: Bodong Wang Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 +- drivers/net/ethernet/mellanox/mlx5/core/ecpf.c | 27 --------- drivers/net/ethernet/mellanox/mlx5/core/ecpf.h | 4 -- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 3 +- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 32 +++++++++- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 6 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 69 +++++++++++++--------- drivers/net/ethernet/mellanox/mlx5/core/events.c | 4 +- include/linux/mlx5/device.h | 2 +- include/linux/mlx5/mlx5_ifc.h | 6 +- 10 files changed, 86 insertions(+), 71 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 937ba4bcb056..7d3aec98e31f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -316,7 +316,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: case MLX5_CMD_OP_DEALLOC_MEMIC: case MLX5_CMD_OP_PAGE_FAULT_RESUME: - case MLX5_CMD_OP_QUERY_HOST_PARAMS: + case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -628,7 +628,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); - MLX5_COMMAND_STR_CASE(QUERY_HOST_PARAMS); + MLX5_COMMAND_STR_CASE(QUERY_ESW_FUNCTIONS); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index 4746f2d28fb6..1bcf8b8f9713 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -83,30 +83,3 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev) mlx5_peer_pf_cleanup(dev); } - -static int mlx5_query_host_params_context(struct mlx5_core_dev *dev, - u32 *out, int outlen) -{ - u32 in[MLX5_ST_SZ_DW(query_host_params_in)] = {}; - - MLX5_SET(query_host_params_in, in, opcode, - MLX5_CMD_OP_QUERY_HOST_PARAMS); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); -} - -int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) -{ - u32 out[MLX5_ST_SZ_DW(query_host_params_out)] = {}; - int err; - - err = mlx5_query_host_params_context(dev, out, sizeof(out)); - if (err) - return err; - - *num_vf = MLX5_GET(query_host_params_out, out, - host_params_context.host_num_of_vfs); - mlx5_core_dbg(dev, "host_num_of_vfs %d\n", *num_vf); - - return 0; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h index 346372df218f..d3d7a00a02ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -16,7 +16,6 @@ enum { bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); int mlx5_ec_init(struct mlx5_core_dev *dev); void mlx5_ec_cleanup(struct mlx5_core_dev *dev); -int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf); #else /* CONFIG_MLX5_ESWITCH */ @@ -24,9 +23,6 @@ static inline bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {} -static inline int -mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) -{ return -EOPNOTSUPP; } #endif /* CONFIG_MLX5_ESWITCH */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 23883d1fa22f..052bd70e4aa6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -534,7 +534,8 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev) async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); if (mlx5_core_is_ecpf_esw_manager(dev)) - async_event_mask |= (1ull << MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE); + async_event_mask |= + (1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED); return async_event_mask; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 9ea0ccfe5ef5..d8935232964a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1686,13 +1686,41 @@ static int eswitch_vport_event(struct notifier_block *nb, return NOTIFY_OK; } +static int query_esw_functions(struct mlx5_core_dev *dev, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {0}; + + MLX5_SET(query_esw_functions_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_FUNCTIONS); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u16 *num_vfs) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {0}; + int err; + + err = query_esw_functions(dev, out, sizeof(out)); + if (err) + return err; + + *num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); + esw_debug(dev, "host_num_of_vfs=%d\n", *num_vfs); + + return 0; +} + /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { - int vf_nvports = 0, total_nvports = 0; struct mlx5_vport *vport; + int total_nvports = 0; + u16 vf_nvports = 0; int err; int i, enabled_events; @@ -1712,7 +1740,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) if (mode == SRIOV_OFFLOADS) { if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - err = mlx5_query_host_params_num_vfs(esw->dev, &vf_nvports); + err = mlx5_esw_query_functions(esw->dev, &vf_nvports); if (err) return err; total_nvports = esw->total_vports; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ed3fad689ec9..320dd83dd301 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -190,7 +190,7 @@ struct mlx5_host_work { struct mlx5_eswitch *esw; }; -struct mlx5_host_info { +struct mlx5_esw_functions { struct mlx5_nb nb; u16 num_vfs; }; @@ -219,7 +219,7 @@ struct mlx5_eswitch { int mode; int nvports; u16 manager_vport; - struct mlx5_host_info host_info; + struct mlx5_esw_functions esw_funcs; }; void esw_offloads_cleanup(struct mlx5_eswitch *esw); @@ -386,6 +386,8 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1); +int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u16 *num_vfs); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(__dev, format, ...) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e09ae27485ee..83689678b400 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -41,7 +41,6 @@ #include "en.h" #include "fs_core.h" #include "lib/devcom.h" -#include "ecpf.h" #include "lib/eq.h" /* There are two match-all miss flows, one for unicast dst mac and @@ -1782,57 +1781,79 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_prio_tag_acls_cleanup(esw); } -static void esw_host_params_event_handler(struct work_struct *work) +static void esw_functions_changed_event_handler(struct work_struct *work) { struct mlx5_host_work *host_work; struct mlx5_eswitch *esw; - int err, num_vf = 0; + u16 num_vfs = 0; + int err; host_work = container_of(work, struct mlx5_host_work, work); esw = host_work->esw; - err = mlx5_query_host_params_num_vfs(esw->dev, &num_vf); - if (err || num_vf == esw->host_info.num_vfs) + err = mlx5_esw_query_functions(esw->dev, &num_vfs); + if (err || num_vfs == esw->esw_funcs.num_vfs) goto out; /* Number of VFs can only change from "0 to x" or "x to 0". */ - if (esw->host_info.num_vfs > 0) { - esw_offloads_unload_vf_reps(esw, esw->host_info.num_vfs); + if (esw->esw_funcs.num_vfs > 0) { + esw_offloads_unload_vf_reps(esw, esw->esw_funcs.num_vfs); } else { - err = esw_offloads_load_vf_reps(esw, num_vf); + err = esw_offloads_load_vf_reps(esw, num_vfs); if (err) goto out; } - esw->host_info.num_vfs = num_vf; + esw->esw_funcs.num_vfs = num_vfs; out: kfree(host_work); } -static int esw_host_params_event(struct notifier_block *nb, - unsigned long type, void *data) +static int esw_functions_changed_event(struct notifier_block *nb, + unsigned long type, void *data) { + struct mlx5_esw_functions *esw_funcs; struct mlx5_host_work *host_work; - struct mlx5_host_info *host_info; struct mlx5_eswitch *esw; host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC); if (!host_work) return NOTIFY_DONE; - host_info = mlx5_nb_cof(nb, struct mlx5_host_info, nb); - esw = container_of(host_info, struct mlx5_eswitch, host_info); + esw_funcs = mlx5_nb_cof(nb, struct mlx5_esw_functions, nb); + esw = container_of(esw_funcs, struct mlx5_eswitch, esw_funcs); host_work->esw = esw; - INIT_WORK(&host_work->work, esw_host_params_event_handler); + INIT_WORK(&host_work->work, esw_functions_changed_event_handler); queue_work(esw->work_queue, &host_work->work); return NOTIFY_OK; } +static void esw_functions_changed_event_init(struct mlx5_eswitch *esw, + u16 vf_nvports) +{ + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + return; + + MLX5_NB_INIT(&esw->esw_funcs.nb, esw_functions_changed_event, + ESW_FUNCTIONS_CHANGED); + mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb); + esw->esw_funcs.num_vfs = vf_nvports; +} + +static void esw_functions_changed_event_cleanup(struct mlx5_eswitch *esw) +{ + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + return; + + mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb); + flush_workqueue(esw->work_queue); +} + int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, int total_nvports) { @@ -1848,12 +1869,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, esw_offloads_devcom_init(esw); - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - MLX5_NB_INIT(&esw->host_info.nb, esw_host_params_event, - HOST_PARAMS_CHANGE); - mlx5_eq_notifier_register(esw->dev, &esw->host_info.nb); - esw->host_info.num_vfs = vf_nvports; - } + esw_functions_changed_event_init(esw, vf_nvports); mlx5_rdma_enable_roce(esw->dev); @@ -1887,13 +1903,12 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw) { u16 num_vfs; - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - mlx5_eq_notifier_unregister(esw->dev, &esw->host_info.nb); - flush_workqueue(esw->work_queue); - num_vfs = esw->host_info.num_vfs; - } else { + esw_functions_changed_event_cleanup(esw); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + num_vfs = esw->esw_funcs.num_vfs; + else num_vfs = esw->dev->priv.sriov.num_vfs; - } mlx5_rdma_disable_roce(esw->dev); esw_offloads_devcom_cleanup(esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index a81e8d2168d8..8bcf3426b9c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -108,8 +108,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_STALL_EVENT"; case MLX5_EVENT_TYPE_CMD: return "MLX5_EVENT_TYPE_CMD"; - case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE: - return "MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE"; + case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED: + return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED"; case MLX5_EVENT_TYPE_PAGE_REQUEST: return "MLX5_EVENT_TYPE_PAGE_REQUEST"; case MLX5_EVENT_TYPE_PAGE_FAULT: diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index fc2b6e807f06..5e760067ac41 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -342,7 +342,7 @@ enum mlx5_event { MLX5_EVENT_TYPE_PAGE_FAULT = 0xc, MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd, - MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE = 0xe, + MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED = 0xe, MLX5_EVENT_TYPE_DCT_DRAINED = 0x1c, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index feaa909bf14f..0780242a757a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -155,7 +155,7 @@ enum { MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY = 0x725, MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY = 0x726, MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS = 0x727, - MLX5_CMD_OP_QUERY_HOST_PARAMS = 0x740, + MLX5_CMD_OP_QUERY_ESW_FUNCTIONS = 0x740, MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750, MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751, MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752, @@ -9721,7 +9721,7 @@ struct mlx5_ifc_host_params_context_bits { u8 reserved_at_80[0x180]; }; -struct mlx5_ifc_query_host_params_in_bits { +struct mlx5_ifc_query_esw_functions_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; @@ -9731,7 +9731,7 @@ struct mlx5_ifc_query_host_params_in_bits { u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_query_host_params_out_bits { +struct mlx5_ifc_query_esw_functions_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; -- cgit v1.2.3 From 6706a3b94f890145ca09797f748d2b30e1414fd3 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Wed, 29 May 2019 22:50:37 +0000 Subject: net/mlx5: E-Switch, Honor eswitch functions changed event cap Whenever device supports eswitch functions changed event, honor such device setting. Do not limit it to ECPF. Signed-off-by: Parav Pandit Signed-off-by: Vu Pham Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 13 +++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 6 +++--- include/linux/mlx5/mlx5_ifc.h | 4 +++- 4 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 052bd70e4aa6..5e9319d3d90c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -533,7 +533,7 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); - if (mlx5_core_is_ecpf_esw_manager(dev)) + if (mlx5_eswitch_is_funcs_handler(dev)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 320dd83dd301..b524813cccac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -406,6 +406,18 @@ static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) MLX5_VPORT_ECPF : MLX5_VPORT_PF; } +static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) +{ + /* Ideally device should have the functions changed supported + * capability regardless of it being ECPF or PF wherever such + * event should be processed such as on eswitch manager device. + * However, some ECPF based device might not have this capability + * set. Hence OR for ECPF check to cover such device. + */ + return MLX5_CAP_ESW(dev, esw_functions_changed) || + mlx5_core_is_ecpf_esw_manager(dev); +} + static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw) { /* Uplink always locate at the last element of the array.*/ @@ -500,6 +512,7 @@ static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } +static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } #define FDB_MAX_CHAIN 1 #define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 83689678b400..05cb2fffd887 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1836,7 +1836,7 @@ static int esw_functions_changed_event(struct notifier_block *nb, static void esw_functions_changed_event_init(struct mlx5_eswitch *esw, u16 vf_nvports) { - if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + if (!mlx5_eswitch_is_funcs_handler(esw->dev)) return; MLX5_NB_INIT(&esw->esw_funcs.nb, esw_functions_changed_event, @@ -1847,7 +1847,7 @@ static void esw_functions_changed_event_init(struct mlx5_eswitch *esw, static void esw_functions_changed_event_cleanup(struct mlx5_eswitch *esw) { - if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + if (!mlx5_eswitch_is_funcs_handler(esw->dev)) return; mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb); @@ -1905,7 +1905,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw) esw_functions_changed_event_cleanup(esw); - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + if (mlx5_eswitch_is_funcs_handler(esw->dev)) num_vfs = esw->esw_funcs.num_vfs; else num_vfs = esw->dev->priv.sriov.num_vfs; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 0780242a757a..6513b985c5e9 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -665,7 +665,9 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_at_5[0x16]; + u8 reserved_at_5[0x14]; + u8 esw_functions_changed[0x1]; + u8 reserved_at_1a[0x1]; u8 ecpf_vport_exists[0x1]; u8 counter_eswitch_affinity[0x1]; u8 merged_eswitch[0x1]; -- cgit v1.2.3 From 8693115af4c24d92b971ad895c5f329761ed5d38 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 29 May 2019 22:50:41 +0000 Subject: {IB,net}/mlx5: Constify rep ops functions pointers Currently for every representor type and for every single vport, representer function pointers copy is stored even though they don't change from one to other vport. Additionally priv data entry for the rep is not passed during registration, but its copied. It is used (set and cleared) by the user of the reps. As we want to scale vports, to simplify and also to split constants from data, 1. Rename mlx5_eswitch_rep_if to mlx5_eswitch_rep_ops as to match _ops prefix with other standard netdev, ibdev ops. 2. Constify the IB and Ethernet rep ops structure. 3. Instead of storing copy of all rep function pointers, store copy per eswitch rep type. 4. Split data and function pointers to mlx5_eswitch_rep_ops and mlx5_eswitch_rep_data. Signed-off-by: Parav Pandit Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/ib_rep.c | 19 ++++++----- drivers/infiniband/hw/mlx5/ib_rep.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 15 +++++---- drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 38 ++++++++++------------ include/linux/mlx5/eswitch.h | 20 +++++++----- 7 files changed, 49 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index cbcc40d776b9..22e651cb5534 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -60,7 +60,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) if (!__mlx5_ib_add(ibdev, profile)) return -EINVAL; - rep->rep_if[REP_IB].priv = ibdev; + rep->rep_data[REP_IB].priv = ibdev; return 0; } @@ -70,13 +70,13 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) { struct mlx5_ib_dev *dev; - if (!rep->rep_if[REP_IB].priv || + if (!rep->rep_data[REP_IB].priv || rep->vport != MLX5_VPORT_UPLINK) return; dev = mlx5_ib_rep_to_dev(rep); __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); - rep->rep_if[REP_IB].priv = NULL; + rep->rep_data[REP_IB].priv = NULL; } static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) @@ -84,16 +84,17 @@ static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) return mlx5_ib_rep_to_dev(rep); } +static const struct mlx5_eswitch_rep_ops rep_ops = { + .load = mlx5_ib_vport_rep_load, + .unload = mlx5_ib_vport_rep_unload, + .get_proto_dev = mlx5_ib_vport_get_proto_dev, +}; + void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_eswitch_rep_if rep_if = {}; - - rep_if.load = mlx5_ib_vport_rep_load; - rep_if.unload = mlx5_ib_vport_rep_unload; - rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev; - mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_IB); + mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB); } void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index c995102b0276..22adce2d6795 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -72,6 +72,6 @@ struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, static inline struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep) { - return rep->rep_if[REP_IB].priv; + return rep->rep_data[REP_IB].priv; } #endif /* __MLX5_IB_REP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 91e24f1cead8..33f8f99681a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1752,7 +1752,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) } rpriv->netdev = netdev; - rep->rep_if[REP_ETH].priv = rpriv; + rep->rep_data[REP_ETH].priv = rpriv; INIT_LIST_HEAD(&rpriv->vport_sqs_list); if (rep->vport == MLX5_VPORT_UPLINK) { @@ -1826,16 +1826,17 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) return rpriv->netdev; } +static const struct mlx5_eswitch_rep_ops rep_ops = { + .load = mlx5e_vport_rep_load, + .unload = mlx5e_vport_rep_unload, + .get_proto_dev = mlx5e_vport_rep_get_proto_dev +}; + void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_eswitch_rep_if rep_if = {}; - - rep_if.load = mlx5e_vport_rep_load; - rep_if.unload = mlx5e_vport_rep_unload; - rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH); + mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_ETH); } void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index c40c025afd99..e34573fd88c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -91,7 +91,7 @@ struct mlx5e_rep_priv { static inline struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep) { - return rep->rep_if[REP_ETH].priv; + return rep->rep_data[REP_ETH].priv; } struct mlx5e_neigh { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index b524813cccac..135d9a29bbdf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -173,6 +173,7 @@ struct mlx5_esw_offload { struct mutex peer_mutex; DECLARE_HASHTABLE(encap_tbl, 8); DECLARE_HASHTABLE(mod_hdr_tbl, 8); + const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; u8 inline_mode; u64 num_flows; u8 encap; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 05cb2fffd887..d6246ee042fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -332,7 +332,7 @@ static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none"); for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { rep = &esw->offloads.vport_reps[vf_vport]; - if (atomic_read(&rep->rep_if[REP_ETH].state) != REP_LOADED) + if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED) continue; err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); @@ -1276,7 +1276,7 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) ether_addr_copy(rep->hw_id, hw_id); for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) - atomic_set(&rep->rep_if[rep_type].state, + atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); } @@ -1286,9 +1286,9 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u8 rep_type) { - if (atomic_cmpxchg(&rep->rep_if[rep_type].state, + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, REP_LOADED, REP_REGISTERED) == REP_LOADED) - rep->rep_if[rep_type].unload(rep); + esw->offloads.rep_ops[rep_type]->unload(rep); } static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) @@ -1349,11 +1349,11 @@ static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, { int err = 0; - if (atomic_cmpxchg(&rep->rep_if[rep_type].state, + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, REP_REGISTERED, REP_LOADED) == REP_REGISTERED) { - err = rep->rep_if[rep_type].load(esw->dev, rep); + err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep); if (err) - atomic_set(&rep->rep_if[rep_type].state, + atomic_set(&rep->rep_data[rep_type].state, REP_REGISTERED); } @@ -2216,21 +2216,17 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) } void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep_if *__rep_if, + const struct mlx5_eswitch_rep_ops *ops, u8 rep_type) { - struct mlx5_eswitch_rep_if *rep_if; + struct mlx5_eswitch_rep_data *rep_data; struct mlx5_eswitch_rep *rep; int i; + esw->offloads.rep_ops[rep_type] = ops; mlx5_esw_for_all_reps(esw, i, rep) { - rep_if = &rep->rep_if[rep_type]; - rep_if->load = __rep_if->load; - rep_if->unload = __rep_if->unload; - rep_if->get_proto_dev = __rep_if->get_proto_dev; - rep_if->priv = __rep_if->priv; - - atomic_set(&rep_if->state, REP_REGISTERED); + rep_data = &rep->rep_data[rep_type]; + atomic_set(&rep_data->state, REP_REGISTERED); } } EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); @@ -2245,7 +2241,7 @@ void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type) __unload_reps_all_vport(esw, max_vf, rep_type); mlx5_esw_for_all_reps(esw, i, rep) - atomic_set(&rep->rep_if[rep_type].state, REP_UNREGISTERED); + atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); } EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps); @@ -2254,7 +2250,7 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) struct mlx5_eswitch_rep *rep; rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); - return rep->rep_if[rep_type].priv; + return rep->rep_data[rep_type].priv; } void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, @@ -2265,9 +2261,9 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, rep = mlx5_eswitch_get_rep(esw, vport); - if (atomic_read(&rep->rep_if[rep_type].state) == REP_LOADED && - rep->rep_if[rep_type].get_proto_dev) - return rep->rep_if[rep_type].get_proto_dev(rep); + if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED && + esw->offloads.rep_ops[rep_type]->get_proto_dev) + return esw->offloads.rep_ops[rep_type]->get_proto_dev(rep); return NULL; } EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev); diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 0ca77dd1429c..d81ee4df181c 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -29,17 +29,19 @@ enum { }; struct mlx5_eswitch_rep; -struct mlx5_eswitch_rep_if { - int (*load)(struct mlx5_core_dev *dev, - struct mlx5_eswitch_rep *rep); - void (*unload)(struct mlx5_eswitch_rep *rep); - void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep); - void *priv; - atomic_t state; +struct mlx5_eswitch_rep_ops { + int (*load)(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep); + void (*unload)(struct mlx5_eswitch_rep *rep); + void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep); +}; + +struct mlx5_eswitch_rep_data { + void *priv; + atomic_t state; }; struct mlx5_eswitch_rep { - struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES]; + struct mlx5_eswitch_rep_data rep_data[NUM_REP_TYPES]; u16 vport; u8 hw_id[ETH_ALEN]; u16 vlan; @@ -47,7 +49,7 @@ struct mlx5_eswitch_rep { }; void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep_if *rep_if, + const struct mlx5_eswitch_rep_ops *ops, u8 rep_type); void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type); void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, -- cgit v1.2.3 From 320587e6eac960591077b90271f40bfad24d6155 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 28 May 2019 10:57:34 +0100 Subject: net: sfp: add mandatory attach/detach methods for sfp buses Add attach and detach methods for SFP buses, which will allow us to get rid of the netdev storage in sfp-bus. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 16 ++++++++++++++++ drivers/net/phy/sfp-bus.c | 4 ++-- include/linux/sfp.h | 6 ++++++ 3 files changed, 24 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index eb07c3d8f09e..503f4b221696 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1650,6 +1650,20 @@ int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd) } EXPORT_SYMBOL_GPL(phylink_mii_ioctl); +static void phylink_sfp_attach(void *upstream, struct sfp_bus *bus) +{ + struct phylink *pl = upstream; + + pl->netdev->sfp_bus = bus; +} + +static void phylink_sfp_detach(void *upstream, struct sfp_bus *bus) +{ + struct phylink *pl = upstream; + + pl->netdev->sfp_bus = NULL; +} + static int phylink_sfp_module_insert(void *upstream, const struct sfp_eeprom_id *id) { @@ -1768,6 +1782,8 @@ static void phylink_sfp_disconnect_phy(void *upstream) } static const struct sfp_upstream_ops sfp_phylink_ops = { + .attach = phylink_sfp_attach, + .detach = phylink_sfp_detach, .module_insert = phylink_sfp_module_insert, .link_up = phylink_sfp_link_up, .link_down = phylink_sfp_link_down, diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index e9c187946cca..0608203cc752 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -351,7 +351,7 @@ static int sfp_register_bus(struct sfp_bus *bus) bus->socket_ops->attach(bus->sfp); if (bus->started) bus->socket_ops->start(bus->sfp); - bus->netdev->sfp_bus = bus; + bus->upstream_ops->attach(bus->upstream, bus); bus->registered = true; return 0; } @@ -360,8 +360,8 @@ static void sfp_unregister_bus(struct sfp_bus *bus) { const struct sfp_upstream_ops *ops = bus->upstream_ops; - bus->netdev->sfp_bus = NULL; if (bus->registered) { + bus->upstream_ops->detach(bus->upstream, bus); if (bus->started) bus->socket_ops->stop(bus->sfp); bus->socket_ops->detach(bus->sfp); diff --git a/include/linux/sfp.h b/include/linux/sfp.h index d9d9de3fcf8e..a3f0336dd703 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -469,6 +469,10 @@ struct sfp_bus; /** * struct sfp_upstream_ops - upstream operations structure + * @attach: called when the sfp socket driver is bound to the upstream + * (mandatory). + * @detach: called when the sfp socket driver is unbound from the upstream + * (mandatory). * @module_insert: called after a module has been detected to determine * whether the module is supported for the upstream device. * @module_remove: called after the module has been removed. @@ -481,6 +485,8 @@ struct sfp_bus; * been removed. */ struct sfp_upstream_ops { + void (*attach)(void *priv, struct sfp_bus *bus); + void (*detach)(void *priv, struct sfp_bus *bus); int (*module_insert)(void *priv, const struct sfp_eeprom_id *id); void (*module_remove)(void *priv); void (*link_down)(void *priv); -- cgit v1.2.3 From 54f70b3ba364f19291dc8b9cb096b02a00fb4461 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 28 May 2019 10:57:39 +0100 Subject: net: sfp: remove sfp-bus use of netdevs The sfp-bus code now no longer has any use for the network device structure, so remove its use. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 3 +-- drivers/net/phy/sfp-bus.c | 10 +++------- include/linux/sfp.h | 6 ++---- 3 files changed, 6 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 503f4b221696..f5b97dab3017 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -565,8 +565,7 @@ static int phylink_register_sfp(struct phylink *pl, return ret; } - pl->sfp_bus = sfp_register_upstream(ref.fwnode, pl->netdev, pl, - &sfp_phylink_ops); + pl->sfp_bus = sfp_register_upstream(ref.fwnode, pl, &sfp_phylink_ops); if (!pl->sfp_bus) return -ENOMEM; diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 0608203cc752..b23fc41896ef 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -24,7 +24,6 @@ struct sfp_bus { const struct sfp_upstream_ops *upstream_ops; void *upstream; - struct net_device *netdev; struct phy_device *phydev; bool registered; @@ -443,13 +442,11 @@ static void sfp_upstream_clear(struct sfp_bus *bus) { bus->upstream_ops = NULL; bus->upstream = NULL; - bus->netdev = NULL; } /** * sfp_register_upstream() - Register the neighbouring device * @fwnode: firmware node for the SFP bus - * @ndev: network device associated with the interface * @upstream: the upstream private data * @ops: the upstream's &struct sfp_upstream_ops * @@ -460,7 +457,7 @@ static void sfp_upstream_clear(struct sfp_bus *bus) * On error, returns %NULL. */ struct sfp_bus *sfp_register_upstream(struct fwnode_handle *fwnode, - struct net_device *ndev, void *upstream, + void *upstream, const struct sfp_upstream_ops *ops) { struct sfp_bus *bus = sfp_bus_get(fwnode); @@ -470,7 +467,6 @@ struct sfp_bus *sfp_register_upstream(struct fwnode_handle *fwnode, rtnl_lock(); bus->upstream_ops = ops; bus->upstream = upstream; - bus->netdev = ndev; if (bus->sfp) { ret = sfp_register_bus(bus); @@ -592,7 +588,7 @@ struct sfp_bus *sfp_register_socket(struct device *dev, struct sfp *sfp, bus->sfp = sfp; bus->socket_ops = ops; - if (bus->netdev) { + if (bus->upstream_ops) { ret = sfp_register_bus(bus); if (ret) sfp_socket_clear(bus); @@ -612,7 +608,7 @@ EXPORT_SYMBOL_GPL(sfp_register_socket); void sfp_unregister_socket(struct sfp_bus *bus) { rtnl_lock(); - if (bus->netdev) + if (bus->upstream_ops) sfp_unregister_bus(bus); sfp_socket_clear(bus); rtnl_unlock(); diff --git a/include/linux/sfp.h b/include/linux/sfp.h index a3f0336dd703..1c35428e98bc 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -464,7 +464,6 @@ enum { struct fwnode_handle; struct ethtool_eeprom; struct ethtool_modinfo; -struct net_device; struct sfp_bus; /** @@ -510,7 +509,7 @@ int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee, void sfp_upstream_start(struct sfp_bus *bus); void sfp_upstream_stop(struct sfp_bus *bus); struct sfp_bus *sfp_register_upstream(struct fwnode_handle *fwnode, - struct net_device *ndev, void *upstream, + void *upstream, const struct sfp_upstream_ops *ops); void sfp_unregister_upstream(struct sfp_bus *bus); #else @@ -555,8 +554,7 @@ static inline void sfp_upstream_stop(struct sfp_bus *bus) } static inline struct sfp_bus *sfp_register_upstream( - struct fwnode_handle *fwnode, - struct net_device *ndev, void *upstream, + struct fwnode_handle *fwnode, void *upstream, const struct sfp_upstream_ops *ops) { return (struct sfp_bus *)-1; -- cgit v1.2.3 From 0ccc171ea6a2fa34a6b898329c0a447c84e27057 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Wed, 30 Jan 2019 17:21:55 +0200 Subject: net/mlx5: Geneve, Manage Geneve TLV options Use Geneve TLV Options object to manage the flex parser matching on the 32-bit options data. When the first flow with a certain class/type values is requested to be offloaded, create a FW object with FW command (Geneve TLV Options general object) and start counting the number of flows using this object. During this time, any request with a different class/type values will fail to be offloaded. Once the refcount reaches 0, destroy the TLV options general object, and can now offload a flow with any class/type parameters. Geneve TLV Options object is added to core device. It is currently used to manage Geneve TLV options general object allocation in FW and its reference counting only. In the future it will also be used for managing geneve ports by registering callbacks for ndo_udp_tunnel_add/del. Reviewed-by: Oz Shlomo Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 3 +- .../net/ethernet/mellanox/mlx5/core/lib/geneve.c | 157 +++++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/lib/geneve.h | 33 +++++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 + include/linux/mlx5/driver.h | 2 + 5 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 243368dc23db..e31027277a6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -31,7 +31,8 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o -mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \ + lib/geneve.o # # Core extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c new file mode 100644 index 000000000000..23361a9ae4fa --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include +#include "mlx5_core.h" +#include "geneve.h" + +struct mlx5_geneve { + struct mlx5_core_dev *mdev; + __be16 opt_class; + u8 opt_type; + u32 obj_id; + struct mutex sync_lock; /* protect GENEVE obj operations */ + u32 refcount; +}; + +static int mlx5_geneve_tlv_option_create(struct mlx5_core_dev *mdev, + __be16 class, + u8 type, + u8 len) +{ + u32 in[MLX5_ST_SZ_DW(create_geneve_tlv_option_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u64 general_obj_types; + void *hdr, *opt; + u16 obj_id; + int err; + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT)) + return -EINVAL; + + hdr = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, hdr); + opt = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, geneve_tlv_opt); + + MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT); + + MLX5_SET(geneve_tlv_option, opt, option_class, be16_to_cpu(class)); + MLX5_SET(geneve_tlv_option, opt, option_type, type); + MLX5_SET(geneve_tlv_option, opt, option_data_length, len); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + return obj_id; +} + +static void mlx5_geneve_tlv_option_destroy(struct mlx5_core_dev *mdev, u16 obj_id) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt) +{ + int res = 0; + + if (IS_ERR_OR_NULL(geneve)) + return -EOPNOTSUPP; + + mutex_lock(&geneve->sync_lock); + + if (geneve->refcount) { + if (geneve->opt_class == opt->opt_class && + geneve->opt_type == opt->type) { + /* We already have TLV options obj allocated */ + geneve->refcount++; + } else { + /* TLV options obj allocated, but its params + * do not match the new request. + * We support only one such object. + */ + mlx5_core_warn(geneve->mdev, + "Won't create Geneve TLV opt object with class:type:len = 0x%x:0x%x:%d (another class:type already exists)\n", + be16_to_cpu(opt->opt_class), + opt->type, + opt->length); + res = -EOPNOTSUPP; + goto unlock; + } + } else { + /* We don't have any TLV options obj allocated */ + + res = mlx5_geneve_tlv_option_create(geneve->mdev, + opt->opt_class, + opt->type, + opt->length); + if (res < 0) { + mlx5_core_warn(geneve->mdev, + "Failed creating Geneve TLV opt object class:type:len = 0x%x:0x%x:%d (err=%d)\n", + be16_to_cpu(opt->opt_class), + opt->type, opt->length, res); + goto unlock; + } + geneve->opt_class = opt->opt_class; + geneve->opt_type = opt->type; + geneve->obj_id = res; + geneve->refcount++; + } + +unlock: + mutex_unlock(&geneve->sync_lock); + return res; +} + +void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) +{ + if (IS_ERR_OR_NULL(geneve)) + return; + + mutex_lock(&geneve->sync_lock); + if (--geneve->refcount == 0) { + /* We've just removed the last user of Geneve option. + * Now delete the object in FW. + */ + mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id); + + geneve->opt_class = 0; + geneve->opt_type = 0; + geneve->obj_id = 0; + } + mutex_unlock(&geneve->sync_lock); +} + +struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev) +{ + struct mlx5_geneve *geneve = + kzalloc(sizeof(*geneve), GFP_KERNEL); + + if (!geneve) + return ERR_PTR(-ENOMEM); + geneve->mdev = mdev; + mutex_init(&geneve->sync_lock); + + return geneve; +} + +void mlx5_geneve_destroy(struct mlx5_geneve *geneve) +{ + if (IS_ERR_OR_NULL(geneve)) + return; + + /* Lockless since we are unloading */ + if (geneve->refcount) + mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id); + + kfree(geneve); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h new file mode 100644 index 000000000000..adee0cbba19c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_GENEVE_H__ +#define __MLX5_GENEVE_H__ + +#include +#include + +struct mlx5_geneve; + +#ifdef CONFIG_MLX5_ESWITCH + +struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev); +void mlx5_geneve_destroy(struct mlx5_geneve *geneve); + +int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt); +void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline struct mlx5_geneve +*mlx5_geneve_create(struct mlx5_core_dev *mdev) { return NULL; } +static inline void +mlx5_geneve_destroy(struct mlx5_geneve *geneve) {} +static inline int +mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt) { return 0; } +static inline void +mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) {} + +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif /* __MLX5_GENEVE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 23d53163ce15..b27f9537256c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -63,6 +63,7 @@ #include "accel/tls.h" #include "lib/clock.h" #include "lib/vxlan.h" +#include "lib/geneve.h" #include "lib/devcom.h" #include "diag/fw_tracer.h" #include "ecpf.h" @@ -821,6 +822,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) mlx5_init_clock(dev); dev->vxlan = mlx5_vxlan_create(dev); + dev->geneve = mlx5_geneve_create(dev); err = mlx5_init_rl_table(dev); if (err) { @@ -865,6 +867,7 @@ err_mpfs_cleanup: err_rl_cleanup: mlx5_cleanup_rl_table(dev); err_tables_cleanup: + mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_mkey_table(dev); mlx5_cleanup_qp_table(dev); @@ -887,6 +890,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_eswitch_cleanup(dev->priv.eswitch); mlx5_mpfs_cleanup(dev); mlx5_cleanup_rl_table(dev); + mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_clock(dev); mlx5_cleanup_reserved_gids(dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b5431f7d97cb..3a810bf043fe 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -647,6 +647,7 @@ struct mlx5_clock { struct mlx5_fw_tracer; struct mlx5_vxlan; +struct mlx5_geneve; struct mlx5_core_dev { struct device *device; @@ -681,6 +682,7 @@ struct mlx5_core_dev { u32 issi; struct mlx5e_resources mlx5e_res; struct mlx5_vxlan *vxlan; + struct mlx5_geneve *geneve; struct { struct mlx5_rsvd_gids reserved_gids; u32 roce_en; -- cgit v1.2.3 From 87e5e6dab6c2a21fab2620f37786276d202e2ce0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 May 2019 16:02:22 -0600 Subject: uio: make import_iovec()/compat_import_iovec() return bytes on success Currently these functions return < 0 on error, and 0 for success. Change that so that we return < 0 on error, but number of bytes for success. Some callers already treat the return value that way, others need a slight tweak. Signed-off-by: Jens Axboe --- fs/aio.c | 9 +++++---- fs/io_uring.c | 16 ++++++++-------- fs/splice.c | 8 ++++---- include/linux/uio.h | 4 ++-- lib/iov_iter.c | 15 ++++++++------- net/compat.c | 3 ++- net/socket.c | 3 ++- 7 files changed, 31 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/fs/aio.c b/fs/aio.c index 3490d1fa0e16..41824c710b36 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1479,8 +1479,9 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) return 0; } -static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec, - bool vectored, bool compat, struct iov_iter *iter) +static ssize_t aio_setup_rw(int rw, const struct iocb *iocb, + struct iovec **iovec, bool vectored, bool compat, + struct iov_iter *iter) { void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf; size_t len = iocb->aio_nbytes; @@ -1537,7 +1538,7 @@ static int aio_read(struct kiocb *req, const struct iocb *iocb, return -EINVAL; ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter); - if (ret) + if (ret < 0) return ret; ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter)); if (!ret) @@ -1565,7 +1566,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb, return -EINVAL; ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter); - if (ret) + if (ret < 0) return ret; ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter)); if (!ret) { diff --git a/fs/io_uring.c b/fs/io_uring.c index 0fbb486a320e..23e08c10f486 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1003,9 +1003,9 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw, return 0; } -static int io_import_iovec(struct io_ring_ctx *ctx, int rw, - const struct sqe_submit *s, struct iovec **iovec, - struct iov_iter *iter) +static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw, + const struct sqe_submit *s, struct iovec **iovec, + struct iov_iter *iter) { const struct io_uring_sqe *sqe = s->sqe; void __user *buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); @@ -1023,7 +1023,7 @@ static int io_import_iovec(struct io_ring_ctx *ctx, int rw, opcode = READ_ONCE(sqe->opcode); if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { - int ret = io_import_fixed(ctx, rw, sqe, iter); + ssize_t ret = io_import_fixed(ctx, rw, sqe, iter); *iovec = NULL; return ret; } @@ -1089,7 +1089,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s, struct iov_iter iter; struct file *file; size_t iov_count; - int ret; + ssize_t ret; ret = io_prep_rw(req, s, force_nonblock); if (ret) @@ -1102,7 +1102,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s, return -EINVAL; ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter); - if (ret) + if (ret < 0) return ret; iov_count = iov_iter_count(&iter); @@ -1136,7 +1136,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s, struct iov_iter iter; struct file *file; size_t iov_count; - int ret; + ssize_t ret; ret = io_prep_rw(req, s, force_nonblock); if (ret) @@ -1149,7 +1149,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s, return -EINVAL; ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter); - if (ret) + if (ret < 0) return ret; iov_count = iov_iter_count(&iter); diff --git a/fs/splice.c b/fs/splice.c index 14cb602d9a2f..98412721f056 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1356,7 +1356,7 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; - long error; + ssize_t error; struct fd f; int type; @@ -1367,7 +1367,7 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, error = import_iovec(type, uiov, nr_segs, ARRAY_SIZE(iovstack), &iov, &iter); - if (!error) { + if (error >= 0) { error = do_vmsplice(f.file, &iter, flags); kfree(iov); } @@ -1382,7 +1382,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; - long error; + ssize_t error; struct fd f; int type; @@ -1393,7 +1393,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io error = compat_import_iovec(type, iov32, nr_segs, ARRAY_SIZE(iovstack), &iov, &iter); - if (!error) { + if (error >= 0) { error = do_vmsplice(f.file, &iter, flags); kfree(iov); } diff --git a/include/linux/uio.h b/include/linux/uio.h index 2d0131ad4604..a61ceb6575ab 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -279,13 +279,13 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, struct iov_iter *i); -int import_iovec(int type, const struct iovec __user * uvector, +ssize_t import_iovec(int type, const struct iovec __user * uvector, unsigned nr_segs, unsigned fast_segs, struct iovec **iov, struct iov_iter *i); #ifdef CONFIG_COMPAT struct compat_iovec; -int compat_import_iovec(int type, const struct compat_iovec __user * uvector, +ssize_t compat_import_iovec(int type, const struct compat_iovec __user * uvector, unsigned nr_segs, unsigned fast_segs, struct iovec **iov, struct iov_iter *i); #endif diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f99c41d4eb54..f1e0569b4539 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1634,9 +1634,9 @@ EXPORT_SYMBOL(dup_iter); * on-stack array was used or not (and regardless of whether this function * returns an error or not). * - * Return: 0 on success or negative error code on error. + * Return: Negative error code on error, bytes imported on success */ -int import_iovec(int type, const struct iovec __user * uvector, +ssize_t import_iovec(int type, const struct iovec __user * uvector, unsigned nr_segs, unsigned fast_segs, struct iovec **iov, struct iov_iter *i) { @@ -1652,16 +1652,17 @@ int import_iovec(int type, const struct iovec __user * uvector, } iov_iter_init(i, type, p, nr_segs, n); *iov = p == *iov ? NULL : p; - return 0; + return n; } EXPORT_SYMBOL(import_iovec); #ifdef CONFIG_COMPAT #include -int compat_import_iovec(int type, const struct compat_iovec __user * uvector, - unsigned nr_segs, unsigned fast_segs, - struct iovec **iov, struct iov_iter *i) +ssize_t compat_import_iovec(int type, + const struct compat_iovec __user * uvector, + unsigned nr_segs, unsigned fast_segs, + struct iovec **iov, struct iov_iter *i) { ssize_t n; struct iovec *p; @@ -1675,7 +1676,7 @@ int compat_import_iovec(int type, const struct compat_iovec __user * uvector, } iov_iter_init(i, type, p, nr_segs, n); *iov = p == *iov ? NULL : p; - return 0; + return n; } #endif diff --git a/net/compat.c b/net/compat.c index 3f9ce609397f..0f7ded26059e 100644 --- a/net/compat.c +++ b/net/compat.c @@ -80,9 +80,10 @@ int get_compat_msghdr(struct msghdr *kmsg, kmsg->msg_iocb = NULL; - return compat_import_iovec(save_addr ? READ : WRITE, + err = compat_import_iovec(save_addr ? READ : WRITE, compat_ptr(msg.msg_iov), msg.msg_iovlen, UIO_FASTIOV, iov, &kmsg->msg_iter); + return err < 0 ? err : 0; } /* Bleech... */ diff --git a/net/socket.c b/net/socket.c index 72372dc5dd70..bffec466b4f1 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2208,9 +2208,10 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, kmsg->msg_iocb = NULL; - return import_iovec(save_addr ? READ : WRITE, + err = import_iovec(save_addr ? READ : WRITE, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV, iov, &kmsg->msg_iter); + return err < 0 ? err : 0; } static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, -- cgit v1.2.3 From 1f52f6c0b0e846908e9c1082dab1b3f7088b82ac Mon Sep 17 00:00:00 2001 From: brakmo Date: Tue, 28 May 2019 16:59:35 -0700 Subject: bpf: Create BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY Create new macro BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY() to be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs so BPF programs can request cwr for TCP packets. Current cgroup skb programs can only return 0 or 1 (0 to drop the packet. This macro changes the behavior so the low order bit indicates whether the packet should be dropped (0) or not (1) and the next bit is used for congestion notification (cn). Hence, new allowed return values of CGROUP EGRESS BPF programs are: 0: drop packet 1: keep packet 2: drop packet and call cwr 3: keep packet and call cwr This macro then converts it to one of NET_XMIT values or -EPERM that has the effect of dropping the packet with no cn. 0: NET_XMIT_SUCCESS skb should be transmitted (no cn) 1: NET_XMIT_DROP skb should be dropped and cwr called 2: NET_XMIT_CN skb should be transmitted and cwr called 3: -EPERM skb should be dropped (no cn) Note that when more than one BPF program is called, the packet is dropped if at least one of programs requests it be dropped, and there is cn if at least one program returns cn. Signed-off-by: Lawrence Brakmo Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ff3e00ff84d2..2cc58fc0f413 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -552,6 +552,56 @@ _out: \ _ret; \ }) +/* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs + * so BPF programs can request cwr for TCP packets. + * + * Current cgroup skb programs can only return 0 or 1 (0 to drop the + * packet. This macro changes the behavior so the low order bit + * indicates whether the packet should be dropped (0) or not (1) + * and the next bit is a congestion notification bit. This could be + * used by TCP to call tcp_enter_cwr() + * + * Hence, new allowed return values of CGROUP EGRESS BPF programs are: + * 0: drop packet + * 1: keep packet + * 2: drop packet and cn + * 3: keep packet and cn + * + * This macro then converts it to one of the NET_XMIT or an error + * code that is then interpreted as drop packet (and no cn): + * 0: NET_XMIT_SUCCESS skb should be transmitted + * 1: NET_XMIT_DROP skb should be dropped and cn + * 2: NET_XMIT_CN skb should be transmitted and cn + * 3: -EPERM skb should be dropped + */ +#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \ + ({ \ + struct bpf_prog_array_item *_item; \ + struct bpf_prog *_prog; \ + struct bpf_prog_array *_array; \ + u32 ret; \ + u32 _ret = 1; \ + u32 _cn = 0; \ + preempt_disable(); \ + rcu_read_lock(); \ + _array = rcu_dereference(array); \ + _item = &_array->items[0]; \ + while ((_prog = READ_ONCE(_item->prog))) { \ + bpf_cgroup_storage_set(_item->cgroup_storage); \ + ret = func(_prog, ctx); \ + _ret &= (ret & 1); \ + _cn |= (ret & 2); \ + _item++; \ + } \ + rcu_read_unlock(); \ + preempt_enable(); \ + if (_ret) \ + _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ + else \ + _ret = (_cn ? NET_XMIT_DROP : -EPERM); \ + _ret; \ + }) + #define BPF_PROG_RUN_ARRAY(array, ctx, func) \ __BPF_PROG_RUN_ARRAY(array, ctx, func, false) -- cgit v1.2.3 From 5cf1e91456301f8c4f6bbc63ff76cff12f92f31b Mon Sep 17 00:00:00 2001 From: brakmo Date: Tue, 28 May 2019 16:59:36 -0700 Subject: bpf: cgroup inet skb programs can return 0 to 3 Allows cgroup inet skb programs to return values in the range [0, 3]. The second bit is used to deterine if congestion occurred and higher level protocol should decrease rate. E.g. TCP would call tcp_enter_cwr() The bpf_prog must set expected_attach_type to BPF_CGROUP_INET_EGRESS at load time if it uses the new return values (i.e. 2 or 3). The expected_attach_type is currently not enforced for BPF_PROG_TYPE_CGROUP_SKB. e.g Meaning the current bpf_prog with expected_attach_type setting to BPF_CGROUP_INET_EGRESS can attach to BPF_CGROUP_INET_INGRESS. Blindly enforcing expected_attach_type will break backward compatibility. This patch adds a enforce_expected_attach_type bit to only enforce the expected_attach_type when it uses the new return value. Signed-off-by: Lawrence Brakmo Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 3 ++- kernel/bpf/syscall.c | 12 ++++++++++++ kernel/bpf/verifier.c | 16 +++++++++++++--- 3 files changed, 27 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index ba8b65270e0d..43b45d6db36d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -526,7 +526,8 @@ struct bpf_prog { blinded:1, /* Was blinded */ is_func:1, /* program is a bpf function */ kprobe_override:1, /* Do we override a kprobe? */ - has_callchain_buf:1; /* callchain buffer allocated? */ + has_callchain_buf:1, /* callchain buffer allocated? */ + enforce_expected_attach_type:1; /* Enforce expected_attach_type checking at attach time */ enum bpf_prog_type type; /* Type of BPF program */ enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 3d546b6f4646..1539774d78c7 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1585,6 +1585,14 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type, default: return -EINVAL; } + case BPF_PROG_TYPE_CGROUP_SKB: + switch (expected_attach_type) { + case BPF_CGROUP_INET_INGRESS: + case BPF_CGROUP_INET_EGRESS: + return 0; + default: + return -EINVAL; + } default: return 0; } @@ -1836,6 +1844,10 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, case BPF_PROG_TYPE_CGROUP_SOCK: case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: return attach_type == prog->expected_attach_type ? 0 : -EINVAL; + case BPF_PROG_TYPE_CGROUP_SKB: + return prog->enforce_expected_attach_type && + prog->expected_attach_type != attach_type ? + -EINVAL : 0; default: return 0; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2778417e6e0c..5c2cb5bd84ce 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5508,11 +5508,16 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) static int check_return_code(struct bpf_verifier_env *env) { + struct tnum enforce_attach_type_range = tnum_unknown; struct bpf_reg_state *reg; struct tnum range = tnum_range(0, 1); switch (env->prog->type) { case BPF_PROG_TYPE_CGROUP_SKB: + if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) { + range = tnum_range(0, 3); + enforce_attach_type_range = tnum_range(2, 3); + } case BPF_PROG_TYPE_CGROUP_SOCK: case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: case BPF_PROG_TYPE_SOCK_OPS: @@ -5531,18 +5536,23 @@ static int check_return_code(struct bpf_verifier_env *env) } if (!tnum_in(range, reg->var_off)) { + char tn_buf[48]; + verbose(env, "At program exit the register R0 "); if (!tnum_is_unknown(reg->var_off)) { - char tn_buf[48]; - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); verbose(env, "has value %s", tn_buf); } else { verbose(env, "has unknown scalar value"); } - verbose(env, " should have been 0 or 1\n"); + tnum_strn(tn_buf, sizeof(tn_buf), range); + verbose(env, " should have been %s\n", tn_buf); return -EINVAL; } + + if (!tnum_is_unknown(enforce_attach_type_range) && + tnum_in(enforce_attach_type_range, reg->var_off)) + env->prog->enforce_expected_attach_type = 1; return 0; } -- cgit v1.2.3 From 3539b96e041c06e4317082816d90ec09160aeb11 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 29 May 2019 18:03:57 -0700 Subject: bpf: group memory related fields in struct bpf_map_memory Group "user" and "pages" fields of bpf_map into the bpf_map_memory structure. Later it can be extended with "memcg" and other related information. The main reason for a such change (beside cosmetics) is to pass bpf_map_memory structure to charging functions before the actual allocation of bpf_map. Signed-off-by: Roman Gushchin Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 10 +++++++--- kernel/bpf/arraymap.c | 2 +- kernel/bpf/cpumap.c | 4 ++-- kernel/bpf/devmap.c | 4 ++-- kernel/bpf/hashtab.c | 4 ++-- kernel/bpf/local_storage.c | 2 +- kernel/bpf/lpm_trie.c | 4 ++-- kernel/bpf/queue_stack_maps.c | 2 +- kernel/bpf/reuseport_array.c | 2 +- kernel/bpf/stackmap.c | 4 ++-- kernel/bpf/syscall.c | 19 ++++++++++--------- kernel/bpf/xskmap.c | 4 ++-- net/core/bpf_sk_storage.c | 2 +- net/core/sock_map.c | 4 ++-- 14 files changed, 36 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2cc58fc0f413..2e7c1c40d949 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -66,6 +66,11 @@ struct bpf_map_ops { u64 imm, u32 *off); }; +struct bpf_map_memory { + u32 pages; + struct user_struct *user; +}; + struct bpf_map { /* The first two cachelines with read-mostly members of which some * are also accessed in fast-path (e.g. ops, max_entries). @@ -86,7 +91,7 @@ struct bpf_map { u32 btf_key_type_id; u32 btf_value_type_id; struct btf *btf; - u32 pages; + struct bpf_map_memory memory; bool unpriv_array; bool frozen; /* write-once */ /* 48 bytes hole */ @@ -94,8 +99,7 @@ struct bpf_map { /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. */ - struct user_struct *user ____cacheline_aligned; - atomic_t refcnt; + atomic_t refcnt ____cacheline_aligned; atomic_t usercnt; struct work_struct work; char name[BPF_OBJ_NAME_LEN]; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 584636c9e2eb..8fda24e78193 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -138,7 +138,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) /* copy mandatory map attributes */ bpf_map_init_from_attr(&array->map, attr); - array->map.pages = cost; + array->map.memory.pages = cost; array->elem_size = elem_size; if (percpu && bpf_array_alloc_percpu(array)) { diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index cf727d77c6c6..035268add724 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -108,10 +108,10 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) cost += cpu_map_bitmap_size(attr) * num_possible_cpus(); if (cost >= U32_MAX - PAGE_SIZE) goto free_cmap; - cmap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + cmap->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; /* Notice returns -EPERM on if map size is larger than memlock limit */ - ret = bpf_map_precharge_memlock(cmap->map.pages); + ret = bpf_map_precharge_memlock(cmap->map.memory.pages); if (ret) { err = ret; goto free_cmap; diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 1e525d70f833..f6c57efb1d0d 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -111,10 +111,10 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) if (cost >= U32_MAX - PAGE_SIZE) goto free_dtab; - dtab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + dtab->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; /* if map size is larger than memlock limit, reject it early */ - err = bpf_map_precharge_memlock(dtab->map.pages); + err = bpf_map_precharge_memlock(dtab->map.memory.pages); if (err) goto free_dtab; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 0f2708fde5f7..15bf228d2e98 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -364,10 +364,10 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) /* make sure page count doesn't overflow */ goto free_htab; - htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + htab->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; /* if map size is larger than memlock limit, reject it early */ - err = bpf_map_precharge_memlock(htab->map.pages); + err = bpf_map_precharge_memlock(htab->map.memory.pages); if (err) goto free_htab; diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index e48302ecb389..574325276650 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -303,7 +303,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) if (!map) return ERR_PTR(-ENOMEM); - map->map.pages = pages; + map->map.memory.pages = pages; /* copy mandatory map attributes */ bpf_map_init_from_attr(&map->map, attr); diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index e61630c2e50b..8e423a582760 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -578,9 +578,9 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) goto out_err; } - trie->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + trie->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; - ret = bpf_map_precharge_memlock(trie->map.pages); + ret = bpf_map_precharge_memlock(trie->map.memory.pages); if (ret) goto out_err; diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index 0b140d236889..8a510e71d486 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -89,7 +89,7 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr) bpf_map_init_from_attr(&qs->map, attr); - qs->map.pages = cost; + qs->map.memory.pages = cost; qs->size = size; raw_spin_lock_init(&qs->lock); diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index 18e225de80ff..819515242739 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -176,7 +176,7 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr) /* copy mandatory map attributes */ bpf_map_init_from_attr(&array->map, attr); - array->map.pages = cost; + array->map.memory.pages = cost; return &array->map; } diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 950ab2f28922..08d4efff73ac 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -131,9 +131,9 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) bpf_map_init_from_attr(&smap->map, attr); smap->map.value_size = value_size; smap->n_buckets = n_buckets; - smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + smap->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; - err = bpf_map_precharge_memlock(smap->map.pages); + err = bpf_map_precharge_memlock(smap->map.memory.pages); if (err) goto free_smap; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 1539774d78c7..8289a2ce14fc 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -222,19 +222,20 @@ static int bpf_map_init_memlock(struct bpf_map *map) struct user_struct *user = get_current_user(); int ret; - ret = bpf_charge_memlock(user, map->pages); + ret = bpf_charge_memlock(user, map->memory.pages); if (ret) { free_uid(user); return ret; } - map->user = user; + map->memory.user = user; return ret; } static void bpf_map_release_memlock(struct bpf_map *map) { - struct user_struct *user = map->user; - bpf_uncharge_memlock(user, map->pages); + struct user_struct *user = map->memory.user; + + bpf_uncharge_memlock(user, map->memory.pages); free_uid(user); } @@ -242,17 +243,17 @@ int bpf_map_charge_memlock(struct bpf_map *map, u32 pages) { int ret; - ret = bpf_charge_memlock(map->user, pages); + ret = bpf_charge_memlock(map->memory.user, pages); if (ret) return ret; - map->pages += pages; + map->memory.pages += pages; return ret; } void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages) { - bpf_uncharge_memlock(map->user, pages); - map->pages -= pages; + bpf_uncharge_memlock(map->memory.user, pages); + map->memory.pages -= pages; } static int bpf_map_alloc_id(struct bpf_map *map) @@ -395,7 +396,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) map->value_size, map->max_entries, map->map_flags, - map->pages * 1ULL << PAGE_SHIFT, + map->memory.pages * 1ULL << PAGE_SHIFT, map->id, READ_ONCE(map->frozen)); diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index 686d244e798d..f816ee1a0fa0 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c @@ -40,10 +40,10 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) if (cost >= U32_MAX - PAGE_SIZE) goto free_m; - m->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + m->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; /* Notice returns -EPERM on if map size is larger than memlock limit */ - err = bpf_map_precharge_memlock(m->map.pages); + err = bpf_map_precharge_memlock(m->map.memory.pages); if (err) goto free_m; diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 9a8aaf8e235d..92581c3ff220 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -659,7 +659,7 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size; smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) % BPF_SK_STORAGE_CACHE_SIZE; - smap->map.pages = pages; + smap->map.memory.pages = pages; return &smap->map; } diff --git a/net/core/sock_map.c b/net/core/sock_map.c index be6092ac69f8..4eb5b6a1b29f 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -49,8 +49,8 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) goto free_stab; } - stab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; - err = bpf_map_precharge_memlock(stab->map.pages); + stab->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + err = bpf_map_precharge_memlock(stab->map.memory.pages); if (err) goto free_stab; -- cgit v1.2.3 From b936ca643ade11f265fa10e5fb71c20d9c5243f1 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 29 May 2019 18:03:58 -0700 Subject: bpf: rework memlock-based memory accounting for maps In order to unify the existing memlock charging code with the memcg-based memory accounting, which will be added later, let's rework the current scheme. Currently the following design is used: 1) .alloc() callback optionally checks if the allocation will likely succeed using bpf_map_precharge_memlock() 2) .alloc() performs actual allocations 3) .alloc() callback calculates map cost and sets map.memory.pages 4) map_create() calls bpf_map_init_memlock() which sets map.memory.user and performs actual charging; in case of failure the map is destroyed 1) bpf_map_free_deferred() calls bpf_map_release_memlock(), which performs uncharge and releases the user 2) .map_free() callback releases the memory The scheme can be simplified and made more robust: 1) .alloc() calculates map cost and calls bpf_map_charge_init() 2) bpf_map_charge_init() sets map.memory.user and performs actual charge 3) .alloc() performs actual allocations 1) .map_free() callback releases the memory 2) bpf_map_charge_finish() performs uncharge and releases the user The new scheme also allows to reuse bpf_map_charge_init()/finish() functions for memcg-based accounting. Because charges are performed before actual allocations and uncharges after freeing the memory, no bogus memory pressure can be created. In cases when the map structure is not available (e.g. it's not created yet, or is already destroyed), on-stack bpf_map_memory structure is used. The charge can be transferred with the bpf_map_charge_move() function. Signed-off-by: Roman Gushchin Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 5 +++- kernel/bpf/arraymap.c | 10 +++++-- kernel/bpf/cpumap.c | 8 +++-- kernel/bpf/devmap.c | 13 ++++---- kernel/bpf/hashtab.c | 11 +++---- kernel/bpf/local_storage.c | 9 ++++-- kernel/bpf/lpm_trie.c | 5 ++-- kernel/bpf/queue_stack_maps.c | 9 ++++-- kernel/bpf/reuseport_array.c | 9 ++++-- kernel/bpf/stackmap.c | 30 +++++++++++-------- kernel/bpf/syscall.c | 69 +++++++++++++++++++++---------------------- kernel/bpf/xskmap.c | 9 +++--- net/core/bpf_sk_storage.c | 8 +++-- net/core/sock_map.c | 5 ++-- 14 files changed, 112 insertions(+), 88 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2e7c1c40d949..3c8f24f402bf 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -650,9 +650,12 @@ struct bpf_map *__bpf_map_get(struct fd f); struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); -int bpf_map_precharge_memlock(u32 pages); int bpf_map_charge_memlock(struct bpf_map *map, u32 pages); void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages); +int bpf_map_charge_init(struct bpf_map_memory *mem, u32 pages); +void bpf_map_charge_finish(struct bpf_map_memory *mem); +void bpf_map_charge_move(struct bpf_map_memory *dst, + struct bpf_map_memory *src); void *bpf_map_area_alloc(size_t size, int numa_node); void bpf_map_area_free(void *base); void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 8fda24e78193..3552da4407d9 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -83,6 +83,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) u32 elem_size, index_mask, max_entries; bool unpriv = !capable(CAP_SYS_ADMIN); u64 cost, array_size, mask64; + struct bpf_map_memory mem; struct bpf_array *array; elem_size = round_up(attr->value_size, 8); @@ -125,23 +126,26 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) } cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; - ret = bpf_map_precharge_memlock(cost); + ret = bpf_map_charge_init(&mem, cost); if (ret < 0) return ERR_PTR(ret); /* allocate all map elements and zero-initialize them */ array = bpf_map_area_alloc(array_size, numa_node); - if (!array) + if (!array) { + bpf_map_charge_finish(&mem); return ERR_PTR(-ENOMEM); + } array->index_mask = index_mask; array->map.unpriv_array = unpriv; /* copy mandatory map attributes */ bpf_map_init_from_attr(&array->map, attr); - array->map.memory.pages = cost; + bpf_map_charge_move(&array->map.memory, &mem); array->elem_size = elem_size; if (percpu && bpf_array_alloc_percpu(array)) { + bpf_map_charge_finish(&array->map.memory); bpf_map_area_free(array); return ERR_PTR(-ENOMEM); } diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 035268add724..c633c8d68023 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -108,10 +108,10 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) cost += cpu_map_bitmap_size(attr) * num_possible_cpus(); if (cost >= U32_MAX - PAGE_SIZE) goto free_cmap; - cmap->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; /* Notice returns -EPERM on if map size is larger than memlock limit */ - ret = bpf_map_precharge_memlock(cmap->map.memory.pages); + ret = bpf_map_charge_init(&cmap->map.memory, + round_up(cost, PAGE_SIZE) >> PAGE_SHIFT); if (ret) { err = ret; goto free_cmap; @@ -121,7 +121,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr), __alignof__(unsigned long)); if (!cmap->flush_needed) - goto free_cmap; + goto free_charge; /* Alloc array for possible remote "destination" CPUs */ cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries * @@ -133,6 +133,8 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) return &cmap->map; free_percpu: free_percpu(cmap->flush_needed); +free_charge: + bpf_map_charge_finish(&cmap->map.memory); free_cmap: kfree(cmap); return ERR_PTR(err); diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index f6c57efb1d0d..371bd880ed58 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -111,10 +111,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) if (cost >= U32_MAX - PAGE_SIZE) goto free_dtab; - dtab->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; - - /* if map size is larger than memlock limit, reject it early */ - err = bpf_map_precharge_memlock(dtab->map.memory.pages); + /* if map size is larger than memlock limit, reject it */ + err = bpf_map_charge_init(&dtab->map.memory, + round_up(cost, PAGE_SIZE) >> PAGE_SHIFT); if (err) goto free_dtab; @@ -125,19 +124,21 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) __alignof__(unsigned long), GFP_KERNEL | __GFP_NOWARN); if (!dtab->flush_needed) - goto free_dtab; + goto free_charge; dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *), dtab->map.numa_node); if (!dtab->netdev_map) - goto free_dtab; + goto free_charge; spin_lock(&dev_map_lock); list_add_tail_rcu(&dtab->list, &dev_map_list); spin_unlock(&dev_map_lock); return &dtab->map; +free_charge: + bpf_map_charge_finish(&dtab->map.memory); free_dtab: free_percpu(dtab->flush_needed); kfree(dtab); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 15bf228d2e98..b0bdc7b040ad 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -364,10 +364,9 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) /* make sure page count doesn't overflow */ goto free_htab; - htab->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; - - /* if map size is larger than memlock limit, reject it early */ - err = bpf_map_precharge_memlock(htab->map.memory.pages); + /* if map size is larger than memlock limit, reject it */ + err = bpf_map_charge_init(&htab->map.memory, + round_up(cost, PAGE_SIZE) >> PAGE_SHIFT); if (err) goto free_htab; @@ -376,7 +375,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) sizeof(struct bucket), htab->map.numa_node); if (!htab->buckets) - goto free_htab; + goto free_charge; if (htab->map.map_flags & BPF_F_ZERO_SEED) htab->hashrnd = 0; @@ -409,6 +408,8 @@ free_prealloc: prealloc_destroy(htab); free_buckets: bpf_map_area_free(htab->buckets); +free_charge: + bpf_map_charge_finish(&htab->map.memory); free_htab: kfree(htab); return ERR_PTR(err); diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 574325276650..e49bfd4f4f6d 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -272,6 +272,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) { int numa_node = bpf_map_attr_numa_node(attr); struct bpf_cgroup_storage_map *map; + struct bpf_map_memory mem; u32 pages; int ret; @@ -294,16 +295,18 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) pages = round_up(sizeof(struct bpf_cgroup_storage_map), PAGE_SIZE) >> PAGE_SHIFT; - ret = bpf_map_precharge_memlock(pages); + ret = bpf_map_charge_init(&mem, pages); if (ret < 0) return ERR_PTR(ret); map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map), __GFP_ZERO | GFP_USER, numa_node); - if (!map) + if (!map) { + bpf_map_charge_finish(&mem); return ERR_PTR(-ENOMEM); + } - map->map.memory.pages = pages; + bpf_map_charge_move(&map->map.memory, &mem); /* copy mandatory map attributes */ bpf_map_init_from_attr(&map->map, attr); diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 8e423a582760..6345a8d2dcd0 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -578,9 +578,8 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) goto out_err; } - trie->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; - - ret = bpf_map_precharge_memlock(trie->map.memory.pages); + ret = bpf_map_charge_init(&trie->map.memory, + round_up(cost, PAGE_SIZE) >> PAGE_SHIFT); if (ret) goto out_err; diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index 8a510e71d486..224cb0fd8f03 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -67,6 +67,7 @@ static int queue_stack_map_alloc_check(union bpf_attr *attr) static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr) { int ret, numa_node = bpf_map_attr_numa_node(attr); + struct bpf_map_memory mem = {0}; struct bpf_queue_stack *qs; u64 size, queue_size, cost; @@ -77,19 +78,21 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr) cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; - ret = bpf_map_precharge_memlock(cost); + ret = bpf_map_charge_init(&mem, cost); if (ret < 0) return ERR_PTR(ret); qs = bpf_map_area_alloc(queue_size, numa_node); - if (!qs) + if (!qs) { + bpf_map_charge_finish(&mem); return ERR_PTR(-ENOMEM); + } memset(qs, 0, sizeof(*qs)); bpf_map_init_from_attr(&qs->map, attr); - qs->map.memory.pages = cost; + bpf_map_charge_move(&qs->map.memory, &mem); qs->size = size; raw_spin_lock_init(&qs->lock); diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index 819515242739..5c6e25b1b9b1 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -151,6 +151,7 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr) { int err, numa_node = bpf_map_attr_numa_node(attr); struct reuseport_array *array; + struct bpf_map_memory mem; u64 cost, array_size; if (!capable(CAP_SYS_ADMIN)) @@ -165,18 +166,20 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr) return ERR_PTR(-ENOMEM); cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; - err = bpf_map_precharge_memlock(cost); + err = bpf_map_charge_init(&mem, cost); if (err) return ERR_PTR(err); /* allocate all map elements and zero-initialize them */ array = bpf_map_area_alloc(array_size, numa_node); - if (!array) + if (!array) { + bpf_map_charge_finish(&mem); return ERR_PTR(-ENOMEM); + } /* copy mandatory map attributes */ bpf_map_init_from_attr(&array->map, attr); - array->map.memory.pages = cost; + bpf_map_charge_move(&array->map.memory, &mem); return &array->map; } diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 08d4efff73ac..8da24ca65d97 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -89,6 +89,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) { u32 value_size = attr->value_size; struct bpf_stack_map *smap; + struct bpf_map_memory mem; u64 cost, n_buckets; int err; @@ -116,40 +117,43 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) n_buckets = roundup_pow_of_two(attr->max_entries); cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); + if (cost >= U32_MAX - PAGE_SIZE) + return ERR_PTR(-E2BIG); + cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); if (cost >= U32_MAX - PAGE_SIZE) return ERR_PTR(-E2BIG); + err = bpf_map_charge_init(&mem, + round_up(cost, PAGE_SIZE) >> PAGE_SHIFT); + if (err) + return ERR_PTR(err); + smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); - if (!smap) + if (!smap) { + bpf_map_charge_finish(&mem); return ERR_PTR(-ENOMEM); - - err = -E2BIG; - cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); - if (cost >= U32_MAX - PAGE_SIZE) - goto free_smap; + } bpf_map_init_from_attr(&smap->map, attr); smap->map.value_size = value_size; smap->n_buckets = n_buckets; - smap->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; - - err = bpf_map_precharge_memlock(smap->map.memory.pages); - if (err) - goto free_smap; err = get_callchain_buffers(sysctl_perf_event_max_stack); if (err) - goto free_smap; + goto free_charge; err = prealloc_elems_and_freelist(smap); if (err) goto put_buffers; + bpf_map_charge_move(&smap->map.memory, &mem); + return &smap->map; put_buffers: put_callchain_buffers(); -free_smap: +free_charge: + bpf_map_charge_finish(&mem); bpf_map_area_free(smap); return ERR_PTR(err); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8289a2ce14fc..4a5ebad99154 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -188,19 +188,6 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) map->numa_node = bpf_map_attr_numa_node(attr); } -int bpf_map_precharge_memlock(u32 pages) -{ - struct user_struct *user = get_current_user(); - unsigned long memlock_limit, cur; - - memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - cur = atomic_long_read(&user->locked_vm); - free_uid(user); - if (cur + pages > memlock_limit) - return -EPERM; - return 0; -} - static int bpf_charge_memlock(struct user_struct *user, u32 pages) { unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; @@ -214,29 +201,40 @@ static int bpf_charge_memlock(struct user_struct *user, u32 pages) static void bpf_uncharge_memlock(struct user_struct *user, u32 pages) { - atomic_long_sub(pages, &user->locked_vm); + if (user) + atomic_long_sub(pages, &user->locked_vm); } -static int bpf_map_init_memlock(struct bpf_map *map) +int bpf_map_charge_init(struct bpf_map_memory *mem, u32 pages) { struct user_struct *user = get_current_user(); int ret; - ret = bpf_charge_memlock(user, map->memory.pages); + ret = bpf_charge_memlock(user, pages); if (ret) { free_uid(user); return ret; } - map->memory.user = user; - return ret; + + mem->pages = pages; + mem->user = user; + + return 0; } -static void bpf_map_release_memlock(struct bpf_map *map) +void bpf_map_charge_finish(struct bpf_map_memory *mem) { - struct user_struct *user = map->memory.user; + bpf_uncharge_memlock(mem->user, mem->pages); + free_uid(mem->user); +} - bpf_uncharge_memlock(user, map->memory.pages); - free_uid(user); +void bpf_map_charge_move(struct bpf_map_memory *dst, + struct bpf_map_memory *src) +{ + *dst = *src; + + /* Make sure src will not be used for the redundant uncharging. */ + memset(src, 0, sizeof(struct bpf_map_memory)); } int bpf_map_charge_memlock(struct bpf_map *map, u32 pages) @@ -304,11 +302,13 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock) static void bpf_map_free_deferred(struct work_struct *work) { struct bpf_map *map = container_of(work, struct bpf_map, work); + struct bpf_map_memory mem; - bpf_map_release_memlock(map); + bpf_map_charge_move(&mem, &map->memory); security_bpf_map_free(map); /* implementation dependent freeing */ map->ops->map_free(map); + bpf_map_charge_finish(&mem); } static void bpf_map_put_uref(struct bpf_map *map) @@ -550,6 +550,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, static int map_create(union bpf_attr *attr) { int numa_node = bpf_map_attr_numa_node(attr); + struct bpf_map_memory mem; struct bpf_map *map; int f_flags; int err; @@ -574,7 +575,7 @@ static int map_create(union bpf_attr *attr) err = bpf_obj_name_cpy(map->name, attr->map_name); if (err) - goto free_map_nouncharge; + goto free_map; atomic_set(&map->refcnt, 1); atomic_set(&map->usercnt, 1); @@ -584,20 +585,20 @@ static int map_create(union bpf_attr *attr) if (!attr->btf_value_type_id) { err = -EINVAL; - goto free_map_nouncharge; + goto free_map; } btf = btf_get_by_fd(attr->btf_fd); if (IS_ERR(btf)) { err = PTR_ERR(btf); - goto free_map_nouncharge; + goto free_map; } err = map_check_btf(map, btf, attr->btf_key_type_id, attr->btf_value_type_id); if (err) { btf_put(btf); - goto free_map_nouncharge; + goto free_map; } map->btf = btf; @@ -609,15 +610,11 @@ static int map_create(union bpf_attr *attr) err = security_bpf_map_alloc(map); if (err) - goto free_map_nouncharge; - - err = bpf_map_init_memlock(map); - if (err) - goto free_map_sec; + goto free_map; err = bpf_map_alloc_id(map); if (err) - goto free_map; + goto free_map_sec; err = bpf_map_new_fd(map, f_flags); if (err < 0) { @@ -633,13 +630,13 @@ static int map_create(union bpf_attr *attr) return err; -free_map: - bpf_map_release_memlock(map); free_map_sec: security_bpf_map_free(map); -free_map_nouncharge: +free_map: btf_put(map->btf); + bpf_map_charge_move(&mem, &map->memory); map->ops->map_free(map); + bpf_map_charge_finish(&mem); return err; } diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index f816ee1a0fa0..a329dab7c7a4 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c @@ -40,10 +40,9 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) if (cost >= U32_MAX - PAGE_SIZE) goto free_m; - m->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; - /* Notice returns -EPERM on if map size is larger than memlock limit */ - err = bpf_map_precharge_memlock(m->map.memory.pages); + err = bpf_map_charge_init(&m->map.memory, + round_up(cost, PAGE_SIZE) >> PAGE_SHIFT); if (err) goto free_m; @@ -51,7 +50,7 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) m->flush_list = alloc_percpu(struct list_head); if (!m->flush_list) - goto free_m; + goto free_charge; for_each_possible_cpu(cpu) INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu)); @@ -65,6 +64,8 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) free_percpu: free_percpu(m->flush_list); +free_charge: + bpf_map_charge_finish(&m->map.memory); free_m: kfree(m); return ERR_PTR(err); diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 92581c3ff220..621a0b07ff11 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -640,13 +640,16 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap); pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; - ret = bpf_map_precharge_memlock(pages); - if (ret < 0) + ret = bpf_map_charge_init(&smap->map.memory, pages); + if (ret < 0) { + kfree(smap); return ERR_PTR(ret); + } smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets, GFP_USER | __GFP_NOWARN); if (!smap->buckets) { + bpf_map_charge_finish(&smap->map.memory); kfree(smap); return ERR_PTR(-ENOMEM); } @@ -659,7 +662,6 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size; smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) % BPF_SK_STORAGE_CACHE_SIZE; - smap->map.memory.pages = pages; return &smap->map; } diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 4eb5b6a1b29f..1028c922a149 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -49,8 +49,8 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) goto free_stab; } - stab->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; - err = bpf_map_precharge_memlock(stab->map.memory.pages); + err = bpf_map_charge_init(&stab->map.memory, + round_up(cost, PAGE_SIZE) >> PAGE_SHIFT); if (err) goto free_stab; @@ -60,6 +60,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) if (stab->sks) return &stab->map; err = -ENOMEM; + bpf_map_charge_finish(&stab->map.memory); free_stab: kfree(stab); return ERR_PTR(err); -- cgit v1.2.3 From c85d69135a9175c50a823d04d62d932312d037b3 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 29 May 2019 18:03:59 -0700 Subject: bpf: move memory size checks to bpf_map_charge_init() Most bpf map types doing similar checks and bytes to pages conversion during memory allocation and charging. Let's unify these checks by moving them into bpf_map_charge_init(). Signed-off-by: Roman Gushchin Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- kernel/bpf/arraymap.c | 8 +------- kernel/bpf/cpumap.c | 5 +---- kernel/bpf/devmap.c | 5 +---- kernel/bpf/hashtab.c | 7 +------ kernel/bpf/local_storage.c | 5 +---- kernel/bpf/lpm_trie.c | 7 +------ kernel/bpf/queue_stack_maps.c | 4 ---- kernel/bpf/reuseport_array.c | 10 ++-------- kernel/bpf/stackmap.c | 8 +------- kernel/bpf/syscall.c | 9 +++++++-- kernel/bpf/xskmap.c | 5 +---- net/core/bpf_sk_storage.c | 4 +--- net/core/sock_map.c | 8 +------- 14 files changed, 20 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3c8f24f402bf..e5a309e6a400 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -652,7 +652,7 @@ void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_charge_memlock(struct bpf_map *map, u32 pages); void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages); -int bpf_map_charge_init(struct bpf_map_memory *mem, u32 pages); +int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size); void bpf_map_charge_finish(struct bpf_map_memory *mem); void bpf_map_charge_move(struct bpf_map_memory *dst, struct bpf_map_memory *src); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 3552da4407d9..0349cbf23cdb 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -117,14 +117,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) /* make sure there is no u32 overflow later in round_up() */ cost = array_size; - if (cost >= U32_MAX - PAGE_SIZE) - return ERR_PTR(-ENOMEM); - if (percpu) { + if (percpu) cost += (u64)attr->max_entries * elem_size * num_possible_cpus(); - if (cost >= U32_MAX - PAGE_SIZE) - return ERR_PTR(-ENOMEM); - } - cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; ret = bpf_map_charge_init(&mem, cost); if (ret < 0) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index c633c8d68023..b31a71909307 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -106,12 +106,9 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) /* make sure page count doesn't overflow */ cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *); cost += cpu_map_bitmap_size(attr) * num_possible_cpus(); - if (cost >= U32_MAX - PAGE_SIZE) - goto free_cmap; /* Notice returns -EPERM on if map size is larger than memlock limit */ - ret = bpf_map_charge_init(&cmap->map.memory, - round_up(cost, PAGE_SIZE) >> PAGE_SHIFT); + ret = bpf_map_charge_init(&cmap->map.memory, cost); if (ret) { err = ret; goto free_cmap; diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 371bd880ed58..5ae7cce5ef16 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -108,12 +108,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) /* make sure page count doesn't overflow */ cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *); cost += dev_map_bitmap_size(attr) * num_possible_cpus(); - if (cost >= U32_MAX - PAGE_SIZE) - goto free_dtab; /* if map size is larger than memlock limit, reject it */ - err = bpf_map_charge_init(&dtab->map.memory, - round_up(cost, PAGE_SIZE) >> PAGE_SHIFT); + err = bpf_map_charge_init(&dtab->map.memory, cost); if (err) goto free_dtab; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index b0bdc7b040ad..d92e05d9979b 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -360,13 +360,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) else cost += (u64) htab->elem_size * num_possible_cpus(); - if (cost >= U32_MAX - PAGE_SIZE) - /* make sure page count doesn't overflow */ - goto free_htab; - /* if map size is larger than memlock limit, reject it */ - err = bpf_map_charge_init(&htab->map.memory, - round_up(cost, PAGE_SIZE) >> PAGE_SHIFT); + err = bpf_map_charge_init(&htab->map.memory, cost); if (err) goto free_htab; diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index e49bfd4f4f6d..addd6fdceec8 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -273,7 +273,6 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) int numa_node = bpf_map_attr_numa_node(attr); struct bpf_cgroup_storage_map *map; struct bpf_map_memory mem; - u32 pages; int ret; if (attr->key_size != sizeof(struct bpf_cgroup_storage_key)) @@ -293,9 +292,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) /* max_entries is not used and enforced to be 0 */ return ERR_PTR(-EINVAL); - pages = round_up(sizeof(struct bpf_cgroup_storage_map), PAGE_SIZE) >> - PAGE_SHIFT; - ret = bpf_map_charge_init(&mem, pages); + ret = bpf_map_charge_init(&mem, sizeof(struct bpf_cgroup_storage_map)); if (ret < 0) return ERR_PTR(ret); diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 6345a8d2dcd0..09334f13a8a0 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -573,13 +573,8 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) cost_per_node = sizeof(struct lpm_trie_node) + attr->value_size + trie->data_size; cost += (u64) attr->max_entries * cost_per_node; - if (cost >= U32_MAX - PAGE_SIZE) { - ret = -E2BIG; - goto out_err; - } - ret = bpf_map_charge_init(&trie->map.memory, - round_up(cost, PAGE_SIZE) >> PAGE_SHIFT); + ret = bpf_map_charge_init(&trie->map.memory, cost); if (ret) goto out_err; diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index 224cb0fd8f03..f697647ceb54 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -73,10 +73,6 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr) size = (u64) attr->max_entries + 1; cost = queue_size = sizeof(*qs) + size * attr->value_size; - if (cost >= U32_MAX - PAGE_SIZE) - return ERR_PTR(-E2BIG); - - cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; ret = bpf_map_charge_init(&mem, cost); if (ret < 0) diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index 5c6e25b1b9b1..50c083ba978c 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -152,7 +152,7 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr) int err, numa_node = bpf_map_attr_numa_node(attr); struct reuseport_array *array; struct bpf_map_memory mem; - u64 cost, array_size; + u64 array_size; if (!capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); @@ -160,13 +160,7 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr) array_size = sizeof(*array); array_size += (u64)attr->max_entries * sizeof(struct sock *); - /* make sure there is no u32 overflow later in round_up() */ - cost = array_size; - if (cost >= U32_MAX - PAGE_SIZE) - return ERR_PTR(-ENOMEM); - cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; - - err = bpf_map_charge_init(&mem, cost); + err = bpf_map_charge_init(&mem, array_size); if (err) return ERR_PTR(err); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 8da24ca65d97..3d86072d8e32 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -117,14 +117,8 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) n_buckets = roundup_pow_of_two(attr->max_entries); cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); - if (cost >= U32_MAX - PAGE_SIZE) - return ERR_PTR(-E2BIG); cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); - if (cost >= U32_MAX - PAGE_SIZE) - return ERR_PTR(-E2BIG); - - err = bpf_map_charge_init(&mem, - round_up(cost, PAGE_SIZE) >> PAGE_SHIFT); + err = bpf_map_charge_init(&mem, cost); if (err) return ERR_PTR(err); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4a5ebad99154..4c53cbd3329d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -205,11 +205,16 @@ static void bpf_uncharge_memlock(struct user_struct *user, u32 pages) atomic_long_sub(pages, &user->locked_vm); } -int bpf_map_charge_init(struct bpf_map_memory *mem, u32 pages) +int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size) { - struct user_struct *user = get_current_user(); + u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT; + struct user_struct *user; int ret; + if (size >= U32_MAX - PAGE_SIZE) + return -E2BIG; + + user = get_current_user(); ret = bpf_charge_memlock(user, pages); if (ret) { free_uid(user); diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index a329dab7c7a4..22066c28ba61 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c @@ -37,12 +37,9 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *); cost += sizeof(struct list_head) * num_possible_cpus(); - if (cost >= U32_MAX - PAGE_SIZE) - goto free_m; /* Notice returns -EPERM on if map size is larger than memlock limit */ - err = bpf_map_charge_init(&m->map.memory, - round_up(cost, PAGE_SIZE) >> PAGE_SHIFT); + err = bpf_map_charge_init(&m->map.memory, cost); if (err) goto free_m; diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 621a0b07ff11..f40e3d35fd9c 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -626,7 +626,6 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) struct bpf_sk_storage_map *smap; unsigned int i; u32 nbuckets; - u32 pages; u64 cost; int ret; @@ -638,9 +637,8 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) smap->bucket_log = ilog2(roundup_pow_of_two(num_possible_cpus())); nbuckets = 1U << smap->bucket_log; cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap); - pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; - ret = bpf_map_charge_init(&smap->map.memory, pages); + ret = bpf_map_charge_init(&smap->map.memory, cost); if (ret < 0) { kfree(smap); return ERR_PTR(ret); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 1028c922a149..52d4faeee18b 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -44,13 +44,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) /* Make sure page count doesn't overflow. */ cost = (u64) stab->map.max_entries * sizeof(struct sock *); - if (cost >= U32_MAX - PAGE_SIZE) { - err = -EINVAL; - goto free_stab; - } - - err = bpf_map_charge_init(&stab->map.memory, - round_up(cost, PAGE_SIZE) >> PAGE_SHIFT); + err = bpf_map_charge_init(&stab->map.memory, cost); if (err) goto free_stab; -- cgit v1.2.3 From 5213d7efc8ec26ed8938dce75427eff9275a62d9 Mon Sep 17 00:00:00 2001 From: Ruslan Babayev Date: Tue, 28 May 2019 16:02:32 -0700 Subject: i2c: acpi: export i2c_acpi_find_adapter_by_handle This allows drivers to lookup i2c adapters on ACPI based systems similar to of_get_i2c_adapter_by_node() with DT based systems. Signed-off-by: Ruslan Babayev Reviewed-by: Andrew Lunn Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-acpi.c | 3 ++- include/linux/i2c.h | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index 272800692088..964687534754 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -337,7 +337,7 @@ static int i2c_acpi_find_match_device(struct device *dev, void *data) return ACPI_COMPANION(dev) == data; } -static struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle) +struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle) { struct device *dev; @@ -345,6 +345,7 @@ static struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle) i2c_acpi_find_match_adapter); return dev ? i2c_verify_adapter(dev) : NULL; } +EXPORT_SYMBOL_GPL(i2c_acpi_find_adapter_by_handle); static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev) { diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 1308126fc384..e982b8913b73 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -14,6 +14,7 @@ #ifndef _LINUX_I2C_H #define _LINUX_I2C_H +#include /* for acpi_handle */ #include #include /* for struct device */ #include /* for completion */ @@ -981,6 +982,7 @@ bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, u32 i2c_acpi_find_bus_speed(struct device *dev); struct i2c_client *i2c_acpi_new_device(struct device *dev, int index, struct i2c_board_info *info); +struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle); #else static inline bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c) @@ -996,6 +998,10 @@ static inline struct i2c_client *i2c_acpi_new_device(struct device *dev, { return NULL; } +static inline struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle) +{ + return NULL; +} #endif /* CONFIG_ACPI */ #endif /* _LINUX_I2C_H */ -- cgit v1.2.3 From ef11db3310e272d3d8dbe8739e0770820dd20e52 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 31 May 2019 18:27:04 +0200 Subject: net: inetdevice: provide replacement iterators for in_ifaddr walk The ifa_list is protected either by rcu or rtnl lock, but the current iterators do not account for this. This adds two iterators as replacement, a later patch in the series will update them with the needed rcu/rtnl_dereference calls. Its not done in this patch yet to avoid sparse warnings -- the fields lack the proper __rcu annotation. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 10 +++++++++- net/ipv4/devinet.c | 31 ++++++++++++++++--------------- 2 files changed, 25 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 367dc2a0f84a..d5d05503a04b 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -186,7 +186,7 @@ __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask); struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr); -static __inline__ bool inet_ifa_match(__be32 addr, struct in_ifaddr *ifa) +static inline bool inet_ifa_match(__be32 addr, const struct in_ifaddr *ifa) { return !((addr^ifa->ifa_address)&ifa->ifa_mask); } @@ -215,6 +215,14 @@ static __inline__ bool bad_mask(__be32 mask, __be32 addr) #define endfor_ifa(in_dev) } +#define in_dev_for_each_ifa_rtnl(ifa, in_dev) \ + for (ifa = (in_dev)->ifa_list; ifa; \ + ifa = ifa->ifa_next) + +#define in_dev_for_each_ifa_rcu(ifa, in_dev) \ + for (ifa = (in_dev)->ifa_list; ifa; \ + ifa = ifa->ifa_next) + static inline struct in_device *__in_dev_get_rcu(const struct net_device *dev) { return rcu_dereference(dev->ip_ptr); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 701c5d113a34..7803a4d2951c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -873,13 +873,12 @@ errout: static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) { struct in_device *in_dev = ifa->ifa_dev; - struct in_ifaddr *ifa1, **ifap; + struct in_ifaddr *ifa1; if (!ifa->ifa_local) return NULL; - for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; - ifap = &ifa1->ifa_next) { + in_dev_for_each_ifa_rtnl(ifa1, in_dev) { if (ifa1->ifa_mask == ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa) && ifa1->ifa_local == ifa->ifa_local) @@ -1208,7 +1207,7 @@ out: static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size) { struct in_device *in_dev = __in_dev_get_rtnl(dev); - struct in_ifaddr *ifa; + const struct in_ifaddr *ifa; struct ifreq ifr; int done = 0; @@ -1218,7 +1217,7 @@ static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int s if (!in_dev) goto out; - for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { + in_dev_for_each_ifa_rtnl(ifa, in_dev) { if (!buf) { done += size; continue; @@ -1321,10 +1320,11 @@ EXPORT_SYMBOL(inet_select_addr); static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, __be32 local, int scope) { - int same = 0; + const struct in_ifaddr *ifa; __be32 addr = 0; + int same = 0; - for_ifa(in_dev) { + in_dev_for_each_ifa_rcu(ifa, in_dev) { if (!addr && (local == ifa->ifa_local || !local) && ifa->ifa_scope <= scope) { @@ -1350,7 +1350,7 @@ static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, same = 0; } } - } endfor_ifa(in_dev); + } return same ? addr : 0; } @@ -1424,7 +1424,7 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) struct in_ifaddr *ifa; int named = 0; - for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { + in_dev_for_each_ifa_rtnl(ifa, in_dev) { char old[IFNAMSIZ], *dot; memcpy(old, ifa->ifa_label, IFNAMSIZ); @@ -1454,10 +1454,9 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev, struct in_device *in_dev) { - struct in_ifaddr *ifa; + const struct in_ifaddr *ifa; - for (ifa = in_dev->ifa_list; ifa; - ifa = ifa->ifa_next) { + in_dev_for_each_ifa_rtnl(ifa, in_dev) { arp_send(ARPOP_REQUEST, ETH_P_ARP, ifa->ifa_local, dev, ifa->ifa_local, NULL, @@ -1727,15 +1726,17 @@ static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb, int ip_idx = 0; int err; - for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next, ip_idx++) { - if (ip_idx < s_ip_idx) + in_dev_for_each_ifa_rcu(ifa, in_dev) { + if (ip_idx < s_ip_idx) { + ip_idx++; continue; - + } err = inet_fill_ifaddr(skb, ifa, fillargs); if (err < 0) goto done; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + ip_idx++; } err = 0; -- cgit v1.2.3 From 2638eb8b50cfc16240e0bb080b9afbf541a9b39d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 31 May 2019 18:27:09 +0200 Subject: net: ipv4: provide __rcu annotation for ifa_list ifa_list is protected by rcu, yet code doesn't reflect this. Add the __rcu annotations and fix up all places that are now reported by sparse. I've done this in the same commit to not add intermediate patches that result in new warnings. Reported-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- drivers/infiniband/hw/i40iw/i40iw_utils.c | 12 ++-- drivers/infiniband/hw/nes/nes.c | 8 ++- drivers/infiniband/hw/usnic/usnic_ib_main.c | 15 +++-- drivers/net/ethernet/via/via-velocity.h | 2 +- drivers/net/plip/plip.c | 4 +- drivers/net/vmxnet3/vmxnet3_drv.c | 19 ++++-- drivers/net/wireless/ath/ath6kl/cfg80211.c | 4 +- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 2 +- drivers/staging/isdn/hysdn/hysdn_net.c | 6 +- include/linux/inetdevice.h | 21 ++---- net/core/netpoll.c | 10 ++- net/core/pktgen.c | 8 ++- net/ipv4/devinet.c | 88 ++++++++++++++++--------- net/mac80211/main.c | 4 +- net/netfilter/nf_nat_redirect.c | 12 ++-- 15 files changed, 134 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 337410f40860..016524683e17 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -174,10 +174,14 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, rcu_read_lock(); in = __in_dev_get_rcu(upper_dev); - if (!in->ifa_list) - local_ipaddr = 0; - else - local_ipaddr = ntohl(in->ifa_list->ifa_address); + local_ipaddr = 0; + if (in) { + struct in_ifaddr *ifa; + + ifa = rcu_dereference(in->ifa_list); + if (ifa) + local_ipaddr = ntohl(ifa->ifa_address); + } rcu_read_unlock(); } else { diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index e00add6d78ec..29b324726ea6 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -183,7 +183,13 @@ static int nes_inetaddr_event(struct notifier_block *notifier, rcu_read_lock(); in = __in_dev_get_rcu(upper_dev); - nesvnic->local_ipaddr = in->ifa_list->ifa_address; + if (in) { + struct in_ifaddr *ifa; + + ifa = rcu_dereference(in->ifa_list); + if (ifa) + nesvnic->local_ipaddr = ifa->ifa_address; + } rcu_read_unlock(); } else { nesvnic->local_ipaddr = ifa->ifa_address; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index d88d9f8a7f9a..34c1f9d6c915 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -427,11 +427,16 @@ static void *usnic_ib_device_add(struct pci_dev *dev) if (netif_carrier_ok(us_ibdev->netdev)) usnic_fwd_carrier_up(us_ibdev->ufdev); - ind = in_dev_get(netdev); - if (ind->ifa_list) - usnic_fwd_add_ipaddr(us_ibdev->ufdev, - ind->ifa_list->ifa_address); - in_dev_put(ind); + rcu_read_lock(); + ind = __in_dev_get_rcu(netdev); + if (ind) { + const struct in_ifaddr *ifa; + + ifa = rcu_dereference(ind->ifa_list); + if (ifa) + usnic_fwd_add_ipaddr(us_ibdev->ufdev, ifa->ifa_address); + } + rcu_read_unlock(); usnic_mac_ip_to_gid(us_ibdev->netdev->perm_addr, us_ibdev->ufdev->inaddr, &gid.raw[0]); diff --git a/drivers/net/ethernet/via/via-velocity.h b/drivers/net/ethernet/via/via-velocity.h index c0ecc6c7b5e0..cdfe7809e3c1 100644 --- a/drivers/net/ethernet/via/via-velocity.h +++ b/drivers/net/ethernet/via/via-velocity.h @@ -1509,7 +1509,7 @@ static inline int velocity_get_ip(struct velocity_info *vptr) rcu_read_lock(); in_dev = __in_dev_get_rcu(vptr->netdev); if (in_dev != NULL) { - ifa = (struct in_ifaddr *) in_dev->ifa_list; + ifa = rcu_dereference(in_dev->ifa_list); if (ifa != NULL) { memcpy(vptr->ip_addr, &ifa->ifa_address, 4); res = 0; diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c index feb92ecd1880..3e3ac2e496a1 100644 --- a/drivers/net/plip/plip.c +++ b/drivers/net/plip/plip.c @@ -1012,7 +1012,7 @@ plip_rewrite_address(const struct net_device *dev, struct ethhdr *eth) in_dev = __in_dev_get_rcu(dev); if (in_dev) { /* Any address will do - we take the first */ - const struct in_ifaddr *ifa = in_dev->ifa_list; + const struct in_ifaddr *ifa = rcu_dereference(in_dev->ifa_list); if (ifa) { memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); memset(eth->h_dest, 0xfc, 2); @@ -1107,7 +1107,7 @@ plip_open(struct net_device *dev) /* Any address will do - we take the first. We already have the first two bytes filled with 0xfc, from plip_init_dev(). */ - struct in_ifaddr *ifa=in_dev->ifa_list; + const struct in_ifaddr *ifa = rcu_dereference(in_dev->ifa_list); if (ifa != NULL) { memcpy(dev->dev_addr+2, &ifa->ifa_local, 4); } diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 89984fcab01e..1b2a18ea855c 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -3651,13 +3651,19 @@ vmxnet3_suspend(struct device *device) } if (adapter->wol & WAKE_ARP) { - in_dev = in_dev_get(netdev); - if (!in_dev) + rcu_read_lock(); + + in_dev = __in_dev_get_rcu(netdev); + if (!in_dev) { + rcu_read_unlock(); goto skip_arp; + } - ifa = (struct in_ifaddr *)in_dev->ifa_list; - if (!ifa) + ifa = rcu_dereference(in_dev->ifa_list); + if (!ifa) { + rcu_read_unlock(); goto skip_arp; + } pmConf->filters[i].patternSize = ETH_HLEN + /* Ethernet header*/ sizeof(struct arphdr) + /* ARP header */ @@ -3677,7 +3683,9 @@ vmxnet3_suspend(struct device *device) /* The Unicast IPv4 address in 'tip' field. */ arpreq += 2 * ETH_ALEN + sizeof(u32); - *(u32 *)arpreq = ifa->ifa_address; + *(__be32 *)arpreq = ifa->ifa_address; + + rcu_read_unlock(); /* The mask for the relevant bits. */ pmConf->filters[i].mask[0] = 0x00; @@ -3686,7 +3694,6 @@ vmxnet3_suspend(struct device *device) pmConf->filters[i].mask[3] = 0x00; pmConf->filters[i].mask[4] = 0xC0; /* IPv4 TIP */ pmConf->filters[i].mask[5] = 0x03; /* IPv4 TIP */ - in_dev_put(in_dev); pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER; i++; diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 5477a014e1fb..37cf602d8adf 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -2194,13 +2194,13 @@ static int ath6kl_wow_suspend_vif(struct ath6kl_vif *vif, if (!in_dev) return 0; - ifa = in_dev->ifa_list; + ifa = rtnl_dereference(in_dev->ifa_list); memset(&ips, 0, sizeof(ips)); /* Configure IP addr only if IP address count < MAX_IP_ADDRS */ while (index < MAX_IP_ADDRS && ifa) { ips[index] = ifa->ifa_local; - ifa = ifa->ifa_next; + ifa = rtnl_dereference(ifa->ifa_next); index++; } diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index e11a4bb67172..5a7cdb981789 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -3268,7 +3268,7 @@ static void mwifiex_set_auto_arp_mef_entry(struct mwifiex_private *priv, in_dev = __in_dev_get_rtnl(adapter->priv[i]->netdev); if (!in_dev) continue; - ifa = in_dev->ifa_list; + ifa = rtnl_dereference(in_dev->ifa_list); if (!ifa || !ifa->ifa_local) continue; ips[i] = ifa->ifa_local; diff --git a/drivers/staging/isdn/hysdn/hysdn_net.c b/drivers/staging/isdn/hysdn/hysdn_net.c index 8e9c34f33d86..bea37ae30ebb 100644 --- a/drivers/staging/isdn/hysdn/hysdn_net.c +++ b/drivers/staging/isdn/hysdn/hysdn_net.c @@ -70,9 +70,13 @@ net_open(struct net_device *dev) for (i = 0; i < ETH_ALEN; i++) dev->dev_addr[i] = 0xfc; if ((in_dev = dev->ip_ptr) != NULL) { - struct in_ifaddr *ifa = in_dev->ifa_list; + const struct in_ifaddr *ifa; + + rcu_read_lock(); + ifa = rcu_dereference(in_dev->ifa_list); if (ifa != NULL) memcpy(dev->dev_addr + (ETH_ALEN - sizeof(ifa->ifa_local)), &ifa->ifa_local, sizeof(ifa->ifa_local)); + rcu_read_unlock(); } } else memcpy(dev->dev_addr, card->mac_addr, ETH_ALEN); diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index d5d05503a04b..3515ca64e638 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -26,7 +26,7 @@ struct in_device { struct net_device *dev; refcount_t refcnt; int dead; - struct in_ifaddr *ifa_list; /* IP ifaddr chain */ + struct in_ifaddr __rcu *ifa_list;/* IP ifaddr chain */ struct ip_mc_list __rcu *mc_list; /* IP multicast filter chain */ struct ip_mc_list __rcu * __rcu *mc_hash; @@ -136,7 +136,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) struct in_ifaddr { struct hlist_node hash; - struct in_ifaddr *ifa_next; + struct in_ifaddr __rcu *ifa_next; struct in_device *ifa_dev; struct rcu_head rcu_head; __be32 ifa_local; @@ -206,22 +206,13 @@ static __inline__ bool bad_mask(__be32 mask, __be32 addr) return false; } -#define for_primary_ifa(in_dev) { struct in_ifaddr *ifa; \ - for (ifa = (in_dev)->ifa_list; ifa && !(ifa->ifa_flags&IFA_F_SECONDARY); ifa = ifa->ifa_next) - -#define for_ifa(in_dev) { struct in_ifaddr *ifa; \ - for (ifa = (in_dev)->ifa_list; ifa; ifa = ifa->ifa_next) - - -#define endfor_ifa(in_dev) } - #define in_dev_for_each_ifa_rtnl(ifa, in_dev) \ - for (ifa = (in_dev)->ifa_list; ifa; \ - ifa = ifa->ifa_next) + for (ifa = rtnl_dereference((in_dev)->ifa_list); ifa; \ + ifa = rtnl_dereference(ifa->ifa_next)) #define in_dev_for_each_ifa_rcu(ifa, in_dev) \ - for (ifa = (in_dev)->ifa_list; ifa; \ - ifa = ifa->ifa_next) + for (ifa = rcu_dereference((in_dev)->ifa_list); ifa; \ + ifa = rcu_dereference(ifa->ifa_next)) static inline struct in_device *__in_dev_get_rcu(const struct net_device *dev) { diff --git a/net/core/netpoll.c b/net/core/netpoll.c index dd8b1a460d64..2cf27da1baeb 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -696,16 +696,22 @@ int netpoll_setup(struct netpoll *np) if (!np->local_ip.ip) { if (!np->ipv6) { + const struct in_ifaddr *ifa; + in_dev = __in_dev_get_rtnl(ndev); + if (!in_dev) + goto put_noaddr; - if (!in_dev || !in_dev->ifa_list) { + ifa = rtnl_dereference(in_dev->ifa_list); + if (!ifa) { +put_noaddr: np_err(np, "no IP address for %s, aborting\n", np->dev_name); err = -EDESTADDRREQ; goto put; } - np->local_ip.ip = in_dev->ifa_list->ifa_local; + np->local_ip.ip = ifa->ifa_local; np_info(np, "local IP %pI4\n", &np->local_ip.ip); } else { #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 319ad5490fb3..4cd120dc30ad 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2125,9 +2125,11 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) rcu_read_lock(); in_dev = __in_dev_get_rcu(pkt_dev->odev); if (in_dev) { - if (in_dev->ifa_list) { - pkt_dev->saddr_min = - in_dev->ifa_list->ifa_address; + const struct in_ifaddr *ifa; + + ifa = rcu_dereference(in_dev->ifa_list); + if (ifa) { + pkt_dev->saddr_min = ifa->ifa_address; pkt_dev->saddr_max = pkt_dev->saddr_min; } } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index b45421b2b734..ebaea05b4033 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -194,7 +194,8 @@ static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain); -static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, +static void inet_del_ifa(struct in_device *in_dev, + struct in_ifaddr __rcu **ifap, int destroy); #ifdef CONFIG_SYSCTL static int devinet_sysctl_register(struct in_device *idev); @@ -300,8 +301,8 @@ static void in_dev_rcu_put(struct rcu_head *head) static void inetdev_destroy(struct in_device *in_dev) { - struct in_ifaddr *ifa; struct net_device *dev; + struct in_ifaddr *ifa; ASSERT_RTNL(); @@ -311,7 +312,7 @@ static void inetdev_destroy(struct in_device *in_dev) ip_mc_destroy_dev(in_dev); - while ((ifa = in_dev->ifa_list) != NULL) { + while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) { inet_del_ifa(in_dev, &in_dev->ifa_list, 0); inet_free_ifa(ifa); } @@ -342,17 +343,20 @@ int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) return 0; } -static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, - int destroy, struct nlmsghdr *nlh, u32 portid) +static void __inet_del_ifa(struct in_device *in_dev, + struct in_ifaddr __rcu **ifap, + int destroy, struct nlmsghdr *nlh, u32 portid) { struct in_ifaddr *promote = NULL; - struct in_ifaddr *ifa, *ifa1 = *ifap; - struct in_ifaddr *last_prim = in_dev->ifa_list; + struct in_ifaddr *ifa, *ifa1; + struct in_ifaddr *last_prim; struct in_ifaddr *prev_prom = NULL; int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev); ASSERT_RTNL(); + ifa1 = rtnl_dereference(*ifap); + last_prim = rtnl_dereference(in_dev->ifa_list); if (in_dev->dead) goto no_promotions; @@ -361,9 +365,9 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, **/ if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) { - struct in_ifaddr **ifap1 = &ifa1->ifa_next; + struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next; - while ((ifa = *ifap1) != NULL) { + while ((ifa = rtnl_dereference(*ifap1)) != NULL) { if (!(ifa->ifa_flags & IFA_F_SECONDARY) && ifa1->ifa_scope <= ifa->ifa_scope) last_prim = ifa; @@ -396,7 +400,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, * and later to add them back with new prefsrc. Do this * while all addresses are on the device list. */ - for (ifa = promote; ifa; ifa = ifa->ifa_next) { + for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) { if (ifa1->ifa_mask == ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa)) fib_del_ifaddr(ifa, ifa1); @@ -422,19 +426,24 @@ no_promotions: blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); if (promote) { - struct in_ifaddr *next_sec = promote->ifa_next; + struct in_ifaddr *next_sec; + next_sec = rtnl_dereference(promote->ifa_next); if (prev_prom) { - prev_prom->ifa_next = promote->ifa_next; - promote->ifa_next = last_prim->ifa_next; - last_prim->ifa_next = promote; + struct in_ifaddr *last_sec; + + last_sec = rtnl_dereference(last_prim->ifa_next); + rcu_assign_pointer(prev_prom->ifa_next, next_sec); + rcu_assign_pointer(promote->ifa_next, last_sec); + rcu_assign_pointer(last_prim->ifa_next, promote); } promote->ifa_flags &= ~IFA_F_SECONDARY; rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote); - for (ifa = next_sec; ifa; ifa = ifa->ifa_next) { + for (ifa = next_sec; ifa; + ifa = rtnl_dereference(ifa->ifa_next)) { if (ifa1->ifa_mask != ifa->ifa_mask || !inet_ifa_match(ifa1->ifa_address, ifa)) continue; @@ -446,7 +455,8 @@ no_promotions: inet_free_ifa(ifa1); } -static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, +static void inet_del_ifa(struct in_device *in_dev, + struct in_ifaddr __rcu **ifap, int destroy) { __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); @@ -459,9 +469,10 @@ static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime); static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 portid, struct netlink_ext_ack *extack) { + struct in_ifaddr __rcu **last_primary, **ifap; struct in_device *in_dev = ifa->ifa_dev; - struct in_ifaddr *ifa1, **ifap, **last_primary; struct in_validator_info ivi; + struct in_ifaddr *ifa1; int ret; ASSERT_RTNL(); @@ -474,8 +485,10 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, ifa->ifa_flags &= ~IFA_F_SECONDARY; last_primary = &in_dev->ifa_list; - for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; - ifap = &ifa1->ifa_next) { + ifap = &in_dev->ifa_list; + ifa1 = rtnl_dereference(*ifap); + + while (ifa1) { if (!(ifa1->ifa_flags & IFA_F_SECONDARY) && ifa->ifa_scope <= ifa1->ifa_scope) last_primary = &ifa1->ifa_next; @@ -491,6 +504,9 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, } ifa->ifa_flags |= IFA_F_SECONDARY; } + + ifap = &ifa1->ifa_next; + ifa1 = rtnl_dereference(*ifap); } /* Allow any devices that wish to register ifaddr validtors to weigh @@ -516,8 +532,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, ifap = last_primary; } - ifa->ifa_next = *ifap; - *ifap = ifa; + rcu_assign_pointer(ifa->ifa_next, *ifap); + rcu_assign_pointer(*ifap, ifa); inet_hash_insert(dev_net(in_dev->dev), ifa); @@ -617,10 +633,12 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); + struct in_ifaddr __rcu **ifap; struct nlattr *tb[IFA_MAX+1]; struct in_device *in_dev; struct ifaddrmsg *ifm; - struct in_ifaddr *ifa, **ifap; + struct in_ifaddr *ifa; + int err = -EINVAL; ASSERT_RTNL(); @@ -637,7 +655,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } - for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; + for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL; ifap = &ifa->ifa_next) { if (tb[IFA_LOCAL] && ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL])) @@ -725,15 +743,20 @@ static void check_lifetime(struct work_struct *work) if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && age >= ifa->ifa_valid_lft) { - struct in_ifaddr **ifap; - - for (ifap = &ifa->ifa_dev->ifa_list; - *ifap != NULL; ifap = &(*ifap)->ifa_next) { - if (*ifap == ifa) { + struct in_ifaddr __rcu **ifap; + struct in_ifaddr *tmp; + + ifap = &ifa->ifa_dev->ifa_list; + tmp = rtnl_dereference(*ifap); + while (tmp) { + tmp = rtnl_dereference(tmp->ifa_next); + if (rtnl_dereference(*ifap) == ifa) { inet_del_ifa(ifa->ifa_dev, ifap, 1); break; } + ifap = &tmp->ifa_next; + tmp = rtnl_dereference(*ifap); } } else if (ifa->ifa_preferred_lft != INFINITY_LIFE_TIME && @@ -977,8 +1000,8 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) { struct sockaddr_in sin_orig; struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr; + struct in_ifaddr __rcu **ifap = NULL; struct in_device *in_dev; - struct in_ifaddr **ifap = NULL; struct in_ifaddr *ifa = NULL; struct net_device *dev; char *colon; @@ -1049,7 +1072,9 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) /* note: we only do this for a limited set of ioctls and only if the original address family was AF_INET. This is checked above. */ - for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; + + for (ifap = &in_dev->ifa_list; + (ifa = rtnl_dereference(*ifap)) != NULL; ifap = &ifa->ifa_next) { if (!strcmp(ifr->ifr_name, ifa->ifa_label) && sin_orig.sin_addr.s_addr == @@ -1062,7 +1087,8 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) 4.3BSD-style and passed in junk so we fall back to comparing just the label */ if (!ifa) { - for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; + for (ifap = &in_dev->ifa_list; + (ifa = rtnl_dereference(*ifap)) != NULL; ifap = &ifa->ifa_next) if (!strcmp(ifr->ifr_name, ifa->ifa_label)) break; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 2b608044ae23..1f11907dc528 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -354,11 +354,11 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, sdata_lock(sdata); /* Copy the addresses to the bss_conf list */ - ifa = idev->ifa_list; + ifa = rtnl_dereference(idev->ifa_list); while (ifa) { if (c < IEEE80211_BSS_ARP_ADDR_LIST_LEN) bss_conf->arp_addr_list[c] = ifa->ifa_address; - ifa = ifa->ifa_next; + ifa = rtnl_dereference(ifa->ifa_next); c++; } diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c index 78a9e6454ff3..8598e80968e0 100644 --- a/net/netfilter/nf_nat_redirect.c +++ b/net/netfilter/nf_nat_redirect.c @@ -47,15 +47,17 @@ nf_nat_redirect_ipv4(struct sk_buff *skb, if (hooknum == NF_INET_LOCAL_OUT) { newdst = htonl(0x7F000001); } else { - struct in_device *indev; - struct in_ifaddr *ifa; + const struct in_device *indev; newdst = 0; indev = __in_dev_get_rcu(skb->dev); - if (indev && indev->ifa_list) { - ifa = indev->ifa_list; - newdst = ifa->ifa_local; + if (indev) { + const struct in_ifaddr *ifa; + + ifa = rcu_dereference(indev->ifa_list); + if (ifa) + newdst = ifa->ifa_local; } if (!newdst) -- cgit v1.2.3 From 80488a6b1d3c3509b69d38d7c5ac7615889ea7e0 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Fri, 31 May 2019 17:15:34 +0300 Subject: software node: Add support for static node descriptors Until now the software nodes could only be created dynamically with fwnode_create_software_node() function. This introduces struct software_node data structure, which makes it possible to describe the software nodes also statically. The statically described software nodes can be registered with a new function fwnode_register_software_node(). This also adds a helper fwnode_register_software_nodes() which makes it possible to register an array of struct software_nodes, i.e. multiple nodes at the same time. There is no difference between statically described and dynamically allocated software nodes. Even the registration does not differ, except that during node creation the device properties are only copied if the node is created dynamically. With statically described nodes, the property entries in the descriptor (struct software_node) are assigned directly to the new software node that is being created without any copies. Signed-off-by: Heikki Krogerus Tested-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/base/swnode.c | 256 +++++++++++++++++++++++++++++++++++------------ include/linux/property.h | 19 ++++ 2 files changed, 212 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index 7b321bf8424c..ef1a48fec718 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -11,25 +11,25 @@ #include #include -struct software_node { +struct swnode { int id; struct kobject kobj; struct fwnode_handle fwnode; + const struct software_node *node; /* hierarchy */ struct ida child_ids; struct list_head entry; struct list_head children; - struct software_node *parent; + struct swnode *parent; - /* properties */ - const struct property_entry *properties; + unsigned int allocated:1; }; static DEFINE_IDA(swnode_root_ids); static struct kset *swnode_kset; -#define kobj_to_swnode(_kobj_) container_of(_kobj_, struct software_node, kobj) +#define kobj_to_swnode(_kobj_) container_of(_kobj_, struct swnode, kobj) static const struct fwnode_operations software_node_ops; @@ -37,17 +37,56 @@ bool is_software_node(const struct fwnode_handle *fwnode) { return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &software_node_ops; } +EXPORT_SYMBOL_GPL(is_software_node); -#define to_software_node(__fwnode) \ +#define to_swnode(__fwnode) \ ({ \ - typeof(__fwnode) __to_software_node_fwnode = __fwnode; \ + typeof(__fwnode) __to_swnode_fwnode = __fwnode; \ \ - is_software_node(__to_software_node_fwnode) ? \ - container_of(__to_software_node_fwnode, \ - struct software_node, fwnode) : \ - NULL; \ + is_software_node(__to_swnode_fwnode) ? \ + container_of(__to_swnode_fwnode, \ + struct swnode, fwnode) : NULL; \ }) +static struct swnode * +software_node_to_swnode(const struct software_node *node) +{ + struct swnode *swnode; + struct kobject *k; + + if (!node) + return NULL; + + spin_lock(&swnode_kset->list_lock); + + list_for_each_entry(k, &swnode_kset->list, entry) { + swnode = kobj_to_swnode(k); + if (swnode->node == node) + break; + swnode = NULL; + } + + spin_unlock(&swnode_kset->list_lock); + + return swnode; +} + +const struct software_node *to_software_node(struct fwnode_handle *fwnode) +{ + struct swnode *swnode = to_swnode(fwnode); + + return swnode ? swnode->node : NULL; +} +EXPORT_SYMBOL_GPL(to_software_node); + +struct fwnode_handle *software_node_fwnode(const struct software_node *node) +{ + struct swnode *swnode = software_node_to_swnode(node); + + return swnode ? &swnode->fwnode : NULL; +} +EXPORT_SYMBOL_GPL(software_node_fwnode); + /* -------------------------------------------------------------------------- */ /* property_entry processing */ @@ -433,7 +472,7 @@ EXPORT_SYMBOL_GPL(property_entries_free); static struct fwnode_handle *software_node_get(struct fwnode_handle *fwnode) { - struct software_node *swnode = to_software_node(fwnode); + struct swnode *swnode = to_swnode(fwnode); kobject_get(&swnode->kobj); @@ -442,7 +481,7 @@ static struct fwnode_handle *software_node_get(struct fwnode_handle *fwnode) static void software_node_put(struct fwnode_handle *fwnode) { - struct software_node *swnode = to_software_node(fwnode); + struct swnode *swnode = to_swnode(fwnode); kobject_put(&swnode->kobj); } @@ -450,8 +489,9 @@ static void software_node_put(struct fwnode_handle *fwnode) static bool software_node_property_present(const struct fwnode_handle *fwnode, const char *propname) { - return !!property_entry_get(to_software_node(fwnode)->properties, - propname); + struct swnode *swnode = to_swnode(fwnode); + + return !!property_entry_get(swnode->node->properties, propname); } static int software_node_read_int_array(const struct fwnode_handle *fwnode, @@ -459,9 +499,9 @@ static int software_node_read_int_array(const struct fwnode_handle *fwnode, unsigned int elem_size, void *val, size_t nval) { - struct software_node *swnode = to_software_node(fwnode); + struct swnode *swnode = to_swnode(fwnode); - return property_entry_read_int_array(swnode->properties, propname, + return property_entry_read_int_array(swnode->node->properties, propname, elem_size, val, nval); } @@ -469,27 +509,26 @@ static int software_node_read_string_array(const struct fwnode_handle *fwnode, const char *propname, const char **val, size_t nval) { - struct software_node *swnode = to_software_node(fwnode); + struct swnode *swnode = to_swnode(fwnode); - return property_entry_read_string_array(swnode->properties, propname, - val, nval); + return property_entry_read_string_array(swnode->node->properties, + propname, val, nval); } static struct fwnode_handle * software_node_get_parent(const struct fwnode_handle *fwnode) { - struct software_node *swnode = to_software_node(fwnode); + struct swnode *swnode = to_swnode(fwnode); - return swnode ? (swnode->parent ? &swnode->parent->fwnode : NULL) : - NULL; + return swnode ? (swnode->parent ? &swnode->parent->fwnode : NULL) : NULL; } static struct fwnode_handle * software_node_get_next_child(const struct fwnode_handle *fwnode, struct fwnode_handle *child) { - struct software_node *p = to_software_node(fwnode); - struct software_node *c = to_software_node(child); + struct swnode *p = to_swnode(fwnode); + struct swnode *c = to_swnode(child); if (!p || list_empty(&p->children) || (c && list_is_last(&c->entry, &p->children))) @@ -498,7 +537,7 @@ software_node_get_next_child(const struct fwnode_handle *fwnode, if (c) c = list_next_entry(c, entry); else - c = list_first_entry(&p->children, struct software_node, entry); + c = list_first_entry(&p->children, struct swnode, entry); return &c->fwnode; } @@ -506,15 +545,15 @@ static struct fwnode_handle * software_node_get_named_child_node(const struct fwnode_handle *fwnode, const char *childname) { - struct software_node *swnode = to_software_node(fwnode); + struct swnode *swnode = to_swnode(fwnode); const struct property_entry *prop; - struct software_node *child; + struct swnode *child; if (!swnode || list_empty(&swnode->children)) return NULL; list_for_each_entry(child, &swnode->children, entry) { - prop = property_entry_get(child->properties, "name"); + prop = property_entry_get(child->node->properties, "name"); if (!prop) continue; if (!strcmp(childname, prop->value.str)) { @@ -539,7 +578,7 @@ static const struct fwnode_operations software_node_ops = { /* -------------------------------------------------------------------------- */ static int -software_node_register_properties(struct software_node *swnode, +software_node_register_properties(struct software_node *node, const struct property_entry *properties) { struct property_entry *props; @@ -548,17 +587,20 @@ software_node_register_properties(struct software_node *swnode, if (IS_ERR(props)) return PTR_ERR(props); - swnode->properties = props; + node->properties = props; return 0; } static void software_node_release(struct kobject *kobj) { - struct software_node *swnode = kobj_to_swnode(kobj); + struct swnode *swnode = kobj_to_swnode(kobj); + if (swnode->allocated) { + property_entries_free(swnode->node->properties); + kfree(swnode->node); + } ida_destroy(&swnode->child_ids); - property_entries_free(swnode->properties); kfree(swnode); } @@ -567,66 +609,154 @@ static struct kobj_type software_node_type = { .sysfs_ops = &kobj_sysfs_ops, }; -struct fwnode_handle * -fwnode_create_software_node(const struct property_entry *properties, - const struct fwnode_handle *parent) +static struct fwnode_handle * +swnode_register(const struct software_node *node, struct swnode *parent, + unsigned int allocated) { - struct software_node *p = NULL; - struct software_node *swnode; + struct swnode *swnode; int ret; - if (parent) { - if (IS_ERR(parent)) - return ERR_CAST(parent); - if (!is_software_node(parent)) - return ERR_PTR(-EINVAL); - p = to_software_node(parent); - } - swnode = kzalloc(sizeof(*swnode), GFP_KERNEL); - if (!swnode) - return ERR_PTR(-ENOMEM); + if (!swnode) { + ret = -ENOMEM; + goto out_err; + } - ret = ida_simple_get(p ? &p->child_ids : &swnode_root_ids, 0, 0, - GFP_KERNEL); + ret = ida_simple_get(parent ? &parent->child_ids : &swnode_root_ids, + 0, 0, GFP_KERNEL); if (ret < 0) { kfree(swnode); - return ERR_PTR(ret); + goto out_err; } swnode->id = ret; + swnode->node = node; + swnode->parent = parent; + swnode->allocated = allocated; swnode->kobj.kset = swnode_kset; swnode->fwnode.ops = &software_node_ops; ida_init(&swnode->child_ids); INIT_LIST_HEAD(&swnode->entry); INIT_LIST_HEAD(&swnode->children); - swnode->parent = p; - ret = kobject_init_and_add(&swnode->kobj, &software_node_type, - p ? &p->kobj : NULL, "node%d", swnode->id); + if (node->name) + ret = kobject_init_and_add(&swnode->kobj, &software_node_type, + parent ? &parent->kobj : NULL, + "%s", node->name); + else + ret = kobject_init_and_add(&swnode->kobj, &software_node_type, + parent ? &parent->kobj : NULL, + "node%d", swnode->id); if (ret) { kobject_put(&swnode->kobj); return ERR_PTR(ret); } - ret = software_node_register_properties(swnode, properties); + if (parent) + list_add_tail(&swnode->entry, &parent->children); + + kobject_uevent(&swnode->kobj, KOBJ_ADD); + return &swnode->fwnode; + +out_err: + if (allocated) + property_entries_free(node->properties); + return ERR_PTR(ret); +} + +/** + * software_node_register_nodes - Register an array of software nodes + * @nodes: Zero terminated array of software nodes to be registered + * + * Register multiple software nodes at once. + */ +int software_node_register_nodes(const struct software_node *nodes) +{ + int ret; + int i; + + for (i = 0; nodes[i].name; i++) { + ret = software_node_register(&nodes[i]); + if (ret) { + software_node_unregister_nodes(nodes); + return ret; + } + } + + return 0; +} +EXPORT_SYMBOL_GPL(software_node_register_nodes); + +/** + * software_node_unregister_nodes - Unregister an array of software nodes + * @nodes: Zero terminated array of software nodes to be unregistered + * + * Unregister multiple software nodes at once. + */ +void software_node_unregister_nodes(const struct software_node *nodes) +{ + struct swnode *swnode; + int i; + + for (i = 0; nodes[i].name; i++) { + swnode = software_node_to_swnode(&nodes[i]); + if (swnode) + fwnode_remove_software_node(&swnode->fwnode); + } +} +EXPORT_SYMBOL_GPL(software_node_unregister_nodes); + +/** + * software_node_register - Register static software node + * @node: The software node to be registered + */ +int software_node_register(const struct software_node *node) +{ + struct swnode *parent = software_node_to_swnode(node->parent); + + if (software_node_to_swnode(node)) + return -EEXIST; + + return PTR_ERR_OR_ZERO(swnode_register(node, parent, 0)); +} +EXPORT_SYMBOL_GPL(software_node_register); + +struct fwnode_handle * +fwnode_create_software_node(const struct property_entry *properties, + const struct fwnode_handle *parent) +{ + struct software_node *node; + struct swnode *p = NULL; + int ret; + + if (parent) { + if (IS_ERR(parent)) + return ERR_CAST(parent); + if (!is_software_node(parent)) + return ERR_PTR(-EINVAL); + p = to_swnode(parent); + } + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + ret = software_node_register_properties(node, properties); if (ret) { - kobject_put(&swnode->kobj); + kfree(node); return ERR_PTR(ret); } - if (p) - list_add_tail(&swnode->entry, &p->children); + node->parent = p ? p->node : NULL; - kobject_uevent(&swnode->kobj, KOBJ_ADD); - return &swnode->fwnode; + return swnode_register(node, p, 1); } EXPORT_SYMBOL_GPL(fwnode_create_software_node); void fwnode_remove_software_node(struct fwnode_handle *fwnode) { - struct software_node *swnode = to_software_node(fwnode); + struct swnode *swnode = to_swnode(fwnode); if (!swnode) return; @@ -645,7 +775,7 @@ EXPORT_SYMBOL_GPL(fwnode_remove_software_node); int software_node_notify(struct device *dev, unsigned long action) { struct fwnode_handle *fwnode = dev_fwnode(dev); - struct software_node *swnode; + struct swnode *swnode; int ret; if (!fwnode) @@ -656,7 +786,7 @@ int software_node_notify(struct device *dev, unsigned long action) if (!is_software_node(fwnode)) return 0; - swnode = to_software_node(fwnode); + swnode = to_swnode(fwnode); switch (action) { case KOBJ_ADD: diff --git a/include/linux/property.h b/include/linux/property.h index a29369c89e6e..a3813ded52ea 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -332,7 +332,26 @@ int fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode, /* -------------------------------------------------------------------------- */ /* Software fwnode support - when HW description is incomplete or missing */ +/** + * struct software_node - Software node description + * @name: Name of the software node + * @parent: Parent of the software node + * @properties: Array of device properties + */ +struct software_node { + const char *name; + const struct software_node *parent; + const struct property_entry *properties; +}; + bool is_software_node(const struct fwnode_handle *fwnode); +const struct software_node *to_software_node(struct fwnode_handle *fwnode); +struct fwnode_handle *software_node_fwnode(const struct software_node *node); + +int software_node_register_nodes(const struct software_node *nodes); +void software_node_unregister_nodes(const struct software_node *nodes); + +int software_node_register(const struct software_node *node); int software_node_notify(struct device *dev, unsigned long action); -- cgit v1.2.3 From b06184acf751fa52a3763e4fadfd2807e9703acd Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Fri, 31 May 2019 17:15:36 +0300 Subject: software node: Add software_node_get_reference_args() This makes it possible to support drivers that use fwnode_property_get_reference_args() function. Signed-off-by: Heikki Krogerus Tested-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/base/swnode.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/property.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index 2d925fc2255f..e7b3aa3bd55a 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -560,6 +560,52 @@ software_node_get_named_child_node(const struct fwnode_handle *fwnode, return NULL; } +static int +software_node_get_reference_args(const struct fwnode_handle *fwnode, + const char *propname, const char *nargs_prop, + unsigned int nargs, unsigned int index, + struct fwnode_reference_args *args) +{ + struct swnode *swnode = to_swnode(fwnode); + const struct software_node_reference *ref; + const struct property_entry *prop; + struct fwnode_handle *refnode; + int i; + + if (!swnode || !swnode->node->references) + return -ENOENT; + + for (ref = swnode->node->references; ref->name; ref++) + if (!strcmp(ref->name, propname)) + break; + + if (!ref->name || index > (ref->nrefs - 1)) + return -ENOENT; + + refnode = software_node_fwnode(ref->refs[index].node); + if (!refnode) + return -ENOENT; + + if (nargs_prop) { + prop = property_entry_get(swnode->node->properties, nargs_prop); + if (!prop) + return -EINVAL; + + nargs = prop->value.u32_data; + } + + if (nargs > NR_FWNODE_REFERENCE_ARGS) + return -EINVAL; + + args->fwnode = software_node_get(refnode); + args->nargs = nargs; + + for (i = 0; i < nargs; i++) + args->args[i] = ref->refs[index].args[i]; + + return 0; +} + static const struct fwnode_operations software_node_ops = { .get = software_node_get, .put = software_node_put, @@ -569,6 +615,7 @@ static const struct fwnode_operations software_node_ops = { .get_parent = software_node_get_parent, .get_next_child_node = software_node_get_next_child, .get_named_child_node = software_node_get_named_child_node, + .get_reference_args = software_node_get_reference_args }; /* -------------------------------------------------------------------------- */ diff --git a/include/linux/property.h b/include/linux/property.h index a3813ded52ea..abcde2f236a0 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -332,16 +332,44 @@ int fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode, /* -------------------------------------------------------------------------- */ /* Software fwnode support - when HW description is incomplete or missing */ +struct software_node; + +/** + * struct software_node_ref_args - Reference with additional arguments + * @node: Reference to a software node + * @nargs: Number of elements in @args array + * @args: Integer arguments + */ +struct software_node_ref_args { + const struct software_node *node; + unsigned int nargs; + u64 args[NR_FWNODE_REFERENCE_ARGS]; +}; + +/** + * struct software_node_reference - Named software node reference property + * @name: Name of the property + * @nrefs: Number of elements in @refs array + * @refs: Array of references with optional arguments + */ +struct software_node_reference { + const char *name; + unsigned int nrefs; + const struct software_node_ref_args *refs; +}; + /** * struct software_node - Software node description * @name: Name of the software node * @parent: Parent of the software node * @properties: Array of device properties + * @references: Array of software node reference properties */ struct software_node { const char *name; const struct software_node *parent; const struct property_entry *properties; + const struct software_node_reference *references; }; bool is_software_node(const struct fwnode_handle *fwnode); -- cgit v1.2.3 From dad9bb017865ae794b6cdfac40d60b1466a09195 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Fri, 31 May 2019 17:15:37 +0300 Subject: driver core: Add helper device_find_child_by_name() It looks like the child device is often matched with a name. This introduces a helper that does it automatically. Signed-off-by: Heikki Krogerus Acked-by: Greg Kroah-Hartman Tested-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/base/core.c | 28 ++++++++++++++++++++++++++++ include/linux/device.h | 2 ++ 2 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index fd7511e04e62..b4c64528f13c 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2474,6 +2474,34 @@ struct device *device_find_child(struct device *parent, void *data, } EXPORT_SYMBOL_GPL(device_find_child); +/** + * device_find_child_by_name - device iterator for locating a child device. + * @parent: parent struct device + * @name: name of the child device + * + * This is similar to the device_find_child() function above, but it + * returns a reference to a device that has the name @name. + * + * NOTE: you will need to drop the reference with put_device() after use. + */ +struct device *device_find_child_by_name(struct device *parent, + const char *name) +{ + struct klist_iter i; + struct device *child; + + if (!parent) + return NULL; + + klist_iter_init(&parent->p->klist_children, &i); + while ((child = next_device(&i))) + if (!strcmp(dev_name(child), name) && get_device(child)) + break; + klist_iter_exit(&i); + return child; +} +EXPORT_SYMBOL_GPL(device_find_child_by_name); + int __init devices_init(void) { devices_kset = kset_create_and_add("devices", &device_uevent_ops, NULL); diff --git a/include/linux/device.h b/include/linux/device.h index e85264fb6616..5489a759e1c5 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1250,6 +1250,8 @@ extern int device_for_each_child_reverse(struct device *dev, void *data, int (*fn)(struct device *dev, void *data)); extern struct device *device_find_child(struct device *dev, void *data, int (*match)(struct device *dev, void *data)); +extern struct device *device_find_child_by_name(struct device *parent, + const char *name); extern int device_rename(struct device *dev, const char *new_name); extern int device_move(struct device *dev, struct device *new_parent, enum dpm_order dpm_order); -- cgit v1.2.3 From 83b34afb6b79c69f5478a7249451cab858af97d6 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Fri, 31 May 2019 17:15:39 +0300 Subject: device property: Introduce fwnode_find_reference() In most cases the references that the drivers look for don't have any arguments. This introduces a wrapper function for fwnode_property_get_reference_args() that looks for references by using only the name and index. Signed-off-by: Heikki Krogerus Tested-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/base/property.c | 24 ++++++++++++++++++++++++ include/linux/property.h | 4 ++++ 2 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/property.c b/drivers/base/property.c index 348b37e64944..81bd01ed4042 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -484,6 +484,30 @@ int fwnode_property_get_reference_args(const struct fwnode_handle *fwnode, } EXPORT_SYMBOL_GPL(fwnode_property_get_reference_args); +/** + * fwnode_find_reference - Find named reference to a fwnode_handle + * @fwnode: Firmware node where to look for the reference + * @name: The name of the reference + * @index: Index of the reference + * + * @index can be used when the named reference holds a table of references. + * + * Returns pointer to the reference fwnode, or ERR_PTR. Caller is responsible to + * call fwnode_handle_put() on the returned fwnode pointer. + */ +struct fwnode_handle *fwnode_find_reference(const struct fwnode_handle *fwnode, + const char *name, + unsigned int index) +{ + struct fwnode_reference_args args; + int ret; + + ret = fwnode_property_get_reference_args(fwnode, name, NULL, 0, index, + &args); + return ret ? ERR_PTR(ret) : args.fwnode; +} +EXPORT_SYMBOL_GPL(fwnode_find_reference); + /** * device_remove_properties - Remove properties from a device object. * @dev: Device whose properties to remove. diff --git a/include/linux/property.h b/include/linux/property.h index abcde2f236a0..088d4db7e949 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -79,6 +79,10 @@ int fwnode_property_get_reference_args(const struct fwnode_handle *fwnode, unsigned int nargs, unsigned int index, struct fwnode_reference_args *args); +struct fwnode_handle *fwnode_find_reference(const struct fwnode_handle *fwnode, + const char *name, + unsigned int index); + struct fwnode_handle *fwnode_get_parent(const struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_get_next_parent( struct fwnode_handle *fwnode); -- cgit v1.2.3 From 3370db35193b241ba5836a66df6ec1a559108389 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Fri, 31 May 2019 17:15:41 +0300 Subject: usb: typec: Registering real device entries for the muxes Registering real device entries (struct device) for the mode muxes as well as for the orientation switches. The Type-C mux code was deliberately attempting to avoid creation of separate device entries for the orientation switch and the mode switch (alternate modes) because they are not physical devices. They are functions of a single physical multiplexer/demultiplexer switch device. Unfortunately because of the dependency we still have on the underlying mux device driver, we had to put in hacks like the one in the commit 3e3b81965cbf ("usb: typec: mux: Take care of driver module reference counting") to make sure the driver does not disappear from underneath us. Even with those hacks we were still left with a potential NUll pointer dereference scenario, so just creating the device entries, and letting the core take care of the dependencies. No more hacks needed. Signed-off-by: Heikki Krogerus Reviewed-by: Andy Shevchenko Tested-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/platform/x86/intel_cht_int33fe.c | 4 +- drivers/usb/typec/bus.h | 15 ++ drivers/usb/typec/class.c | 17 ++- drivers/usb/typec/mux.c | 238 ++++++++++++++++++++++--------- drivers/usb/typec/mux/pi3usb30532.c | 46 +++--- include/linux/usb/typec_mux.h | 62 ++++---- 6 files changed, 259 insertions(+), 123 deletions(-) (limited to 'include/linux') diff --git a/drivers/platform/x86/intel_cht_int33fe.c b/drivers/platform/x86/intel_cht_int33fe.c index 6fa3cced6f8e..657b8d61554c 100644 --- a/drivers/platform/x86/intel_cht_int33fe.c +++ b/drivers/platform/x86/intel_cht_int33fe.c @@ -173,10 +173,10 @@ static int cht_int33fe_probe(struct platform_device *pdev) } data->connections[0].endpoint[0] = "port0"; - data->connections[0].endpoint[1] = "i2c-pi3usb30532"; + data->connections[0].endpoint[1] = "i2c-pi3usb30532-switch"; data->connections[0].id = "orientation-switch"; data->connections[1].endpoint[0] = "port0"; - data->connections[1].endpoint[1] = "i2c-pi3usb30532"; + data->connections[1].endpoint[1] = "i2c-pi3usb30532-mux"; data->connections[1].id = "mode-switch"; data->connections[2].endpoint[0] = "i2c-fusb302"; data->connections[2].endpoint[1] = "intel_xhci_usb_sw-role-switch"; diff --git a/drivers/usb/typec/bus.h b/drivers/usb/typec/bus.h index db40e61d8b72..0c9661c96473 100644 --- a/drivers/usb/typec/bus.h +++ b/drivers/usb/typec/bus.h @@ -35,4 +35,19 @@ extern const struct device_type typec_port_dev_type; #define is_typec_altmode(_dev_) (_dev_->type == &typec_altmode_dev_type) #define is_typec_port(_dev_) (_dev_->type == &typec_port_dev_type) +extern struct class typec_mux_class; + +struct typec_switch { + struct device dev; + typec_switch_set_fn_t set; +}; + +struct typec_mux { + struct device dev; + typec_mux_set_fn_t set; +}; + +#define to_typec_switch(_dev_) container_of(_dev_, struct typec_switch, dev) +#define to_typec_mux(_dev_) container_of(_dev_, struct typec_mux, dev) + #endif /* __USB_TYPEC_ALTMODE_H__ */ diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 2eb623841847..a18285a990a8 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -1646,13 +1646,25 @@ static int __init typec_init(void) if (ret) return ret; + ret = class_register(&typec_mux_class); + if (ret) + goto err_unregister_bus; + typec_class = class_create(THIS_MODULE, "typec"); if (IS_ERR(typec_class)) { - bus_unregister(&typec_bus); - return PTR_ERR(typec_class); + ret = PTR_ERR(typec_class); + goto err_unregister_mux_class; } return 0; + +err_unregister_mux_class: + class_unregister(&typec_mux_class); + +err_unregister_bus: + bus_unregister(&typec_bus); + + return ret; } subsys_initcall(typec_init); @@ -1661,6 +1673,7 @@ static void __exit typec_exit(void) class_destroy(typec_class); ida_destroy(&typec_index_ida); bus_unregister(&typec_bus); + class_unregister(&typec_mux_class); } module_exit(typec_exit); diff --git a/drivers/usb/typec/mux.c b/drivers/usb/typec/mux.c index 2ce54f3fc79c..61b7bc58dd81 100644 --- a/drivers/usb/typec/mux.c +++ b/drivers/usb/typec/mux.c @@ -15,35 +15,47 @@ #include #include -static DEFINE_MUTEX(switch_lock); -static DEFINE_MUTEX(mux_lock); -static LIST_HEAD(switch_list); -static LIST_HEAD(mux_list); +#include "bus.h" + +static int name_match(struct device *dev, const void *name) +{ + return !strcmp((const char *)name, dev_name(dev)); +} + +static bool dev_name_ends_with(struct device *dev, const char *suffix) +{ + const char *name = dev_name(dev); + const int name_len = strlen(name); + const int suffix_len = strlen(suffix); + + if (suffix_len > name_len) + return false; + + return strcmp(name + (name_len - suffix_len), suffix) == 0; +} + +static int switch_fwnode_match(struct device *dev, const void *fwnode) +{ + return dev_fwnode(dev) == fwnode && dev_name_ends_with(dev, "-switch"); +} static void *typec_switch_match(struct device_connection *con, int ep, void *data) { - struct typec_switch *sw; - - if (!con->fwnode) { - list_for_each_entry(sw, &switch_list, entry) - if (!strcmp(con->endpoint[ep], dev_name(sw->dev))) - return sw; - return ERR_PTR(-EPROBE_DEFER); - } + struct device *dev; - /* - * With OF graph the mux node must have a boolean device property named - * "orientation-switch". - */ - if (con->id && !fwnode_property_present(con->fwnode, con->id)) - return NULL; + if (con->fwnode) { + if (con->id && !fwnode_property_present(con->fwnode, con->id)) + return NULL; - list_for_each_entry(sw, &switch_list, entry) - if (dev_fwnode(sw->dev) == con->fwnode) - return sw; + dev = class_find_device(&typec_mux_class, NULL, con->fwnode, + switch_fwnode_match); + } else { + dev = class_find_device(&typec_mux_class, NULL, + con->endpoint[ep], name_match); + } - return con->id ? ERR_PTR(-EPROBE_DEFER) : NULL; + return dev ? to_typec_switch(dev) : ERR_PTR(-EPROBE_DEFER); } /** @@ -59,14 +71,10 @@ struct typec_switch *typec_switch_get(struct device *dev) { struct typec_switch *sw; - mutex_lock(&switch_lock); sw = device_connection_find_match(dev, "orientation-switch", NULL, typec_switch_match); - if (!IS_ERR_OR_NULL(sw)) { - WARN_ON(!try_module_get(sw->dev->driver->owner)); - get_device(sw->dev); - } - mutex_unlock(&switch_lock); + if (!IS_ERR_OR_NULL(sw)) + WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); return sw; } @@ -81,28 +89,64 @@ EXPORT_SYMBOL_GPL(typec_switch_get); void typec_switch_put(struct typec_switch *sw) { if (!IS_ERR_OR_NULL(sw)) { - module_put(sw->dev->driver->owner); - put_device(sw->dev); + module_put(sw->dev.parent->driver->owner); + put_device(&sw->dev); } } EXPORT_SYMBOL_GPL(typec_switch_put); +static void typec_switch_release(struct device *dev) +{ + kfree(to_typec_switch(dev)); +} + +static const struct device_type typec_switch_dev_type = { + .name = "orientation_switch", + .release = typec_switch_release, +}; + /** * typec_switch_register - Register USB Type-C orientation switch - * @sw: USB Type-C orientation switch + * @parent: Parent device + * @desc: Orientation switch description * * This function registers a switch that can be used for routing the correct * data pairs depending on the cable plug orientation from the USB Type-C * connector to the USB controllers. USB Type-C plugs can be inserted * right-side-up or upside-down. */ -int typec_switch_register(struct typec_switch *sw) +struct typec_switch * +typec_switch_register(struct device *parent, + const struct typec_switch_desc *desc) { - mutex_lock(&switch_lock); - list_add_tail(&sw->entry, &switch_list); - mutex_unlock(&switch_lock); + struct typec_switch *sw; + int ret; + + if (!desc || !desc->set) + return ERR_PTR(-EINVAL); + + sw = kzalloc(sizeof(*sw), GFP_KERNEL); + if (!sw) + return ERR_PTR(-ENOMEM); - return 0; + sw->set = desc->set; + + device_initialize(&sw->dev); + sw->dev.parent = parent; + sw->dev.fwnode = desc->fwnode; + sw->dev.class = &typec_mux_class; + sw->dev.type = &typec_switch_dev_type; + sw->dev.driver_data = desc->drvdata; + dev_set_name(&sw->dev, "%s-switch", dev_name(parent)); + + ret = device_add(&sw->dev); + if (ret) { + dev_err(parent, "failed to register switch (%d)\n", ret); + put_device(&sw->dev); + return ERR_PTR(ret); + } + + return sw; } EXPORT_SYMBOL_GPL(typec_switch_register); @@ -114,28 +158,44 @@ EXPORT_SYMBOL_GPL(typec_switch_register); */ void typec_switch_unregister(struct typec_switch *sw) { - mutex_lock(&switch_lock); - list_del(&sw->entry); - mutex_unlock(&switch_lock); + if (!IS_ERR_OR_NULL(sw)) + device_unregister(&sw->dev); } EXPORT_SYMBOL_GPL(typec_switch_unregister); +void typec_switch_set_drvdata(struct typec_switch *sw, void *data) +{ + dev_set_drvdata(&sw->dev, data); +} +EXPORT_SYMBOL_GPL(typec_switch_set_drvdata); + +void *typec_switch_get_drvdata(struct typec_switch *sw) +{ + return dev_get_drvdata(&sw->dev); +} +EXPORT_SYMBOL_GPL(typec_switch_get_drvdata); + /* ------------------------------------------------------------------------- */ +static int mux_fwnode_match(struct device *dev, const void *fwnode) +{ + return dev_fwnode(dev) == fwnode && dev_name_ends_with(dev, "-mux"); +} + static void *typec_mux_match(struct device_connection *con, int ep, void *data) { const struct typec_altmode_desc *desc = data; - struct typec_mux *mux; - int nval; + struct device *dev; bool match; + int nval; u16 *val; int i; if (!con->fwnode) { - list_for_each_entry(mux, &mux_list, entry) - if (!strcmp(con->endpoint[ep], dev_name(mux->dev))) - return mux; - return ERR_PTR(-EPROBE_DEFER); + dev = class_find_device(&typec_mux_class, NULL, + con->endpoint[ep], name_match); + + return dev ? to_typec_switch(dev) : ERR_PTR(-EPROBE_DEFER); } /* @@ -180,11 +240,10 @@ static void *typec_mux_match(struct device_connection *con, int ep, void *data) return NULL; find_mux: - list_for_each_entry(mux, &mux_list, entry) - if (dev_fwnode(mux->dev) == con->fwnode) - return mux; + dev = class_find_device(&typec_mux_class, NULL, con->fwnode, + mux_fwnode_match); - return ERR_PTR(-EPROBE_DEFER); + return dev ? to_typec_switch(dev) : ERR_PTR(-EPROBE_DEFER); } /** @@ -202,14 +261,10 @@ struct typec_mux *typec_mux_get(struct device *dev, { struct typec_mux *mux; - mutex_lock(&mux_lock); mux = device_connection_find_match(dev, "mode-switch", (void *)desc, typec_mux_match); - if (!IS_ERR_OR_NULL(mux)) { - WARN_ON(!try_module_get(mux->dev->driver->owner)); - get_device(mux->dev); - } - mutex_unlock(&mux_lock); + if (!IS_ERR_OR_NULL(mux)) + WARN_ON(!try_module_get(mux->dev.parent->driver->owner)); return mux; } @@ -224,28 +279,63 @@ EXPORT_SYMBOL_GPL(typec_mux_get); void typec_mux_put(struct typec_mux *mux) { if (!IS_ERR_OR_NULL(mux)) { - module_put(mux->dev->driver->owner); - put_device(mux->dev); + module_put(mux->dev.parent->driver->owner); + put_device(&mux->dev); } } EXPORT_SYMBOL_GPL(typec_mux_put); +static void typec_mux_release(struct device *dev) +{ + kfree(to_typec_mux(dev)); +} + +static const struct device_type typec_mux_dev_type = { + .name = "mode_switch", + .release = typec_mux_release, +}; + /** * typec_mux_register - Register Multiplexer routing USB Type-C pins - * @mux: USB Type-C Connector Multiplexer/DeMultiplexer + * @parent: Parent device + * @desc: Multiplexer description * * USB Type-C connectors can be used for alternate modes of operation besides * USB when Accessory/Alternate Modes are supported. With some of those modes, * the pins on the connector need to be reconfigured. This function registers * multiplexer switches routing the pins on the connector. */ -int typec_mux_register(struct typec_mux *mux) +struct typec_mux * +typec_mux_register(struct device *parent, const struct typec_mux_desc *desc) { - mutex_lock(&mux_lock); - list_add_tail(&mux->entry, &mux_list); - mutex_unlock(&mux_lock); + struct typec_mux *mux; + int ret; + + if (!desc || !desc->set) + return ERR_PTR(-EINVAL); + + mux = kzalloc(sizeof(*mux), GFP_KERNEL); + if (!mux) + return ERR_PTR(-ENOMEM); + + mux->set = desc->set; + + device_initialize(&mux->dev); + mux->dev.parent = parent; + mux->dev.fwnode = desc->fwnode; + mux->dev.class = &typec_mux_class; + mux->dev.type = &typec_mux_dev_type; + mux->dev.driver_data = desc->drvdata; + dev_set_name(&mux->dev, "%s-mux", dev_name(parent)); + + ret = device_add(&mux->dev); + if (ret) { + dev_err(parent, "failed to register mux (%d)\n", ret); + put_device(&mux->dev); + return ERR_PTR(ret); + } - return 0; + return mux; } EXPORT_SYMBOL_GPL(typec_mux_register); @@ -257,8 +347,24 @@ EXPORT_SYMBOL_GPL(typec_mux_register); */ void typec_mux_unregister(struct typec_mux *mux) { - mutex_lock(&mux_lock); - list_del(&mux->entry); - mutex_unlock(&mux_lock); + if (!IS_ERR_OR_NULL(mux)) + device_unregister(&mux->dev); } EXPORT_SYMBOL_GPL(typec_mux_unregister); + +void typec_mux_set_drvdata(struct typec_mux *mux, void *data) +{ + dev_set_drvdata(&mux->dev, data); +} +EXPORT_SYMBOL_GPL(typec_mux_set_drvdata); + +void *typec_mux_get_drvdata(struct typec_mux *mux) +{ + return dev_get_drvdata(&mux->dev); +} +EXPORT_SYMBOL_GPL(typec_mux_get_drvdata); + +struct class typec_mux_class = { + .name = "typec_mux", + .owner = THIS_MODULE, +}; diff --git a/drivers/usb/typec/mux/pi3usb30532.c b/drivers/usb/typec/mux/pi3usb30532.c index 9294e85fd34b..5585b109095b 100644 --- a/drivers/usb/typec/mux/pi3usb30532.c +++ b/drivers/usb/typec/mux/pi3usb30532.c @@ -23,8 +23,8 @@ struct pi3usb30532 { struct i2c_client *client; struct mutex lock; /* protects the cached conf register */ - struct typec_switch sw; - struct typec_mux mux; + struct typec_switch *sw; + struct typec_mux *mux; u8 conf; }; @@ -48,7 +48,7 @@ static int pi3usb30532_set_conf(struct pi3usb30532 *pi, u8 new_conf) static int pi3usb30532_sw_set(struct typec_switch *sw, enum typec_orientation orientation) { - struct pi3usb30532 *pi = container_of(sw, struct pi3usb30532, sw); + struct pi3usb30532 *pi = typec_switch_get_drvdata(sw); u8 new_conf; int ret; @@ -75,7 +75,7 @@ static int pi3usb30532_sw_set(struct typec_switch *sw, static int pi3usb30532_mux_set(struct typec_mux *mux, int state) { - struct pi3usb30532 *pi = container_of(mux, struct pi3usb30532, mux); + struct pi3usb30532 *pi = typec_mux_get_drvdata(mux); u8 new_conf; int ret; @@ -113,6 +113,8 @@ static int pi3usb30532_mux_set(struct typec_mux *mux, int state) static int pi3usb30532_probe(struct i2c_client *client) { struct device *dev = &client->dev; + struct typec_switch_desc sw_desc; + struct typec_mux_desc mux_desc; struct pi3usb30532 *pi; int ret; @@ -121,10 +123,6 @@ static int pi3usb30532_probe(struct i2c_client *client) return -ENOMEM; pi->client = client; - pi->sw.dev = dev; - pi->sw.set = pi3usb30532_sw_set; - pi->mux.dev = dev; - pi->mux.set = pi3usb30532_mux_set; mutex_init(&pi->lock); ret = i2c_smbus_read_byte_data(client, PI3USB30532_CONF); @@ -134,17 +132,27 @@ static int pi3usb30532_probe(struct i2c_client *client) } pi->conf = ret; - ret = typec_switch_register(&pi->sw); - if (ret) { - dev_err(dev, "Error registering typec switch: %d\n", ret); - return ret; + sw_desc.drvdata = pi; + sw_desc.fwnode = dev->fwnode; + sw_desc.set = pi3usb30532_sw_set; + + pi->sw = typec_switch_register(dev, &sw_desc); + if (IS_ERR(pi->sw)) { + dev_err(dev, "Error registering typec switch: %ld\n", + PTR_ERR(pi->sw)); + return PTR_ERR(pi->sw); } - ret = typec_mux_register(&pi->mux); - if (ret) { - typec_switch_unregister(&pi->sw); - dev_err(dev, "Error registering typec mux: %d\n", ret); - return ret; + mux_desc.drvdata = pi; + mux_desc.fwnode = dev->fwnode; + mux_desc.set = pi3usb30532_mux_set; + + pi->mux = typec_mux_register(dev, &mux_desc); + if (IS_ERR(pi->mux)) { + typec_switch_unregister(pi->sw); + dev_err(dev, "Error registering typec mux: %ld\n", + PTR_ERR(pi->mux)); + return PTR_ERR(pi->mux); } i2c_set_clientdata(client, pi); @@ -155,8 +163,8 @@ static int pi3usb30532_remove(struct i2c_client *client) { struct pi3usb30532 *pi = i2c_get_clientdata(client); - typec_mux_unregister(&pi->mux); - typec_switch_unregister(&pi->sw); + typec_mux_unregister(pi->mux); + typec_switch_unregister(pi->sw); return 0; } diff --git a/include/linux/usb/typec_mux.h b/include/linux/usb/typec_mux.h index 43f40685e53c..873ace5b0cf8 100644 --- a/include/linux/usb/typec_mux.h +++ b/include/linux/usb/typec_mux.h @@ -3,54 +3,48 @@ #ifndef __USB_TYPEC_MUX #define __USB_TYPEC_MUX -#include #include struct device; +struct typec_mux; +struct typec_switch; +struct fwnode_handle; -/** - * struct typec_switch - USB Type-C cable orientation switch - * @dev: Switch device - * @entry: List entry - * @set: Callback to the driver for setting the orientation - * - * USB Type-C pin flipper switch routing the correct data pairs from the - * connector to the USB controller depending on the orientation of the cable - * plug. - */ -struct typec_switch { - struct device *dev; - struct list_head entry; - - int (*set)(struct typec_switch *sw, enum typec_orientation orientation); -}; +typedef int (*typec_switch_set_fn_t)(struct typec_switch *sw, + enum typec_orientation orientation); -/** - * struct typec_switch - USB Type-C connector pin mux - * @dev: Mux device - * @entry: List entry - * @set: Callback to the driver for setting the state of the mux - * - * Pin Multiplexer/DeMultiplexer switch routing the USB Type-C connector pins to - * different components depending on the requested mode of operation. Used with - * Accessory/Alternate modes. - */ -struct typec_mux { - struct device *dev; - struct list_head entry; - - int (*set)(struct typec_mux *mux, int state); +struct typec_switch_desc { + struct fwnode_handle *fwnode; + typec_switch_set_fn_t set; + void *drvdata; }; struct typec_switch *typec_switch_get(struct device *dev); void typec_switch_put(struct typec_switch *sw); -int typec_switch_register(struct typec_switch *sw); +struct typec_switch * +typec_switch_register(struct device *parent, + const struct typec_switch_desc *desc); void typec_switch_unregister(struct typec_switch *sw); +void typec_switch_set_drvdata(struct typec_switch *sw, void *data); +void *typec_switch_get_drvdata(struct typec_switch *sw); + +typedef int (*typec_mux_set_fn_t)(struct typec_mux *mux, int state); + +struct typec_mux_desc { + struct fwnode_handle *fwnode; + typec_mux_set_fn_t set; + void *drvdata; +}; + struct typec_mux * typec_mux_get(struct device *dev, const struct typec_altmode_desc *desc); void typec_mux_put(struct typec_mux *mux); -int typec_mux_register(struct typec_mux *mux); +struct typec_mux * +typec_mux_register(struct device *parent, const struct typec_mux_desc *desc); void typec_mux_unregister(struct typec_mux *mux); +void typec_mux_set_drvdata(struct typec_mux *mux, void *data); +void *typec_mux_get_drvdata(struct typec_mux *mux); + #endif /* __USB_TYPEC_MUX */ -- cgit v1.2.3 From 3bd3706251ee8ab67e69d9340ac2abdca217e733 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 23 Apr 2019 16:26:36 +0200 Subject: sched/core: Provide a pointer to the valid CPU mask In commit: 4b53a3412d66 ("sched/core: Remove the tsk_nr_cpus_allowed() wrapper") the tsk_nr_cpus_allowed() wrapper was removed. There was not much difference in !RT but in RT we used this to implement migrate_disable(). Within a migrate_disable() section the CPU mask is restricted to single CPU while the "normal" CPU mask remains untouched. As an alternative implementation Ingo suggested to use: struct task_struct { const cpumask_t *cpus_ptr; cpumask_t cpus_mask; }; with t->cpus_ptr = &t->cpus_mask; In -RT we then can switch the cpus_ptr to: t->cpus_ptr = &cpumask_of(task_cpu(p)); in a migration disabled region. The rules are simple: - Code that 'uses' ->cpus_allowed would use the pointer. - Code that 'modifies' ->cpus_allowed would use the direct mask. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/20190423142636.14347-1-bigeasy@linutronix.de Signed-off-by: Ingo Molnar --- arch/ia64/kernel/mca.c | 2 +- arch/mips/include/asm/switch_to.h | 4 +-- arch/mips/kernel/mips-mt-fpaff.c | 2 +- arch/mips/kernel/traps.c | 6 ++--- arch/powerpc/platforms/cell/spufs/sched.c | 2 +- arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 2 +- drivers/infiniband/hw/hfi1/affinity.c | 6 ++--- drivers/infiniband/hw/hfi1/sdma.c | 3 +-- drivers/infiniband/hw/qib/qib_file_ops.c | 7 +++--- fs/proc/array.c | 4 +-- include/linux/sched.h | 5 ++-- init/init_task.c | 3 ++- kernel/cgroup/cpuset.c | 2 +- kernel/fork.c | 2 ++ kernel/sched/core.c | 40 +++++++++++++++--------------- kernel/sched/cpudeadline.c | 4 +-- kernel/sched/cpupri.c | 4 +-- kernel/sched/deadline.c | 6 ++--- kernel/sched/fair.c | 34 ++++++++++++------------- kernel/sched/rt.c | 4 +-- kernel/trace/trace_hwlat.c | 2 +- lib/smp_processor_id.c | 2 +- samples/trace_events/trace-events-sample.c | 2 +- 23 files changed, 75 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 6a52d761854b..79190d877fa7 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1831,7 +1831,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset, ti->cpu = cpu; p->stack = ti; p->state = TASK_UNINTERRUPTIBLE; - cpumask_set_cpu(cpu, &p->cpus_allowed); + cpumask_set_cpu(cpu, &p->cpus_mask); INIT_LIST_HEAD(&p->tasks); p->parent = p->real_parent = p->group_leader = p; INIT_LIST_HEAD(&p->children); diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h index 0f813bb753c6..09cbe9042828 100644 --- a/arch/mips/include/asm/switch_to.h +++ b/arch/mips/include/asm/switch_to.h @@ -42,7 +42,7 @@ extern struct task_struct *ll_task; * inline to try to keep the overhead down. If we have been forced to run on * a "CPU" with an FPU because of a previous high level of FP computation, * but did not actually use the FPU during the most recent time-slice (CU1 - * isn't set), we undo the restriction on cpus_allowed. + * isn't set), we undo the restriction on cpus_mask. * * We're not calling set_cpus_allowed() here, because we have no need to * force prompt migration - we're already switching the current CPU to a @@ -57,7 +57,7 @@ do { \ test_ti_thread_flag(__prev_ti, TIF_FPUBOUND) && \ (!(KSTK_STATUS(prev) & ST0_CU1))) { \ clear_ti_thread_flag(__prev_ti, TIF_FPUBOUND); \ - prev->cpus_allowed = prev->thread.user_cpus_allowed; \ + prev->cpus_mask = prev->thread.user_cpus_allowed; \ } \ next->thread.emulated_fp = 0; \ } while(0) diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index a7c0f97e4b0d..1a08428eedcf 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -177,7 +177,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, if (retval) goto out_unlock; - cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed); + cpumask_or(&allowed, &p->thread.user_cpus_allowed, p->cpus_ptr); cpumask_and(&mask, &allowed, cpu_active_mask); out_unlock: diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index c52766a5b85f..ac7159263da0 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -891,12 +891,12 @@ static void mt_ase_fp_affinity(void) * restricted the allowed set to exclude any CPUs with FPUs, * we'll skip the procedure. */ - if (cpumask_intersects(¤t->cpus_allowed, &mt_fpu_cpumask)) { + if (cpumask_intersects(¤t->cpus_mask, &mt_fpu_cpumask)) { cpumask_t tmask; current->thread.user_cpus_allowed - = current->cpus_allowed; - cpumask_and(&tmask, ¤t->cpus_allowed, + = current->cpus_mask; + cpumask_and(&tmask, ¤t->cpus_mask, &mt_fpu_cpumask); set_cpus_allowed_ptr(current, &tmask); set_thread_flag(TIF_FPUBOUND); diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index e56b553de27b..f18d5067cd0f 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -128,7 +128,7 @@ void __spu_update_sched_info(struct spu_context *ctx) * runqueue. The context will be rescheduled on the proper node * if it is timesliced or preempted. */ - cpumask_copy(&ctx->cpus_allowed, ¤t->cpus_allowed); + cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr); /* Save the current cpu id for spu interrupt routing. */ ctx->last_ran = raw_smp_processor_id(); diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 604c0e3bcc83..f68baccc69f0 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -1503,7 +1503,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma) * may be scheduled elsewhere and invalidate entries in the * pseudo-locked region. */ - if (!cpumask_subset(¤t->cpus_allowed, &plr->d->cpu_mask)) { + if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) { mutex_unlock(&rdtgroup_mutex); return -EINVAL; } diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 4fe662c3bbc1..c142b23bb401 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -1038,7 +1038,7 @@ int hfi1_get_proc_affinity(int node) struct hfi1_affinity_node *entry; cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask; const struct cpumask *node_mask, - *proc_mask = ¤t->cpus_allowed; + *proc_mask = current->cpus_ptr; struct hfi1_affinity_node_list *affinity = &node_affinity; struct cpu_mask_set *set = &affinity->proc; @@ -1046,7 +1046,7 @@ int hfi1_get_proc_affinity(int node) * check whether process/context affinity has already * been set */ - if (cpumask_weight(proc_mask) == 1) { + if (current->nr_cpus_allowed == 1) { hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl", current->pid, current->comm, cpumask_pr_args(proc_mask)); @@ -1057,7 +1057,7 @@ int hfi1_get_proc_affinity(int node) cpu = cpumask_first(proc_mask); cpumask_set_cpu(cpu, &set->used); goto done; - } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) { + } else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) { hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl", current->pid, current->comm, cpumask_pr_args(proc_mask)); diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index b0110728f541..7e8139ee0cc1 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -855,14 +855,13 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, { struct sdma_rht_node *rht_node; struct sdma_engine *sde = NULL; - const struct cpumask *current_mask = ¤t->cpus_allowed; unsigned long cpu_id; /* * To ensure that always the same sdma engine(s) will be * selected make sure the process is pinned to this CPU only. */ - if (cpumask_weight(current_mask) != 1) + if (current->nr_cpus_allowed != 1) goto out; cpu_id = smp_processor_id(); diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 78fa634de98a..27b6e664e59d 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1142,7 +1142,7 @@ static __poll_t qib_poll(struct file *fp, struct poll_table_struct *pt) static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd) { struct qib_filedata *fd = fp->private_data; - const unsigned int weight = cpumask_weight(¤t->cpus_allowed); + const unsigned int weight = current->nr_cpus_allowed; const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus); int local_cpu; @@ -1623,9 +1623,8 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) ret = find_free_ctxt(i_minor - 1, fp, uinfo); else { int unit; - const unsigned int cpu = cpumask_first(¤t->cpus_allowed); - const unsigned int weight = - cpumask_weight(¤t->cpus_allowed); + const unsigned int cpu = cpumask_first(current->cpus_ptr); + const unsigned int weight = current->nr_cpus_allowed; if (weight == 1 && !test_bit(cpu, qib_cpulist)) if (!find_hca(cpu, &unit) && unit >= 0) diff --git a/fs/proc/array.c b/fs/proc/array.c index 2edbb657f859..84908556ea58 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -381,9 +381,9 @@ static inline void task_context_switch_counts(struct seq_file *m, static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) { seq_printf(m, "Cpus_allowed:\t%*pb\n", - cpumask_pr_args(&task->cpus_allowed)); + cpumask_pr_args(task->cpus_ptr)); seq_printf(m, "Cpus_allowed_list:\t%*pbl\n", - cpumask_pr_args(&task->cpus_allowed)); + cpumask_pr_args(task->cpus_ptr)); } static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm) diff --git a/include/linux/sched.h b/include/linux/sched.h index 11837410690f..1b2590a8d038 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -651,7 +651,8 @@ struct task_struct { unsigned int policy; int nr_cpus_allowed; - cpumask_t cpus_allowed; + const cpumask_t *cpus_ptr; + cpumask_t cpus_mask; #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; @@ -1399,7 +1400,7 @@ extern struct pid *cad_pid; #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_MEMSTALL 0x01000000 /* Stalled due to lack of memory */ #define PF_UMH 0x02000000 /* I'm an Usermodehelper process */ -#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ +#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ diff --git a/init/init_task.c b/init/init_task.c index c70ef656d0f4..3c27c0efa316 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -72,7 +72,8 @@ struct task_struct init_task .static_prio = MAX_PRIO - 20, .normal_prio = MAX_PRIO - 20, .policy = SCHED_NORMAL, - .cpus_allowed = CPU_MASK_ALL, + .cpus_ptr = &init_task.cpus_mask, + .cpus_mask = CPU_MASK_ALL, .nr_cpus_allowed= NR_CPUS, .mm = NULL, .active_mm = &init_mm, diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 6a1942ed781c..fe90fa1899e6 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2829,7 +2829,7 @@ static void cpuset_fork(struct task_struct *task) if (task_css_is_root(task, cpuset_cgrp_id)) return; - set_cpus_allowed_ptr(task, ¤t->cpus_allowed); + set_cpus_allowed_ptr(task, current->cpus_ptr); task->mems_allowed = current->mems_allowed; } diff --git a/kernel/fork.c b/kernel/fork.c index 75675b9bf6df..6be686283e55 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -894,6 +894,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) #ifdef CONFIG_STACKPROTECTOR tsk->stack_canary = get_random_canary(); #endif + if (orig->cpus_ptr == &orig->cpus_mask) + tsk->cpus_ptr = &tsk->cpus_mask; /* * One for us, one for whoever does the "release_task()" (usually diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 874c427742a9..93ab85f0d076 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -930,7 +930,7 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) */ static inline bool is_cpu_allowed(struct task_struct *p, int cpu) { - if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) return false; if (is_per_cpu_kthread(p)) @@ -1025,7 +1025,7 @@ static int migration_cpu_stop(void *data) local_irq_disable(); /* * We need to explicitly wake pending tasks before running - * __migrate_task() such that we will not miss enforcing cpus_allowed + * __migrate_task() such that we will not miss enforcing cpus_ptr * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. */ sched_ttwu_pending(); @@ -1056,7 +1056,7 @@ static int migration_cpu_stop(void *data) */ void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) { - cpumask_copy(&p->cpus_allowed, new_mask); + cpumask_copy(&p->cpus_mask, new_mask); p->nr_cpus_allowed = cpumask_weight(new_mask); } @@ -1126,7 +1126,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, goto out; } - if (cpumask_equal(&p->cpus_allowed, new_mask)) + if (cpumask_equal(p->cpus_ptr, new_mask)) goto out; if (!cpumask_intersects(new_mask, cpu_valid_mask)) { @@ -1286,10 +1286,10 @@ static int migrate_swap_stop(void *data) if (task_cpu(arg->src_task) != arg->src_cpu) goto unlock; - if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed)) + if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr)) goto unlock; - if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed)) + if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr)) goto unlock; __migrate_swap_task(arg->src_task, arg->dst_cpu); @@ -1331,10 +1331,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p, if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu)) goto out; - if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed)) + if (!cpumask_test_cpu(arg.dst_cpu, arg.src_task->cpus_ptr)) goto out; - if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed)) + if (!cpumask_test_cpu(arg.src_cpu, arg.dst_task->cpus_ptr)) goto out; trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu); @@ -1479,7 +1479,7 @@ void kick_process(struct task_struct *p) EXPORT_SYMBOL_GPL(kick_process); /* - * ->cpus_allowed is protected by both rq->lock and p->pi_lock + * ->cpus_ptr is protected by both rq->lock and p->pi_lock * * A few notes on cpu_active vs cpu_online: * @@ -1519,14 +1519,14 @@ static int select_fallback_rq(int cpu, struct task_struct *p) for_each_cpu(dest_cpu, nodemask) { if (!cpu_active(dest_cpu)) continue; - if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) + if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) return dest_cpu; } } for (;;) { /* Any allowed, online CPU? */ - for_each_cpu(dest_cpu, &p->cpus_allowed) { + for_each_cpu(dest_cpu, p->cpus_ptr) { if (!is_cpu_allowed(p, dest_cpu)) continue; @@ -1570,7 +1570,7 @@ out: } /* - * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable. + * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable. */ static inline int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) @@ -1580,11 +1580,11 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) if (p->nr_cpus_allowed > 1) cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); else - cpu = cpumask_any(&p->cpus_allowed); + cpu = cpumask_any(p->cpus_ptr); /* * In order not to call set_task_cpu() on a blocking task we need - * to rely on ttwu() to place the task on a valid ->cpus_allowed + * to rely on ttwu() to place the task on a valid ->cpus_ptr * CPU. * * Since this is common to all placement strategies, this lives here. @@ -2395,7 +2395,7 @@ void wake_up_new_task(struct task_struct *p) #ifdef CONFIG_SMP /* * Fork balancing, do it here and not earlier because: - * - cpus_allowed can change in the fork path + * - cpus_ptr can change in the fork path * - any previously selected CPU might disappear through hotplug * * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, @@ -4267,7 +4267,7 @@ change: * the entire root_domain to become SCHED_DEADLINE. We * will also fail if there's no bandwidth available. */ - if (!cpumask_subset(span, &p->cpus_allowed) || + if (!cpumask_subset(span, p->cpus_ptr) || rq->rd->dl_bw.bw == 0) { task_rq_unlock(rq, p, &rf); return -EPERM; @@ -4866,7 +4866,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) goto out_unlock; raw_spin_lock_irqsave(&p->pi_lock, flags); - cpumask_and(mask, &p->cpus_allowed, cpu_active_mask); + cpumask_and(mask, &p->cpus_mask, cpu_active_mask); raw_spin_unlock_irqrestore(&p->pi_lock, flags); out_unlock: @@ -5443,7 +5443,7 @@ int task_can_attach(struct task_struct *p, * allowed nodes is unnecessary. Thus, cpusets are not * applicable for such threads. This prevents checking for * success of set_cpus_allowed_ptr() on all attached tasks - * before cpus_allowed may be changed. + * before cpus_mask may be changed. */ if (p->flags & PF_NO_SETAFFINITY) { ret = -EINVAL; @@ -5470,7 +5470,7 @@ int migrate_task_to(struct task_struct *p, int target_cpu) if (curr_cpu == target_cpu) return 0; - if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed)) + if (!cpumask_test_cpu(target_cpu, p->cpus_ptr)) return -EINVAL; /* TODO: This is not properly updating schedstats */ @@ -5608,7 +5608,7 @@ static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf) put_prev_task(rq, next); /* - * Rules for changing task_struct::cpus_allowed are holding + * Rules for changing task_struct::cpus_mask are holding * both pi_lock and rq->lock, such that holding either * stabilizes the mask. * diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c index 50316455ea66..d57fb2f8ae67 100644 --- a/kernel/sched/cpudeadline.c +++ b/kernel/sched/cpudeadline.c @@ -124,14 +124,14 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, const struct sched_dl_entity *dl_se = &p->dl; if (later_mask && - cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) { + cpumask_and(later_mask, cp->free_cpus, p->cpus_ptr)) { return 1; } else { int best_cpu = cpudl_maximum(cp); WARN_ON(best_cpu != -1 && !cpu_present(best_cpu)); - if (cpumask_test_cpu(best_cpu, &p->cpus_allowed) && + if (cpumask_test_cpu(best_cpu, p->cpus_ptr) && dl_time_before(dl_se->deadline, cp->elements[0].dl)) { if (later_mask) cpumask_set_cpu(best_cpu, later_mask); diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index daaadf939ccb..f7d2c10b4c92 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c @@ -98,11 +98,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, if (skip) continue; - if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) + if (cpumask_any_and(p->cpus_ptr, vec->mask) >= nr_cpu_ids) continue; if (lowest_mask) { - cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); + cpumask_and(lowest_mask, p->cpus_ptr, vec->mask); /* * We have to ensure that we have at least one bit diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 43901fa3f269..c1ef30861068 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -538,7 +538,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p * If we cannot preempt any rq, fall back to pick any * online CPU: */ - cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); + cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr); if (cpu >= nr_cpu_ids) { /* * Failed to find any suitable CPU. @@ -1824,7 +1824,7 @@ static void set_curr_task_dl(struct rq *rq) static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - cpumask_test_cpu(cpu, &p->cpus_allowed)) + cpumask_test_cpu(cpu, p->cpus_ptr)) return 1; return 0; } @@ -1974,7 +1974,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) /* Retry if something changed. */ if (double_lock_balance(rq, later_rq)) { if (unlikely(task_rq(task) != rq || - !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) || + !cpumask_test_cpu(later_rq->cpu, task->cpus_ptr) || task_running(rq, task) || !dl_task(task) || !task_on_rq_queued(task))) { diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f35930f5e528..8691a8fffe40 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1621,7 +1621,7 @@ static void task_numa_compare(struct task_numa_env *env, * be incurred if the tasks were swapped. */ /* Skip this swap candidate if cannot move to the source cpu */ - if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed)) + if (!cpumask_test_cpu(env->src_cpu, cur->cpus_ptr)) goto unlock; /* @@ -1718,7 +1718,7 @@ static void task_numa_find_cpu(struct task_numa_env *env, for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) { /* Skip this CPU if the source task cannot migrate */ - if (!cpumask_test_cpu(cpu, &env->p->cpus_allowed)) + if (!cpumask_test_cpu(cpu, env->p->cpus_ptr)) continue; env->dst_cpu = cpu; @@ -5831,7 +5831,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, /* Skip over this group if it has no CPUs allowed */ if (!cpumask_intersects(sched_group_span(group), - &p->cpus_allowed)) + p->cpus_ptr)) continue; local_group = cpumask_test_cpu(this_cpu, @@ -5963,7 +5963,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this return cpumask_first(sched_group_span(group)); /* Traverse only the allowed CPUs */ - for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) { + for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) { if (available_idle_cpu(i)) { struct rq *rq = cpu_rq(i); struct cpuidle_state *idle = idle_get_state(rq); @@ -6003,7 +6003,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p { int new_cpu = cpu; - if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed)) + if (!cpumask_intersects(sched_domain_span(sd), p->cpus_ptr)) return prev_cpu; /* @@ -6120,7 +6120,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int if (!test_idle_cores(target, false)) return -1; - cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed); + cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); for_each_cpu_wrap(core, cpus, target) { bool idle = true; @@ -6154,7 +6154,7 @@ static int select_idle_smt(struct task_struct *p, int target) return -1; for_each_cpu(cpu, cpu_smt_mask(target)) { - if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) continue; if (available_idle_cpu(cpu)) return cpu; @@ -6217,7 +6217,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t for_each_cpu_wrap(cpu, sched_domain_span(sd), target) { if (!--nr) return -1; - if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) continue; if (available_idle_cpu(cpu)) break; @@ -6254,7 +6254,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) recent_used_cpu != target && cpus_share_cache(recent_used_cpu, target) && available_idle_cpu(recent_used_cpu) && - cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) { + cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) { /* * Replace recent_used_cpu with prev as it is a potential * candidate for the next wake: @@ -6600,7 +6600,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) int max_spare_cap_cpu = -1; for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) { - if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) continue; /* Skip CPUs that will be overutilized. */ @@ -6689,7 +6689,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f } want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) && - cpumask_test_cpu(cpu, &p->cpus_allowed); + cpumask_test_cpu(cpu, p->cpus_ptr); } rcu_read_lock(); @@ -7445,14 +7445,14 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) /* * We do not migrate tasks that are: * 1) throttled_lb_pair, or - * 2) cannot be migrated to this CPU due to cpus_allowed, or + * 2) cannot be migrated to this CPU due to cpus_ptr, or * 3) running (obviously), or * 4) are cache-hot on their current CPU. */ if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu)) return 0; - if (!cpumask_test_cpu(env->dst_cpu, &p->cpus_allowed)) { + if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) { int cpu; schedstat_inc(p->se.statistics.nr_failed_migrations_affine); @@ -7472,7 +7472,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) /* Prevent to re-select dst_cpu via env's CPUs: */ for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) { - if (cpumask_test_cpu(cpu, &p->cpus_allowed)) { + if (cpumask_test_cpu(cpu, p->cpus_ptr)) { env->flags |= LBF_DST_PINNED; env->new_dst_cpu = cpu; break; @@ -8099,7 +8099,7 @@ static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd) /* * Group imbalance indicates (and tries to solve) the problem where balancing - * groups is inadequate due to ->cpus_allowed constraints. + * groups is inadequate due to ->cpus_ptr constraints. * * Imagine a situation of two groups of 4 CPUs each and 4 tasks each with a * cpumask covering 1 CPU of the first group and 3 CPUs of the second group. @@ -8768,7 +8768,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) /* * If the busiest group is imbalanced the below checks don't * work because they assume all things are equal, which typically - * isn't true due to cpus_allowed constraints and the like. + * isn't true due to cpus_ptr constraints and the like. */ if (busiest->group_type == group_imbalanced) goto force_balance; @@ -9210,7 +9210,7 @@ more_balance: * if the curr task on busiest CPU can't be * moved to this_cpu: */ - if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) { + if (!cpumask_test_cpu(this_cpu, busiest->curr->cpus_ptr)) { raw_spin_unlock_irqrestore(&busiest->lock, flags); env.flags |= LBF_ALL_PINNED; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 1e6b909dca36..63ad7c90822c 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1614,7 +1614,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - cpumask_test_cpu(cpu, &p->cpus_allowed)) + cpumask_test_cpu(cpu, p->cpus_ptr)) return 1; return 0; @@ -1751,7 +1751,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) * Also make sure that it wasn't scheduled on its rq. */ if (unlikely(task_rq(task) != rq || - !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) || + !cpumask_test_cpu(lowest_rq->cpu, task->cpus_ptr) || task_running(rq, task) || !rt_task(task) || !task_on_rq_queued(task))) { diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 1e6db9cbe4dc..fa95139445b2 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -277,7 +277,7 @@ static void move_to_next_cpu(void) * of this thread, than stop migrating for the duration * of the current test. */ - if (!cpumask_equal(current_mask, ¤t->cpus_allowed)) + if (!cpumask_equal(current_mask, current->cpus_ptr)) goto disable; get_online_cpus(); diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 157d9e31f6c2..60ba93fc42ce 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -23,7 +23,7 @@ unsigned int check_preemption_disabled(const char *what1, const char *what2) * Kernel threads bound to a single CPU can safely use * smp_processor_id(): */ - if (cpumask_equal(¤t->cpus_allowed, cpumask_of(this_cpu))) + if (cpumask_equal(current->cpus_ptr, cpumask_of(this_cpu))) goto out; /* diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index 1da597aa6141..1a72b7d95cdc 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -34,7 +34,7 @@ static void simple_thread_func(int cnt) /* Silly tracepoints */ trace_foo_bar("hello", cnt, array, random_strings[len], - ¤t->cpus_allowed); + current->cpus_ptr); trace_foo_with_template_simple("HELLO", cnt); -- cgit v1.2.3 From 5e83eafbfd3b351537c0d74467fc43e8a88f4ae4 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Mon, 27 May 2019 07:21:10 +0100 Subject: sched/fair: Remove the rq->cpu_load[] update code With LB_BIAS disabled, there is no need to update the rq->cpu_load[idx] any more. Signed-off-by: Dietmar Eggemann Signed-off-by: Peter Zijlstra (Intel) Acked-by: Rik van Riel Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Morten Rasmussen Cc: Patrick Bellasi Cc: Peter Zijlstra Cc: Quentin Perret Cc: Thomas Gleixner Cc: Valentin Schneider Cc: Vincent Guittot Link: https://lkml.kernel.org/r/20190527062116.11512-2-dietmar.eggemann@arm.com Signed-off-by: Ingo Molnar --- include/linux/sched/nohz.h | 8 -- kernel/sched/core.c | 1 - kernel/sched/fair.c | 255 --------------------------------------------- kernel/sched/sched.h | 6 -- kernel/time/tick-sched.c | 2 - 5 files changed, 272 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h index b36f4cf38111..1abe91ff6e4a 100644 --- a/include/linux/sched/nohz.h +++ b/include/linux/sched/nohz.h @@ -6,14 +6,6 @@ * This is the interface between the scheduler and nohz/dynticks: */ -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void cpu_load_update_nohz_start(void); -extern void cpu_load_update_nohz_stop(void); -#else -static inline void cpu_load_update_nohz_start(void) { } -static inline void cpu_load_update_nohz_stop(void) { } -#endif - #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void nohz_balance_enter_idle(int cpu); extern int get_nohz_timer_target(void); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 93ab85f0d076..00b8966802a8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3033,7 +3033,6 @@ void scheduler_tick(void) update_rq_clock(rq); curr->sched_class->task_tick(rq, curr, 0); - cpu_load_update_active(rq); calc_global_load_tick(rq); psi_task_tick(rq); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 08b1cb06f968..1aab323f1b4b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5322,71 +5322,6 @@ DEFINE_PER_CPU(cpumask_var_t, load_balance_mask); DEFINE_PER_CPU(cpumask_var_t, select_idle_mask); #ifdef CONFIG_NO_HZ_COMMON -/* - * per rq 'load' arrray crap; XXX kill this. - */ - -/* - * The exact cpuload calculated at every tick would be: - * - * load' = (1 - 1/2^i) * load + (1/2^i) * cur_load - * - * If a CPU misses updates for n ticks (as it was idle) and update gets - * called on the n+1-th tick when CPU may be busy, then we have: - * - * load_n = (1 - 1/2^i)^n * load_0 - * load_n+1 = (1 - 1/2^i) * load_n + (1/2^i) * cur_load - * - * decay_load_missed() below does efficient calculation of - * - * load' = (1 - 1/2^i)^n * load - * - * Because x^(n+m) := x^n * x^m we can decompose any x^n in power-of-2 factors. - * This allows us to precompute the above in said factors, thereby allowing the - * reduction of an arbitrary n in O(log_2 n) steps. (See also - * fixed_power_int()) - * - * The calculation is approximated on a 128 point scale. - */ -#define DEGRADE_SHIFT 7 - -static const u8 degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128}; -static const u8 degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = { - { 0, 0, 0, 0, 0, 0, 0, 0 }, - { 64, 32, 8, 0, 0, 0, 0, 0 }, - { 96, 72, 40, 12, 1, 0, 0, 0 }, - { 112, 98, 75, 43, 15, 1, 0, 0 }, - { 120, 112, 98, 76, 45, 16, 2, 0 } -}; - -/* - * Update cpu_load for any missed ticks, due to tickless idle. The backlog - * would be when CPU is idle and so we just decay the old load without - * adding any new load. - */ -static unsigned long -decay_load_missed(unsigned long load, unsigned long missed_updates, int idx) -{ - int j = 0; - - if (!missed_updates) - return load; - - if (missed_updates >= degrade_zero_ticks[idx]) - return 0; - - if (idx == 1) - return load >> missed_updates; - - while (missed_updates) { - if (missed_updates % 2) - load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT; - - missed_updates >>= 1; - j++; - } - return load; -} static struct { cpumask_var_t idle_cpus_mask; @@ -5398,201 +5333,12 @@ static struct { #endif /* CONFIG_NO_HZ_COMMON */ -/** - * __cpu_load_update - update the rq->cpu_load[] statistics - * @this_rq: The rq to update statistics for - * @this_load: The current load - * @pending_updates: The number of missed updates - * - * Update rq->cpu_load[] statistics. This function is usually called every - * scheduler tick (TICK_NSEC). - * - * This function computes a decaying average: - * - * load[i]' = (1 - 1/2^i) * load[i] + (1/2^i) * load - * - * Because of NOHZ it might not get called on every tick which gives need for - * the @pending_updates argument. - * - * load[i]_n = (1 - 1/2^i) * load[i]_n-1 + (1/2^i) * load_n-1 - * = A * load[i]_n-1 + B ; A := (1 - 1/2^i), B := (1/2^i) * load - * = A * (A * load[i]_n-2 + B) + B - * = A * (A * (A * load[i]_n-3 + B) + B) + B - * = A^3 * load[i]_n-3 + (A^2 + A + 1) * B - * = A^n * load[i]_0 + (A^(n-1) + A^(n-2) + ... + 1) * B - * = A^n * load[i]_0 + ((1 - A^n) / (1 - A)) * B - * = (1 - 1/2^i)^n * (load[i]_0 - load) + load - * - * In the above we've assumed load_n := load, which is true for NOHZ_FULL as - * any change in load would have resulted in the tick being turned back on. - * - * For regular NOHZ, this reduces to: - * - * load[i]_n = (1 - 1/2^i)^n * load[i]_0 - * - * see decay_load_misses(). For NOHZ_FULL we get to subtract and add the extra - * term. - */ -static void cpu_load_update(struct rq *this_rq, unsigned long this_load, - unsigned long pending_updates) -{ - unsigned long __maybe_unused tickless_load = this_rq->cpu_load[0]; - int i, scale; - - this_rq->nr_load_updates++; - - /* Update our load: */ - this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */ - for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) { - unsigned long old_load, new_load; - - /* scale is effectively 1 << i now, and >> i divides by scale */ - - old_load = this_rq->cpu_load[i]; -#ifdef CONFIG_NO_HZ_COMMON - old_load = decay_load_missed(old_load, pending_updates - 1, i); - if (tickless_load) { - old_load -= decay_load_missed(tickless_load, pending_updates - 1, i); - /* - * old_load can never be a negative value because a - * decayed tickless_load cannot be greater than the - * original tickless_load. - */ - old_load += tickless_load; - } -#endif - new_load = this_load; - /* - * Round up the averaging division if load is increasing. This - * prevents us from getting stuck on 9 if the load is 10, for - * example. - */ - if (new_load > old_load) - new_load += scale - 1; - - this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i; - } -} - /* Used instead of source_load when we know the type == 0 */ static unsigned long weighted_cpuload(struct rq *rq) { return cfs_rq_runnable_load_avg(&rq->cfs); } -#ifdef CONFIG_NO_HZ_COMMON -/* - * There is no sane way to deal with nohz on smp when using jiffies because the - * CPU doing the jiffies update might drift wrt the CPU doing the jiffy reading - * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}. - * - * Therefore we need to avoid the delta approach from the regular tick when - * possible since that would seriously skew the load calculation. This is why we - * use cpu_load_update_periodic() for CPUs out of nohz. However we'll rely on - * jiffies deltas for updates happening while in nohz mode (idle ticks, idle - * loop exit, nohz_idle_balance, nohz full exit...) - * - * This means we might still be one tick off for nohz periods. - */ - -static void cpu_load_update_nohz(struct rq *this_rq, - unsigned long curr_jiffies, - unsigned long load) -{ - unsigned long pending_updates; - - pending_updates = curr_jiffies - this_rq->last_load_update_tick; - if (pending_updates) { - this_rq->last_load_update_tick = curr_jiffies; - /* - * In the regular NOHZ case, we were idle, this means load 0. - * In the NOHZ_FULL case, we were non-idle, we should consider - * its weighted load. - */ - cpu_load_update(this_rq, load, pending_updates); - } -} - -/* - * Called from nohz_idle_balance() to update the load ratings before doing the - * idle balance. - */ -static void cpu_load_update_idle(struct rq *this_rq) -{ - /* - * bail if there's load or we're actually up-to-date. - */ - if (weighted_cpuload(this_rq)) - return; - - cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), 0); -} - -/* - * Record CPU load on nohz entry so we know the tickless load to account - * on nohz exit. cpu_load[0] happens then to be updated more frequently - * than other cpu_load[idx] but it should be fine as cpu_load readers - * shouldn't rely into synchronized cpu_load[*] updates. - */ -void cpu_load_update_nohz_start(void) -{ - struct rq *this_rq = this_rq(); - - /* - * This is all lockless but should be fine. If weighted_cpuload changes - * concurrently we'll exit nohz. And cpu_load write can race with - * cpu_load_update_idle() but both updater would be writing the same. - */ - this_rq->cpu_load[0] = weighted_cpuload(this_rq); -} - -/* - * Account the tickless load in the end of a nohz frame. - */ -void cpu_load_update_nohz_stop(void) -{ - unsigned long curr_jiffies = READ_ONCE(jiffies); - struct rq *this_rq = this_rq(); - unsigned long load; - struct rq_flags rf; - - if (curr_jiffies == this_rq->last_load_update_tick) - return; - - load = weighted_cpuload(this_rq); - rq_lock(this_rq, &rf); - update_rq_clock(this_rq); - cpu_load_update_nohz(this_rq, curr_jiffies, load); - rq_unlock(this_rq, &rf); -} -#else /* !CONFIG_NO_HZ_COMMON */ -static inline void cpu_load_update_nohz(struct rq *this_rq, - unsigned long curr_jiffies, - unsigned long load) { } -#endif /* CONFIG_NO_HZ_COMMON */ - -static void cpu_load_update_periodic(struct rq *this_rq, unsigned long load) -{ -#ifdef CONFIG_NO_HZ_COMMON - /* See the mess around cpu_load_update_nohz(). */ - this_rq->last_load_update_tick = READ_ONCE(jiffies); -#endif - cpu_load_update(this_rq, load, 1); -} - -/* - * Called from scheduler_tick() - */ -void cpu_load_update_active(struct rq *this_rq) -{ - unsigned long load = weighted_cpuload(this_rq); - - if (tick_nohz_tick_stopped()) - cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), load); - else - cpu_load_update_periodic(this_rq, load); -} - /* * Return a low guess at the load of a migration-source CPU weighted * according to the scheduling class and "nice" value. @@ -9876,7 +9622,6 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags, rq_lock_irqsave(rq, &rf); update_rq_clock(rq); - cpu_load_update_idle(rq); rq_unlock_irqrestore(rq, &rf); if (flags & NOHZ_BALANCE_KICK) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c308410675ed..3750b5e53792 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -96,12 +96,6 @@ extern atomic_long_t calc_load_tasks; extern void calc_global_load_tick(struct rq *this_rq); extern long calc_load_fold_active(struct rq *this_rq, long adjust); -#ifdef CONFIG_SMP -extern void cpu_load_update_active(struct rq *this_rq); -#else -static inline void cpu_load_update_active(struct rq *this_rq) { } -#endif - /* * Helpers for converting nanosecond timing to jiffy resolution */ diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f4ee1a3428ae..be9707f68024 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -782,7 +782,6 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) */ if (!ts->tick_stopped) { calc_load_nohz_start(); - cpu_load_update_nohz_start(); quiet_vmstat(); ts->last_tick = hrtimer_get_expires(&ts->sched_timer); @@ -829,7 +828,6 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) { /* Update jiffies first */ tick_do_update_jiffies64(now); - cpu_load_update_nohz_stop(); /* * Clear the timer idle flag, so we avoid IPIs on remote queueing and -- cgit v1.2.3 From 0e1fef63d92d61ed561e504c3a078a827a0f9bfe Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Mon, 27 May 2019 07:21:14 +0100 Subject: sched/core: Remove sd->*_idx The sched domain per rq load index files also disappear from the /proc/sys/kernel/sched_domain/cpuX/domainY directories. Signed-off-by: Dietmar Eggemann Signed-off-by: Peter Zijlstra (Intel) Acked-by: Rik van Riel Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Morten Rasmussen Cc: Patrick Bellasi Cc: Peter Zijlstra Cc: Quentin Perret Cc: Thomas Gleixner Cc: Valentin Schneider Cc: Vincent Guittot Link: https://lkml.kernel.org/r/20190527062116.11512-6-dietmar.eggemann@arm.com Signed-off-by: Ingo Molnar --- include/linux/sched/topology.h | 5 ----- kernel/sched/debug.c | 25 ++++++++++--------------- kernel/sched/topology.c | 10 ---------- 3 files changed, 10 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index cfc0a89a7159..53afbe07354a 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -84,11 +84,6 @@ struct sched_domain { unsigned int busy_factor; /* less balancing by factor if busy */ unsigned int imbalance_pct; /* No balance until over watermark */ unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ - unsigned int busy_idx; - unsigned int idle_idx; - unsigned int newidle_idx; - unsigned int wake_idx; - unsigned int forkexec_idx; int nohz_idle; /* NOHZ IDLE status */ int flags; /* See SD_* */ diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index a0b0d6e21e5b..7ffde8ce82fd 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -251,25 +251,20 @@ set_table_entry(struct ctl_table *entry, static struct ctl_table * sd_alloc_ctl_domain_table(struct sched_domain *sd) { - struct ctl_table *table = sd_alloc_ctl_entry(14); + struct ctl_table *table = sd_alloc_ctl_entry(9); if (table == NULL) return NULL; - set_table_entry(&table[0], "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax); - set_table_entry(&table[1], "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax); - set_table_entry(&table[2], "busy_idx", &sd->busy_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[3], "idle_idx", &sd->idle_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[5], "wake_idx", &sd->wake_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[7], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[9], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[10], "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[11], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax); - set_table_entry(&table[12], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring); - /* &table[13] is terminator */ + set_table_entry(&table[0], "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax); + set_table_entry(&table[1], "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax); + set_table_entry(&table[2], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[3], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[4], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[5], "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax); + set_table_entry(&table[7], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring); + /* &table[8] is terminator */ return table; } diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index f53f89df837d..63184cf0d0d7 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1344,11 +1344,6 @@ sd_init(struct sched_domain_topology_level *tl, .imbalance_pct = 125, .cache_nice_tries = 0, - .busy_idx = 0, - .idle_idx = 0, - .newidle_idx = 0, - .wake_idx = 0, - .forkexec_idx = 0, .flags = 1*SD_LOAD_BALANCE | 1*SD_BALANCE_NEWIDLE @@ -1400,13 +1395,10 @@ sd_init(struct sched_domain_topology_level *tl, } else if (sd->flags & SD_SHARE_PKG_RESOURCES) { sd->imbalance_pct = 117; sd->cache_nice_tries = 1; - sd->busy_idx = 2; #ifdef CONFIG_NUMA } else if (sd->flags & SD_NUMA) { sd->cache_nice_tries = 2; - sd->busy_idx = 3; - sd->idle_idx = 2; sd->flags &= ~SD_PREFER_SIBLING; sd->flags |= SD_SERIALIZE; @@ -1419,8 +1411,6 @@ sd_init(struct sched_domain_topology_level *tl, #endif } else { sd->cache_nice_tries = 1; - sd->busy_idx = 2; - sd->idle_idx = 1; } /* -- cgit v1.2.3 From d16dbd1b8a29bb9f8aca2c2f3bd1a0d2b7621126 Mon Sep 17 00:00:00 2001 From: Yuyang Du Date: Mon, 6 May 2019 16:19:22 +0800 Subject: locking/lockdep: Update obsolete struct field description The lock_chain struct definition has outdated comment, update it and add struct member description. Signed-off-by: Yuyang Du Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bvanassche@acm.org Cc: frederic@kernel.org Cc: ming.lei@redhat.com Cc: will.deacon@arm.com Link: https://lkml.kernel.org/r/20190506081939.74287-7-duyuyang@gmail.com Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 6e2377e6c1d6..851d44fa5457 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -203,11 +203,17 @@ struct lock_list { struct lock_list *parent; }; -/* - * We record lock dependency chains, so that we can cache them: +/** + * struct lock_chain - lock dependency chain record + * + * @irq_context: the same as irq_context in held_lock below + * @depth: the number of held locks in this chain + * @base: the index in chain_hlocks for this chain + * @entry: the collided lock chains in lock_chain hash list + * @chain_key: the hash key of this lock_chain */ struct lock_chain { - /* see BUILD_BUG_ON()s in lookup_chain_cache() */ + /* see BUILD_BUG_ON()s in add_chain_cache() */ unsigned int irq_context : 2, depth : 6, base : 24; -- cgit v1.2.3 From e196e479a3b844da6e6e71e0d2a8694040cb4e52 Mon Sep 17 00:00:00 2001 From: Yuyang Du Date: Mon, 6 May 2019 16:19:23 +0800 Subject: locking/lockdep: Use lockdep_init_task for task initiation consistently Despite that there is a lockdep_init_task() which does nothing, lockdep initiates tasks by assigning lockdep fields and does so inconsistently. Fix this by using lockdep_init_task(). Signed-off-by: Yuyang Du Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bvanassche@acm.org Cc: frederic@kernel.org Cc: ming.lei@redhat.com Cc: will.deacon@arm.com Link: https://lkml.kernel.org/r/20190506081939.74287-8-duyuyang@gmail.com Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 7 ++++++- init/init_task.c | 2 ++ kernel/fork.c | 3 --- kernel/locking/lockdep.c | 11 ++++++++--- 4 files changed, 16 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 851d44fa5457..5d05b8149f19 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -287,6 +287,8 @@ extern void lockdep_free_key_range(void *start, unsigned long size); extern asmlinkage void lockdep_sys_exit(void); extern void lockdep_set_selftest_task(struct task_struct *task); +extern void lockdep_init_task(struct task_struct *task); + extern void lockdep_off(void); extern void lockdep_on(void); @@ -411,6 +413,10 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); #else /* !CONFIG_LOCKDEP */ +static inline void lockdep_init_task(struct task_struct *task) +{ +} + static inline void lockdep_off(void) { } @@ -503,7 +509,6 @@ enum xhlock_context_t { { .name = (_name), .key = (void *)(_key), } static inline void lockdep_invariant_state(bool force) {} -static inline void lockdep_init_task(struct task_struct *task) {} static inline void lockdep_free_task(struct task_struct *task) {} #ifdef CONFIG_LOCK_STAT diff --git a/init/init_task.c b/init/init_task.c index c70ef656d0f4..1b15cb90d64f 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -166,6 +166,8 @@ struct task_struct init_task .softirqs_enabled = 1, #endif #ifdef CONFIG_LOCKDEP + .lockdep_depth = 0, /* no locks held yet */ + .curr_chain_key = 0, .lockdep_recursion = 0, #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/kernel/fork.c b/kernel/fork.c index 75675b9bf6df..735d0b4a89e2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1984,9 +1984,6 @@ static __latent_entropy struct task_struct *copy_process( p->pagefault_disabled = 0; #ifdef CONFIG_LOCKDEP - p->lockdep_depth = 0; /* no locks held yet */ - p->curr_chain_key = 0; - p->lockdep_recursion = 0; lockdep_init_task(p); #endif diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index bc1efc12a8c5..b7d9c28ecf3b 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -359,6 +359,13 @@ static inline u64 iterate_chain_key(u64 key, u32 idx) return k0 | (u64)k1 << 32; } +void lockdep_init_task(struct task_struct *task) +{ + task->lockdep_depth = 0; /* no locks held yet */ + task->curr_chain_key = 0; + task->lockdep_recursion = 0; +} + void lockdep_off(void) { current->lockdep_recursion++; @@ -4589,9 +4596,7 @@ void lockdep_reset(void) int i; raw_local_irq_save(flags); - current->curr_chain_key = 0; - current->lockdep_depth = 0; - current->lockdep_recursion = 0; + lockdep_init_task(current); memset(current->held_locks, 0, MAX_LOCK_DEPTH*sizeof(struct held_lock)); nr_hardirq_chains = 0; nr_softirq_chains = 0; -- cgit v1.2.3 From f6ec8829ac9d59b637366c13038f15d6f6156fe1 Mon Sep 17 00:00:00 2001 From: Yuyang Du Date: Mon, 6 May 2019 16:19:24 +0800 Subject: locking/lockdep: Define INITIAL_CHAIN_KEY for chain keys to start with Chain keys are computed using Jenkins hash function, which needs an initial hash to start with. Dedicate a macro to make this clear and configurable. A later patch changes this initial chain key. Signed-off-by: Yuyang Du Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bvanassche@acm.org Cc: frederic@kernel.org Cc: ming.lei@redhat.com Cc: will.deacon@arm.com Link: https://lkml.kernel.org/r/20190506081939.74287-9-duyuyang@gmail.com Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 1 + init/init_task.c | 2 +- kernel/locking/lockdep.c | 18 +++++++++--------- 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 5d05b8149f19..d4e69595dbd4 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -229,6 +229,7 @@ struct lock_chain { * bitfield and hitting the BUG in hlock_class(). */ #define MAX_LOCKDEP_KEYS ((1UL << MAX_LOCKDEP_KEYS_BITS) - 1) +#define INITIAL_CHAIN_KEY 0 struct held_lock { /* diff --git a/init/init_task.c b/init/init_task.c index 1b15cb90d64f..afa6ad795355 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -167,7 +167,7 @@ struct task_struct init_task #endif #ifdef CONFIG_LOCKDEP .lockdep_depth = 0, /* no locks held yet */ - .curr_chain_key = 0, + .curr_chain_key = INITIAL_CHAIN_KEY, .lockdep_recursion = 0, #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index b7d9c28ecf3b..9edf6f12b711 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -362,7 +362,7 @@ static inline u64 iterate_chain_key(u64 key, u32 idx) void lockdep_init_task(struct task_struct *task) { task->lockdep_depth = 0; /* no locks held yet */ - task->curr_chain_key = 0; + task->curr_chain_key = INITIAL_CHAIN_KEY; task->lockdep_recursion = 0; } @@ -857,7 +857,7 @@ static u16 chain_hlocks[MAX_LOCKDEP_CHAIN_HLOCKS]; static bool check_lock_chain_key(struct lock_chain *chain) { #ifdef CONFIG_PROVE_LOCKING - u64 chain_key = 0; + u64 chain_key = INITIAL_CHAIN_KEY; int i; for (i = chain->base; i < chain->base + chain->depth; i++) @@ -2524,7 +2524,7 @@ static void print_chain_keys_held_locks(struct task_struct *curr, struct held_lock *hlock_next) { struct held_lock *hlock; - u64 chain_key = 0; + u64 chain_key = INITIAL_CHAIN_KEY; int depth = curr->lockdep_depth; int i = get_first_held_lock(curr, hlock_next); @@ -2544,7 +2544,7 @@ print_chain_keys_held_locks(struct task_struct *curr, struct held_lock *hlock_ne static void print_chain_keys_chain(struct lock_chain *chain) { int i; - u64 chain_key = 0; + u64 chain_key = INITIAL_CHAIN_KEY; int class_id; printk("depth: %u\n", chain->depth); @@ -2848,7 +2848,7 @@ static void check_chain_key(struct task_struct *curr) #ifdef CONFIG_DEBUG_LOCKDEP struct held_lock *hlock, *prev_hlock = NULL; unsigned int i; - u64 chain_key = 0; + u64 chain_key = INITIAL_CHAIN_KEY; for (i = 0; i < curr->lockdep_depth; i++) { hlock = curr->held_locks + i; @@ -2872,7 +2872,7 @@ static void check_chain_key(struct task_struct *curr) if (prev_hlock && (prev_hlock->irq_context != hlock->irq_context)) - chain_key = 0; + chain_key = INITIAL_CHAIN_KEY; chain_key = iterate_chain_key(chain_key, hlock->class_idx); prev_hlock = hlock; } @@ -3787,14 +3787,14 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, /* * How can we have a chain hash when we ain't got no keys?! */ - if (DEBUG_LOCKS_WARN_ON(chain_key != 0)) + if (DEBUG_LOCKS_WARN_ON(chain_key != INITIAL_CHAIN_KEY)) return 0; chain_head = 1; } hlock->prev_chain_key = chain_key; if (separate_irq_context(curr, hlock)) { - chain_key = 0; + chain_key = INITIAL_CHAIN_KEY; chain_head = 1; } chain_key = iterate_chain_key(chain_key, class_idx); @@ -4636,7 +4636,7 @@ static void remove_class_from_lock_chain(struct pending_free *pf, return; recalc: - chain_key = 0; + chain_key = INITIAL_CHAIN_KEY; for (i = chain->base; i < chain->base + chain->depth; i++) chain_key = iterate_chain_key(chain_key, chain_hlocks[i] + 1); if (chain->depth && chain->chain_key == chain_key) -- cgit v1.2.3 From 01bb6f0af992a1e6b7797d92fd31a7864872e347 Mon Sep 17 00:00:00 2001 From: Yuyang Du Date: Mon, 6 May 2019 16:19:25 +0800 Subject: locking/lockdep: Change the range of class_idx in held_lock struct held_lock->class_idx is used to point to the class of the held lock. The index is shifted by 1 to make index 0 mean no class, which results in class index shifting back and forth but is not worth doing so. The reason is: (1) there will be no "no-class" held_lock to begin with, and (2) index 0 seems to be used for error checking, but if something wrong indeed happened, the index can't be counted on to distinguish it as that something won't set the class_idx to 0 on purpose to tell us it is wrong. Therefore, change the index to start from 0. This saves a lot of back-and-forth shifts and a class slot back to lock_classes. Since index 0 is now used for lock class, we change the initial chain key to -1 to avoid key collision, which is due to the fact that __jhash_mix(0, 0, 0) = 0. Actually, the initial chain key can be any arbitrary value other than 0. In addition, a bitmap is maintained to keep track of the used lock classes, and we check the validity of the held lock against that bitmap. Signed-off-by: Yuyang Du Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bvanassche@acm.org Cc: frederic@kernel.org Cc: ming.lei@redhat.com Cc: will.deacon@arm.com Link: https://lkml.kernel.org/r/20190506081939.74287-10-duyuyang@gmail.com Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 14 ++++++------ kernel/locking/lockdep.c | 59 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 46 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index d4e69595dbd4..30a0f81aa130 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -223,13 +223,8 @@ struct lock_chain { }; #define MAX_LOCKDEP_KEYS_BITS 13 -/* - * Subtract one because we offset hlock->class_idx by 1 in order - * to make 0 mean no class. This avoids overflowing the class_idx - * bitfield and hitting the BUG in hlock_class(). - */ -#define MAX_LOCKDEP_KEYS ((1UL << MAX_LOCKDEP_KEYS_BITS) - 1) -#define INITIAL_CHAIN_KEY 0 +#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS) +#define INITIAL_CHAIN_KEY -1 struct held_lock { /* @@ -254,6 +249,11 @@ struct held_lock { u64 waittime_stamp; u64 holdtime_stamp; #endif + /* + * class_idx is zero-indexed; it points to the element in + * lock_classes this held lock instance belongs to. class_idx is in + * the range from 0 to (MAX_LOCKDEP_KEYS-1) inclusive. + */ unsigned int class_idx:MAX_LOCKDEP_KEYS_BITS; /* * The lock-stack is unified in that the lock chains of interrupt diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 9edf6f12b711..3eecae315885 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -151,17 +151,28 @@ unsigned long nr_lock_classes; static #endif struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; +static DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS); static inline struct lock_class *hlock_class(struct held_lock *hlock) { - if (!hlock->class_idx) { + unsigned int class_idx = hlock->class_idx; + + /* Don't re-read hlock->class_idx, can't use READ_ONCE() on bitfield */ + barrier(); + + if (!test_bit(class_idx, lock_classes_in_use)) { /* * Someone passed in garbage, we give up. */ DEBUG_LOCKS_WARN_ON(1); return NULL; } - return lock_classes + hlock->class_idx - 1; + + /* + * At this point, if the passed hlock->class_idx is still garbage, + * we just have to live with it + */ + return lock_classes + class_idx; } #ifdef CONFIG_LOCK_STAT @@ -590,19 +601,22 @@ static void print_lock(struct held_lock *hlock) /* * We can be called locklessly through debug_show_all_locks() so be * extra careful, the hlock might have been released and cleared. + * + * If this indeed happens, lets pretend it does not hurt to continue + * to print the lock unless the hlock class_idx does not point to a + * registered class. The rationale here is: since we don't attempt + * to distinguish whether we are in this situation, if it just + * happened we can't count on class_idx to tell either. */ - unsigned int class_idx = hlock->class_idx; + struct lock_class *lock = hlock_class(hlock); - /* Don't re-read hlock->class_idx, can't use READ_ONCE() on bitfields: */ - barrier(); - - if (!class_idx || (class_idx - 1) >= MAX_LOCKDEP_KEYS) { + if (!lock) { printk(KERN_CONT "\n"); return; } printk(KERN_CONT "%p", hlock->instance); - print_lock_name(lock_classes + class_idx - 1); + print_lock_name(lock); printk(KERN_CONT ", at: %pS\n", (void *)hlock->acquire_ip); } @@ -861,7 +875,7 @@ static bool check_lock_chain_key(struct lock_chain *chain) int i; for (i = chain->base; i < chain->base + chain->depth; i++) - chain_key = iterate_chain_key(chain_key, chain_hlocks[i] + 1); + chain_key = iterate_chain_key(chain_key, chain_hlocks[i]); /* * The 'unsigned long long' casts avoid that a compiler warning * is reported when building tools/lib/lockdep. @@ -1136,6 +1150,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) return NULL; } nr_lock_classes++; + __set_bit(class - lock_classes, lock_classes_in_use); debug_atomic_inc(nr_unused_locks); class->key = key; class->name = lock->name; @@ -2550,7 +2565,7 @@ static void print_chain_keys_chain(struct lock_chain *chain) printk("depth: %u\n", chain->depth); for (i = 0; i < chain->depth; i++) { class_id = chain_hlocks[chain->base + i]; - chain_key = print_chain_key_iteration(class_id + 1, chain_key); + chain_key = print_chain_key_iteration(class_id, chain_key); print_lock_name(lock_classes + class_id); printk("\n"); @@ -2601,7 +2616,7 @@ static int check_no_collision(struct task_struct *curr, } for (j = 0; j < chain->depth - 1; j++, i++) { - id = curr->held_locks[i].class_idx - 1; + id = curr->held_locks[i].class_idx; if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id)) { print_collision(curr, hlock, chain); @@ -2684,7 +2699,7 @@ static inline int add_chain_cache(struct task_struct *curr, if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) { chain->base = nr_chain_hlocks; for (j = 0; j < chain->depth - 1; j++, i++) { - int lock_id = curr->held_locks[i].class_idx - 1; + int lock_id = curr->held_locks[i].class_idx; chain_hlocks[chain->base + j] = lock_id; } chain_hlocks[chain->base + j] = class - lock_classes; @@ -2864,10 +2879,12 @@ static void check_chain_key(struct task_struct *curr) (unsigned long long)hlock->prev_chain_key); return; } + /* - * Whoops ran out of static storage again? + * hlock->class_idx can't go beyond MAX_LOCKDEP_KEYS, but is + * it registered lock class index? */ - if (DEBUG_LOCKS_WARN_ON(hlock->class_idx > MAX_LOCKDEP_KEYS)) + if (DEBUG_LOCKS_WARN_ON(!test_bit(hlock->class_idx, lock_classes_in_use))) return; if (prev_hlock && (prev_hlock->irq_context != @@ -3715,7 +3732,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH)) return 0; - class_idx = class - lock_classes + 1; + class_idx = class - lock_classes; if (depth) { hlock = curr->held_locks + depth - 1; @@ -3777,9 +3794,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, * the hash, not class->key. */ /* - * Whoops, we did it again.. ran straight out of our static allocation. + * Whoops, we did it again.. class_idx is invalid. */ - if (DEBUG_LOCKS_WARN_ON(class_idx > MAX_LOCKDEP_KEYS)) + if (DEBUG_LOCKS_WARN_ON(!test_bit(class_idx, lock_classes_in_use))) return 0; chain_key = curr->curr_chain_key; @@ -3894,7 +3911,7 @@ static int match_held_lock(const struct held_lock *hlock, if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock)) return 0; - if (hlock->class_idx == class - lock_classes + 1) + if (hlock->class_idx == class - lock_classes) return 1; } @@ -3988,7 +4005,7 @@ __lock_set_class(struct lockdep_map *lock, const char *name, lockdep_init_map(lock, name, key, 0); class = register_lock_class(lock, subclass, 0); - hlock->class_idx = class - lock_classes + 1; + hlock->class_idx = class - lock_classes; curr->lockdep_depth = i; curr->curr_chain_key = hlock->prev_chain_key; @@ -4638,7 +4655,7 @@ static void remove_class_from_lock_chain(struct pending_free *pf, recalc: chain_key = INITIAL_CHAIN_KEY; for (i = chain->base; i < chain->base + chain->depth; i++) - chain_key = iterate_chain_key(chain_key, chain_hlocks[i] + 1); + chain_key = iterate_chain_key(chain_key, chain_hlocks[i]); if (chain->depth && chain->chain_key == chain_key) return; /* Overwrite the chain key for concurrent RCU readers. */ @@ -4712,6 +4729,7 @@ static void zap_class(struct pending_free *pf, struct lock_class *class) WRITE_ONCE(class->key, NULL); WRITE_ONCE(class->name, NULL); nr_lock_classes--; + __clear_bit(class - lock_classes, lock_classes_in_use); } else { WARN_ONCE(true, "%s() failed for class %s\n", __func__, class->name); @@ -5057,6 +5075,7 @@ void __init lockdep_init(void) printk(" memory used by lock dependency info: %zu kB\n", (sizeof(lock_classes) + + sizeof(lock_classes_in_use) + sizeof(classhash_table) + sizeof(list_entries) + sizeof(list_entries_in_use) + -- cgit v1.2.3 From aac1f7f95f115d5a5329be05b80022e72df7d080 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 12 May 2019 17:55:10 +0200 Subject: sysfs: Add sysfs_update_groups function Adding sysfs_update_groups function to update multiple groups. sysfs_update_groups - given a directory kobject, create a bunch of attribute groups @kobj: The kobject to update the group on @groups: The attribute groups to update, NULL terminated This function update a bunch of attribute groups. If an error occurs when updating a group, all previously updated groups will be removed together with already existing (not updated) attributes. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Greg Kroah-Hartman Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190512155518.21468-2-jolsa@kernel.org Signed-off-by: Ingo Molnar --- fs/sysfs/group.c | 54 +++++++++++++++++++++++++++++++++++++-------------- include/linux/sysfs.h | 8 ++++++++ 2 files changed, 47 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 57038604d4a8..d41c21fef138 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -175,6 +175,26 @@ int sysfs_create_group(struct kobject *kobj, } EXPORT_SYMBOL_GPL(sysfs_create_group); +static int internal_create_groups(struct kobject *kobj, int update, + const struct attribute_group **groups) +{ + int error = 0; + int i; + + if (!groups) + return 0; + + for (i = 0; groups[i]; i++) { + error = internal_create_group(kobj, update, groups[i]); + if (error) { + while (--i >= 0) + sysfs_remove_group(kobj, groups[i]); + break; + } + } + return error; +} + /** * sysfs_create_groups - given a directory kobject, create a bunch of attribute groups * @kobj: The kobject to create the group on @@ -191,24 +211,28 @@ EXPORT_SYMBOL_GPL(sysfs_create_group); int sysfs_create_groups(struct kobject *kobj, const struct attribute_group **groups) { - int error = 0; - int i; - - if (!groups) - return 0; - - for (i = 0; groups[i]; i++) { - error = sysfs_create_group(kobj, groups[i]); - if (error) { - while (--i >= 0) - sysfs_remove_group(kobj, groups[i]); - break; - } - } - return error; + return internal_create_groups(kobj, 0, groups); } EXPORT_SYMBOL_GPL(sysfs_create_groups); +/** + * sysfs_update_groups - given a directory kobject, create a bunch of attribute groups + * @kobj: The kobject to update the group on + * @groups: The attribute groups to update, NULL terminated + * + * This function update a bunch of attribute groups. If an error occurs when + * updating a group, all previously updated groups will be removed together + * with already existing (not updated) attributes. + * + * Returns 0 on success or error code from sysfs_update_group on failure. + */ +int sysfs_update_groups(struct kobject *kobj, + const struct attribute_group **groups) +{ + return internal_create_groups(kobj, 1, groups); +} +EXPORT_SYMBOL_GPL(sysfs_update_groups); + /** * sysfs_update_group - given a directory kobject, update an attribute group * @kobj: The kobject to update the group on diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 786816cf4aa5..965236795750 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -268,6 +268,8 @@ int __must_check sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp); int __must_check sysfs_create_groups(struct kobject *kobj, const struct attribute_group **groups); +int __must_check sysfs_update_groups(struct kobject *kobj, + const struct attribute_group **groups); int sysfs_update_group(struct kobject *kobj, const struct attribute_group *grp); void sysfs_remove_group(struct kobject *kobj, @@ -433,6 +435,12 @@ static inline int sysfs_create_groups(struct kobject *kobj, return 0; } +static inline int sysfs_update_groups(struct kobject *kobj, + const struct attribute_group **groups) +{ + return 0; +} + static inline int sysfs_update_group(struct kobject *kobj, const struct attribute_group *grp) { -- cgit v1.2.3 From f3a3a8257e5a1a5e67cbb1afdbc4c1c6a26f1b22 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 12 May 2019 17:55:11 +0200 Subject: perf/core: Add attr_groups_update into struct pmu Adding attr_update attribute group into pmu, to allow having multiple attribute groups for same group name. This will allow us to update "events" or "format" directories with attributes that depend on various HW conditions. For example having group_format_extra group that updates "format" directory only if pmu version is 2 and higher: static umode_t exra_is_visible(struct kobject *kobj, struct attribute *attr, int i) { return x86_pmu.version >= 2 ? attr->mode : 0; } static struct attribute_group group_format_extra = { .name = "format", .is_visible = exra_is_visible, }; Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190512155518.21468-3-jolsa@kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + kernel/events/core.c | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0ab99c7b652d..3dc01cf98e16 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -255,6 +255,7 @@ struct pmu { struct module *module; struct device *dev; const struct attribute_group **attr_groups; + const struct attribute_group **attr_update; const char *name; int type; diff --git a/kernel/events/core.c b/kernel/events/core.c index 3005c80f621d..118ad1aef6af 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9874,6 +9874,12 @@ static int pmu_dev_alloc(struct pmu *pmu) if (ret) goto del_dev; + if (pmu->attr_update) + ret = sysfs_update_groups(&pmu->dev->kobj, pmu->attr_update); + + if (ret) + goto del_dev; + out: return ret; -- cgit v1.2.3 From 3724921396dd1a07c93e3516b8d7c9ff570d17a9 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 22 May 2019 14:22:48 +0100 Subject: locking/atomic: Use s64 for atomic64_t on 64-bit Now that all architectures use 64 consistently as the base type for the atomic64 API, let's have the CONFIG_64BIT definition of atomic64_t use s64 as the underlying type for atomic64_t, rather than long, matching the generated headers. On architectures where atomic64_read(v) is READ_ONCE(v->counter), this patch will cause the return type of atomic64_read() to be s64. As of this patch, the atomic64 API can be relied upon to consistently return s64 where a value rather than boolean condition is returned. This should make code more robust, and simpler, allowing for the removal of casts previously required to ensure consistent types. Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: aou@eecs.berkeley.edu Cc: arnd@arndb.de Cc: bp@alien8.de Cc: catalin.marinas@arm.com Cc: davem@davemloft.net Cc: fenghua.yu@intel.com Cc: heiko.carstens@de.ibm.com Cc: herbert@gondor.apana.org.au Cc: ink@jurassic.park.msu.ru Cc: jhogan@kernel.org Cc: linux@armlinux.org.uk Cc: mattst88@gmail.com Cc: mpe@ellerman.id.au Cc: palmer@sifive.com Cc: paul.burton@mips.com Cc: paulus@samba.org Cc: ralf@linux-mips.org Cc: rth@twiddle.net Cc: tony.luck@intel.com Cc: vgupta@synopsys.com Link: https://lkml.kernel.org/r/20190522132250.26499-17-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/types.h b/include/linux/types.h index 231114ae38f4..05030f608be3 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -174,7 +174,7 @@ typedef struct { #ifdef CONFIG_64BIT typedef struct { - long counter; + s64 counter; } atomic64_t; #endif -- cgit v1.2.3 From 2d146b924ec3c0873f06308d149684dc1105d9a3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 Jan 2019 16:21:07 +0100 Subject: backing-dev: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. And as the return value does not matter at all, no need to save the dentry in struct backing_dev_info, so delete it. Cc: Andrew Morton Cc: Anders Roxell Cc: Arnd Bergmann Cc: Michal Hocko Cc: linux-mm@kvack.org Reviewed-by: Sebastian Andrzej Siewior Signed-off-by: Greg Kroah-Hartman --- include/linux/backing-dev-defs.h | 1 - mm/backing-dev.c | 24 +++++------------------- 2 files changed, 5 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 07e02d6df5ad..6a1a8a314d85 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -203,7 +203,6 @@ struct backing_dev_info { #ifdef CONFIG_DEBUG_FS struct dentry *debug_dir; - struct dentry *debug_stats; #endif }; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 909dae445ea7..e8e89158adec 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -103,39 +103,25 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) } DEFINE_SHOW_ATTRIBUTE(bdi_debug_stats); -static int bdi_debug_register(struct backing_dev_info *bdi, const char *name) +static void bdi_debug_register(struct backing_dev_info *bdi, const char *name) { - if (!bdi_debug_root) - return -ENOMEM; - bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root); - if (!bdi->debug_dir) - return -ENOMEM; - - bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir, - bdi, &bdi_debug_stats_fops); - if (!bdi->debug_stats) { - debugfs_remove(bdi->debug_dir); - bdi->debug_dir = NULL; - return -ENOMEM; - } - return 0; + debugfs_create_file("stats", 0444, bdi->debug_dir, bdi, + &bdi_debug_stats_fops); } static void bdi_debug_unregister(struct backing_dev_info *bdi) { - debugfs_remove(bdi->debug_stats); - debugfs_remove(bdi->debug_dir); + debugfs_remove_recursive(bdi->debug_dir); } #else static inline void bdi_debug_init(void) { } -static inline int bdi_debug_register(struct backing_dev_info *bdi, +static inline void bdi_debug_register(struct backing_dev_info *bdi, const char *name) { - return 0; } static inline void bdi_debug_unregister(struct backing_dev_info *bdi) { -- cgit v1.2.3 From b1d2dc009dece4cd7e629419b52266ba51960a6b Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 23 May 2019 21:06:32 -0700 Subject: dma-contiguous: add dma_{alloc,free}_contiguous() helpers Both dma_alloc_from_contiguous() and dma_release_from_contiguous() are very simply implemented, but requiring callers to pass certain parameters like count and align, and taking a boolean parameter to check __GFP_NOWARN in the allocation flags. So every function call duplicates similar work: unsigned long order = get_order(size); size_t count = size >> PAGE_SHIFT; page = dma_alloc_from_contiguous(dev, count, order, gfp & __GFP_NOWARN); [...] dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); Additionally, as CMA can be used only in the context which permits sleeping, most of callers do a gfpflags_allow_blocking() check and a corresponding fallback allocation of normal pages upon any false result: if (gfpflags_allow_blocking(flag)) page = dma_alloc_from_contiguous(); if (!page) page = alloc_pages(); [...] if (!dma_release_from_contiguous(dev, page, count)) __free_pages(page, get_order(size)); So this patch simplifies those function calls by abstracting these operations into the two new functions: dma_{alloc,free}_contiguous. As some callers of dma_{alloc,release}_from_contiguous() might be complicated, this patch just implements these two new functions to kernel/dma/direct.c only as an initial step. Suggested-by: Christoph Hellwig Signed-off-by: Nicolin Chen Tested-by: dann frazier Signed-off-by: Christoph Hellwig --- include/linux/dma-contiguous.h | 13 ++++++++++++ kernel/dma/contiguous.c | 47 ++++++++++++++++++++++++++++++++++++++++++ kernel/dma/direct.c | 24 ++++----------------- 3 files changed, 64 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 6665fa03c0d1..428f3b7b1c42 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -111,6 +111,8 @@ struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, unsigned int order, bool no_warn); bool dma_release_from_contiguous(struct device *dev, struct page *pages, int count); +struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp); +void dma_free_contiguous(struct device *dev, struct page *page, size_t size); #else @@ -153,6 +155,17 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages, return false; } +static inline struct page *dma_alloc_contiguous(struct device *dev, size_t size, + gfp_t gfp) +{ + return NULL; +} + +static inline void dma_free_contiguous(struct device *dev, struct page *page, + size_t size) +{ +} + #endif #endif diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index b2a87905846d..637b120d647b 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -214,6 +214,53 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages, return cma_release(dev_get_cma_area(dev), pages, count); } +/** + * dma_alloc_contiguous() - allocate contiguous pages + * @dev: Pointer to device for which the allocation is performed. + * @size: Requested allocation size. + * @gfp: Allocation flags. + * + * This function allocates contiguous memory buffer for specified device. It + * first tries to use device specific contiguous memory area if available or + * the default global one, then tries a fallback allocation of normal pages. + */ +struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) +{ + int node = dev ? dev_to_node(dev) : NUMA_NO_NODE; + size_t count = PAGE_ALIGN(size) >> PAGE_SHIFT; + size_t align = get_order(PAGE_ALIGN(size)); + struct cma *cma = dev_get_cma_area(dev); + struct page *page = NULL; + + /* CMA can be used only in the context which permits sleeping */ + if (cma && gfpflags_allow_blocking(gfp)) { + align = min_t(size_t, align, CONFIG_CMA_ALIGNMENT); + page = cma_alloc(cma, count, align, gfp & __GFP_NOWARN); + } + + /* Fallback allocation of normal pages */ + if (!page) + page = alloc_pages_node(node, gfp, align); + return page; +} + +/** + * dma_free_contiguous() - release allocated pages + * @dev: Pointer to device for which the pages were allocated. + * @page: Pointer to the allocated pages. + * @size: Size of allocated pages. + * + * This function releases memory allocated by dma_alloc_contiguous(). As the + * cma_release returns false when provided pages do not belong to contiguous + * area and true otherwise, this function then does a fallback __free_pages() + * upon a false-return. + */ +void dma_free_contiguous(struct device *dev, struct page *page, size_t size) +{ + if (!cma_release(dev_get_cma_area(dev), page, size >> PAGE_SHIFT)) + __free_pages(page, get_order(size)); +} + /* * Support for reserved memory regions defined in device tree */ diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 2c2772e9702a..0816c1e8b05a 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -96,8 +96,6 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - int page_order = get_order(size); struct page *page = NULL; u64 phys_mask; @@ -109,20 +107,9 @@ struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, &phys_mask); again: - /* CMA can be used only in the context which permits sleeping */ - if (gfpflags_allow_blocking(gfp)) { - page = dma_alloc_from_contiguous(dev, count, page_order, - gfp & __GFP_NOWARN); - if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { - dma_release_from_contiguous(dev, page, count); - page = NULL; - } - } - if (!page) - page = alloc_pages_node(dev_to_node(dev), gfp, page_order); - + page = dma_alloc_contiguous(dev, size, gfp); if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { - __free_pages(page, page_order); + dma_free_contiguous(dev, page, size); page = NULL; if (IS_ENABLED(CONFIG_ZONE_DMA32) && @@ -154,7 +141,7 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, if (PageHighMem(page)) { /* * Depending on the cma= arguments and per-arch setup - * dma_alloc_from_contiguous could return highmem pages. + * dma_alloc_contiguous could return highmem pages. * Without remapping there is no way to return them here, * so log an error and fail. */ @@ -176,10 +163,7 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page) { - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - - if (!dma_release_from_contiguous(dev, page, count)) - __free_pages(page, get_order(size)); + dma_free_contiguous(dev, page, size); } void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, -- cgit v1.2.3 From dd3dcede9fa0a0b661ac1f24843f4a1b1317fdb6 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 29 May 2019 17:54:25 -0700 Subject: dma-contiguous: fix !CONFIG_DMA_CMA version of dma_{alloc, free}_contiguous() Commit fdaeec198ada ("dma-contiguous: add dma_{alloc,free}_contiguous() helpers") adds a pair of new helper functions so as to abstract code in the dma-direct (and other places in the future), however it breaks QEMU boot feature using x86_64 defconfig. That's because x86_64 defconfig has CONFIG_DMA_CMA=n so those two newly introduced helper functions are empty in their !CONFIG_DMA_CMA version, while previously the platform independent dma-direct code had fallback alloc_pages_node() and __free_pages(). So this patch fixes it by adding alloc_pages_node() and __free_pages() in the !CONFIG_DMA_CMA version of the two helper functions. Tested with below QEMU command: qemu-system-x86_64 -m 512m \ -drive file=images/x86_64/rootfs.ext4,format=raw,if=ide \ -append 'console=ttyS0 root=/dev/sda' -nographic \ -kernel arch/x86_64/boot/bzImage with the rootfs from the below link: https://github.com/ClangBuiltLinux/continuous-integration/raw/master/images/x86_64/rootfs.ext4 Fixes: fdaeec198ada ("dma-contiguous: add dma_{alloc,free}_contiguous() helpers") Reported-by: Nathan Chancellor Signed-off-by: Nicolin Chen Signed-off-by: Christoph Hellwig --- include/linux/dma-contiguous.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 428f3b7b1c42..c05d4e661489 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -50,6 +50,7 @@ #ifdef __KERNEL__ #include +#include struct cma; struct page; @@ -155,15 +156,20 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages, return false; } +/* Use fallback alloc() and free() when CONFIG_DMA_CMA=n */ static inline struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) { - return NULL; + int node = dev ? dev_to_node(dev) : NUMA_NO_NODE; + size_t align = get_order(PAGE_ALIGN(size)); + + return alloc_pages_node(node, gfp, align); } static inline void dma_free_contiguous(struct device *dev, struct page *page, size_t size) { + __free_pages(page, get_order(size)); } #endif -- cgit v1.2.3 From da83a722959a82733c3ca60030cc364ca2318c5a Mon Sep 17 00:00:00 2001 From: Fredrik Noring Date: Wed, 29 May 2019 13:28:39 +0300 Subject: lib/genalloc: add gen_pool_dma_zalloc() for zeroed DMA allocations gen_pool_dma_zalloc() is a zeroed memory variant of gen_pool_dma_alloc(). Also document the return values of both, and indicate NULL as a "%NULL" constant. Signed-off-by: Fredrik Noring Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christoph Hellwig --- include/linux/genalloc.h | 1 + lib/genalloc.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index dd0a452373e7..6c62eeca754f 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -121,6 +121,7 @@ extern unsigned long gen_pool_alloc_algo(struct gen_pool *, size_t, genpool_algo_t algo, void *data); extern void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma); +void *gen_pool_dma_zalloc(struct gen_pool *pool, size_t size, dma_addr_t *dma); extern void gen_pool_free(struct gen_pool *, unsigned long, size_t); extern void gen_pool_for_each_chunk(struct gen_pool *, void (*)(struct gen_pool *, struct gen_pool_chunk *, void *), void *); diff --git a/lib/genalloc.c b/lib/genalloc.c index 7e85d1e37a6e..5db43476a19b 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -337,12 +337,14 @@ EXPORT_SYMBOL(gen_pool_alloc_algo); * gen_pool_dma_alloc - allocate special memory from the pool for DMA usage * @pool: pool to allocate from * @size: number of bytes to allocate from the pool - * @dma: dma-view physical address return value. Use NULL if unneeded. + * @dma: dma-view physical address return value. Use %NULL if unneeded. * * Allocate the requested number of bytes from the specified pool. * Uses the pool allocation function (with first-fit algorithm by default). * Can not be used in NMI handler on architectures without * NMI-safe cmpxchg implementation. + * + * Return: virtual address of the allocated memory, or %NULL on failure */ void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma) { @@ -362,6 +364,31 @@ void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma) } EXPORT_SYMBOL(gen_pool_dma_alloc); +/** + * gen_pool_dma_zalloc - allocate special zeroed memory from the pool for + * DMA usage + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * @dma: dma-view physical address return value. Use %NULL if unneeded. + * + * Allocate the requested number of zeroed bytes from the specified pool. + * Uses the pool allocation function (with first-fit algorithm by default). + * Can not be used in NMI handler on architectures without + * NMI-safe cmpxchg implementation. + * + * Return: virtual address of the allocated zeroed memory, or %NULL on failure + */ +void *gen_pool_dma_zalloc(struct gen_pool *pool, size_t size, dma_addr_t *dma) +{ + void *vaddr = gen_pool_dma_alloc(pool, size, dma); + + if (vaddr) + memset(vaddr, 0, size); + + return vaddr; +} +EXPORT_SYMBOL(gen_pool_dma_zalloc); + /** * gen_pool_free - free allocated special memory back to the pool * @pool: pool to free to -- cgit v1.2.3 From b0310c2f09bbe8aebefb97ed67949a3a7092aca6 Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Wed, 29 May 2019 13:28:40 +0300 Subject: USB: use genalloc for USB HCs with local memory For HCs that have local memory, replace the current DMA API usage with a genalloc generic allocator to manage the mappings for these devices. To help users, introduce a new HCD API, usb_hcd_setup_local_mem() that will setup up the genalloc backing up the device local memory. It will be used in subsequent patches. This is in preparation for dropping the existing "coherent" dma mem declaration APIs. The current implementation was relying on a short circuit in the DMA API that in the end, was acting as an allocator for these type of devices. Signed-off-by: Laurentiu Tudor Tested-by: Fredrik Noring Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christoph Hellwig --- drivers/usb/Kconfig | 1 + drivers/usb/core/buffer.c | 9 +++++++++ drivers/usb/core/hcd.c | 36 ++++++++++++++++++++++++++++++++++++ drivers/usb/host/ohci-hcd.c | 23 ++++++++++++++++++----- drivers/usb/host/ohci-mem.c | 35 +++++++++++++++++++++++++++++++---- drivers/usb/host/ohci.h | 2 ++ include/linux/usb/hcd.h | 5 +++++ 7 files changed, 102 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig index e4b27413f528..389c57d8eba7 100644 --- a/drivers/usb/Kconfig +++ b/drivers/usb/Kconfig @@ -45,6 +45,7 @@ config USB_ARCH_HAS_HCD config USB tristate "Support for Host-side USB" depends on USB_ARCH_HAS_HCD + select GENERIC_ALLOCATOR select USB_COMMON select NLS # for UTF-8 strings ---help--- diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c index f641342cdec0..d2064ad7ad14 100644 --- a/drivers/usb/core/buffer.c +++ b/drivers/usb/core/buffer.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -124,6 +125,9 @@ void *hcd_buffer_alloc( if (size == 0) return NULL; + if (hcd->localmem_pool) + return gen_pool_dma_alloc(hcd->localmem_pool, size, dma); + /* some USB hosts just use PIO */ if (!IS_ENABLED(CONFIG_HAS_DMA) || (!is_device_dma_capable(bus->sysdev) && @@ -152,6 +156,11 @@ void hcd_buffer_free( if (!addr) return; + if (hcd->localmem_pool) { + gen_pool_free(hcd->localmem_pool, (unsigned long)addr, size); + return; + } + if (!IS_ENABLED(CONFIG_HAS_DMA) || (!is_device_dma_capable(bus->sysdev) && !(hcd->driver->flags & HCD_LOCAL_MEM))) { diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 94d22551fc1b..29b96e5e8621 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include #include @@ -3039,6 +3041,40 @@ usb_hcd_platform_shutdown(struct platform_device *dev) } EXPORT_SYMBOL_GPL(usb_hcd_platform_shutdown); +int usb_hcd_setup_local_mem(struct usb_hcd *hcd, phys_addr_t phys_addr, + dma_addr_t dma, size_t size) +{ + int err; + void *local_mem; + + hcd->localmem_pool = devm_gen_pool_create(hcd->self.sysdev, PAGE_SHIFT, + dev_to_node(hcd->self.sysdev), + dev_name(hcd->self.sysdev)); + if (IS_ERR(hcd->localmem_pool)) + return PTR_ERR(hcd->localmem_pool); + + local_mem = devm_memremap(hcd->self.sysdev, phys_addr, + size, MEMREMAP_WC); + if (!local_mem) + return -ENOMEM; + + /* + * Here we pass a dma_addr_t but the arg type is a phys_addr_t. + * It's not backed by system memory and thus there's no kernel mapping + * for it. + */ + err = gen_pool_add_virt(hcd->localmem_pool, (unsigned long)local_mem, + dma, size, dev_to_node(hcd->self.sysdev)); + if (err < 0) { + dev_err(hcd->self.sysdev, "gen_pool_add_virt failed with %d\n", + err); + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(usb_hcd_setup_local_mem); + /*-------------------------------------------------------------------------*/ #if IS_ENABLED(CONFIG_USB_MON) diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 210181fd98d2..b200b19b44fa 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -505,8 +506,15 @@ static int ohci_init (struct ohci_hcd *ohci) timer_setup(&ohci->io_watchdog, io_watchdog_func, 0); ohci->prev_frame_no = IO_WATCHDOG_OFF; - ohci->hcca = dma_alloc_coherent (hcd->self.controller, - sizeof(*ohci->hcca), &ohci->hcca_dma, GFP_KERNEL); + if (hcd->localmem_pool) + ohci->hcca = gen_pool_dma_alloc(hcd->localmem_pool, + sizeof(*ohci->hcca), + &ohci->hcca_dma); + else + ohci->hcca = dma_alloc_coherent(hcd->self.controller, + sizeof(*ohci->hcca), + &ohci->hcca_dma, + GFP_KERNEL); if (!ohci->hcca) return -ENOMEM; @@ -990,9 +998,14 @@ static void ohci_stop (struct usb_hcd *hcd) remove_debug_files (ohci); ohci_mem_cleanup (ohci); if (ohci->hcca) { - dma_free_coherent (hcd->self.controller, - sizeof *ohci->hcca, - ohci->hcca, ohci->hcca_dma); + if (hcd->localmem_pool) + gen_pool_free(hcd->localmem_pool, + (unsigned long)ohci->hcca, + sizeof(*ohci->hcca)); + else + dma_free_coherent(hcd->self.controller, + sizeof(*ohci->hcca), + ohci->hcca, ohci->hcca_dma); ohci->hcca = NULL; ohci->hcca_dma = 0; } diff --git a/drivers/usb/host/ohci-mem.c b/drivers/usb/host/ohci-mem.c index 3965ac0341eb..4afe27cc7e46 100644 --- a/drivers/usb/host/ohci-mem.c +++ b/drivers/usb/host/ohci-mem.c @@ -36,6 +36,13 @@ static void ohci_hcd_init (struct ohci_hcd *ohci) static int ohci_mem_init (struct ohci_hcd *ohci) { + /* + * HCs with local memory allocate from localmem_pool so there's + * no need to create the below dma pools. + */ + if (ohci_to_hcd(ohci)->localmem_pool) + return 0; + ohci->td_cache = dma_pool_create ("ohci_td", ohci_to_hcd(ohci)->self.controller, sizeof (struct td), @@ -84,8 +91,12 @@ td_alloc (struct ohci_hcd *hc, gfp_t mem_flags) { dma_addr_t dma; struct td *td; + struct usb_hcd *hcd = ohci_to_hcd(hc); - td = dma_pool_zalloc (hc->td_cache, mem_flags, &dma); + if (hcd->localmem_pool) + td = gen_pool_dma_zalloc(hcd->localmem_pool, sizeof(*td), &dma); + else + td = dma_pool_zalloc(hc->td_cache, mem_flags, &dma); if (td) { /* in case hc fetches it, make it look dead */ td->hwNextTD = cpu_to_hc32 (hc, dma); @@ -99,6 +110,7 @@ static void td_free (struct ohci_hcd *hc, struct td *td) { struct td **prev = &hc->td_hash [TD_HASH_FUNC (td->td_dma)]; + struct usb_hcd *hcd = ohci_to_hcd(hc); while (*prev && *prev != td) prev = &(*prev)->td_hash; @@ -106,7 +118,12 @@ td_free (struct ohci_hcd *hc, struct td *td) *prev = td->td_hash; else if ((td->hwINFO & cpu_to_hc32(hc, TD_DONE)) != 0) ohci_dbg (hc, "no hash for td %p\n", td); - dma_pool_free (hc->td_cache, td, td->td_dma); + + if (hcd->localmem_pool) + gen_pool_free(hcd->localmem_pool, (unsigned long)td, + sizeof(*td)); + else + dma_pool_free(hc->td_cache, td, td->td_dma); } /*-------------------------------------------------------------------------*/ @@ -117,8 +134,12 @@ ed_alloc (struct ohci_hcd *hc, gfp_t mem_flags) { dma_addr_t dma; struct ed *ed; + struct usb_hcd *hcd = ohci_to_hcd(hc); - ed = dma_pool_zalloc (hc->ed_cache, mem_flags, &dma); + if (hcd->localmem_pool) + ed = gen_pool_dma_zalloc(hcd->localmem_pool, sizeof(*ed), &dma); + else + ed = dma_pool_zalloc(hc->ed_cache, mem_flags, &dma); if (ed) { INIT_LIST_HEAD (&ed->td_list); ed->dma = dma; @@ -129,6 +150,12 @@ ed_alloc (struct ohci_hcd *hc, gfp_t mem_flags) static void ed_free (struct ohci_hcd *hc, struct ed *ed) { - dma_pool_free (hc->ed_cache, ed, ed->dma); + struct usb_hcd *hcd = ohci_to_hcd(hc); + + if (hcd->localmem_pool) + gen_pool_free(hcd->localmem_pool, (unsigned long)ed, + sizeof(*ed)); + else + dma_pool_free(hc->ed_cache, ed, ed->dma); } diff --git a/drivers/usb/host/ohci.h b/drivers/usb/host/ohci.h index ef4813bfc5bf..b015b00774b2 100644 --- a/drivers/usb/host/ohci.h +++ b/drivers/usb/host/ohci.h @@ -385,6 +385,8 @@ struct ohci_hcd { /* * memory management for queue data structures + * + * @td_cache and @ed_cache are %NULL if &usb_hcd.localmem_pool is used. */ struct dma_pool *td_cache; struct dma_pool *ed_cache; diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index bb57b5af4700..127560a4bfa0 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -216,6 +216,9 @@ struct usb_hcd { #define HC_IS_RUNNING(state) ((state) & __ACTIVE) #define HC_IS_SUSPENDED(state) ((state) & __SUSPEND) + /* memory pool for HCs having local memory, or %NULL */ + struct gen_pool *localmem_pool; + /* more shared queuing code would be good; it should support * smarter scheduling, handle transaction translators, etc; * input size of periodic table to an interrupt scheduler. @@ -461,6 +464,8 @@ extern int usb_add_hcd(struct usb_hcd *hcd, unsigned int irqnum, unsigned long irqflags); extern void usb_remove_hcd(struct usb_hcd *hcd); extern int usb_hcd_find_raw_port_number(struct usb_hcd *hcd, int port1); +int usb_hcd_setup_local_mem(struct usb_hcd *hcd, phys_addr_t phys_addr, + dma_addr_t dma, size_t size); struct platform_device; extern void usb_hcd_platform_shutdown(struct platform_device *dev); -- cgit v1.2.3 From 2d7a3dc3e24f43504b1f25eae8195e600f4cce8b Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Wed, 29 May 2019 13:28:43 +0300 Subject: USB: drop HCD_LOCAL_MEM flag With the addition of the local memory allocator, the HCD_LOCAL_MEM flag can be dropped and the checks against it replaced with a check for the localmem_pool ptr being initialized. Signed-off-by: Laurentiu Tudor Tested-by: Fredrik Noring Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christoph Hellwig --- drivers/usb/core/buffer.c | 8 +++----- drivers/usb/core/hcd.c | 15 ++++++--------- drivers/usb/host/ehci-hcd.c | 2 +- drivers/usb/host/fotg210-hcd.c | 2 +- drivers/usb/host/ohci-hcd.c | 2 +- drivers/usb/host/ohci-sm501.c | 5 +++-- drivers/usb/host/ohci-tmio.c | 2 +- drivers/usb/host/uhci-hcd.c | 2 +- include/linux/usb/hcd.h | 1 - 9 files changed, 17 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c index d2064ad7ad14..1359b78a624e 100644 --- a/drivers/usb/core/buffer.c +++ b/drivers/usb/core/buffer.c @@ -68,7 +68,7 @@ int hcd_buffer_create(struct usb_hcd *hcd) if (!IS_ENABLED(CONFIG_HAS_DMA) || (!is_device_dma_capable(hcd->self.sysdev) && - !(hcd->driver->flags & HCD_LOCAL_MEM))) + !hcd->localmem_pool)) return 0; for (i = 0; i < HCD_BUFFER_POOLS; i++) { @@ -130,8 +130,7 @@ void *hcd_buffer_alloc( /* some USB hosts just use PIO */ if (!IS_ENABLED(CONFIG_HAS_DMA) || - (!is_device_dma_capable(bus->sysdev) && - !(hcd->driver->flags & HCD_LOCAL_MEM))) { + !is_device_dma_capable(bus->sysdev)) { *dma = ~(dma_addr_t) 0; return kmalloc(size, mem_flags); } @@ -162,8 +161,7 @@ void hcd_buffer_free( } if (!IS_ENABLED(CONFIG_HAS_DMA) || - (!is_device_dma_capable(bus->sysdev) && - !(hcd->driver->flags & HCD_LOCAL_MEM))) { + !is_device_dma_capable(bus->sysdev)) { kfree(addr); return; } diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 29b96e5e8621..fe631d18c1ed 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1347,14 +1347,14 @@ EXPORT_SYMBOL_GPL(usb_hcd_unlink_urb_from_ep); * using regular system memory - like pci devices doing bus mastering. * * To support host controllers with limited dma capabilities we provide dma - * bounce buffers. This feature can be enabled using the HCD_LOCAL_MEM flag. + * bounce buffers. This feature can be enabled by initializing + * hcd->localmem_pool using usb_hcd_setup_local_mem(). * For this to work properly the host controller code must first use the * function dma_declare_coherent_memory() to point out which memory area * that should be used for dma allocations. * - * The HCD_LOCAL_MEM flag then tells the usb code to allocate all data for - * dma using dma_alloc_coherent() which in turn allocates from the memory - * area pointed out with dma_declare_coherent_memory(). + * The initialized hcd->localmem_pool then tells the usb code to allocate all + * data for dma using the genalloc API. * * So, to summarize... * @@ -1364,9 +1364,6 @@ EXPORT_SYMBOL_GPL(usb_hcd_unlink_urb_from_ep); * (a) "normal" kernel memory is no good, and * (b) there's not enough to share * - * - The only *portable* hook for such stuff in the - * DMA framework is dma_declare_coherent_memory() - * * - So we use that, even though the primary requirement * is that the memory be "local" (hence addressable * by that device), not "coherent". @@ -1533,7 +1530,7 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, urb->setup_dma)) return -EAGAIN; urb->transfer_flags |= URB_SETUP_MAP_SINGLE; - } else if (hcd->driver->flags & HCD_LOCAL_MEM) { + } else if (hcd->localmem_pool) { ret = hcd_alloc_coherent( urb->dev->bus, mem_flags, &urb->setup_dma, @@ -1603,7 +1600,7 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, else urb->transfer_flags |= URB_DMA_MAP_SINGLE; } - } else if (hcd->driver->flags & HCD_LOCAL_MEM) { + } else if (hcd->localmem_pool) { ret = hcd_alloc_coherent( urb->dev->bus, mem_flags, &urb->transfer_dma, diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index cdafa97f632d..9da7e22848c9 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -559,7 +559,7 @@ static int ehci_init(struct usb_hcd *hcd) ehci->command = temp; /* Accept arbitrarily long scatter-gather lists */ - if (!(hcd->driver->flags & HCD_LOCAL_MEM)) + if (!hcd->localmem_pool) hcd->self.sg_tablesize = ~0; /* Prepare for unlinking active QHs */ diff --git a/drivers/usb/host/fotg210-hcd.c b/drivers/usb/host/fotg210-hcd.c index 0da68df259c8..5d74ff61fa4c 100644 --- a/drivers/usb/host/fotg210-hcd.c +++ b/drivers/usb/host/fotg210-hcd.c @@ -4995,7 +4995,7 @@ static int hcd_fotg210_init(struct usb_hcd *hcd) fotg210->command = temp; /* Accept arbitrarily long scatter-gather lists */ - if (!(hcd->driver->flags & HCD_LOCAL_MEM)) + if (!hcd->localmem_pool) hcd->self.sg_tablesize = ~0; return 0; } diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index b200b19b44fa..5801858d867e 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -448,7 +448,7 @@ static int ohci_init (struct ohci_hcd *ohci) struct usb_hcd *hcd = ohci_to_hcd(ohci); /* Accept arbitrarily long scatter-gather lists */ - if (!(hcd->driver->flags & HCD_LOCAL_MEM)) + if (!hcd->localmem_pool) hcd->self.sg_tablesize = ~0; if (distrust_firmware) diff --git a/drivers/usb/host/ohci-sm501.c b/drivers/usb/host/ohci-sm501.c index b710e100aec9..c158cda9e4b9 100644 --- a/drivers/usb/host/ohci-sm501.c +++ b/drivers/usb/host/ohci-sm501.c @@ -49,7 +49,7 @@ static const struct hc_driver ohci_sm501_hc_driver = { * generic hardware linkage */ .irq = ohci_irq, - .flags = HCD_USB11 | HCD_MEMORY | HCD_LOCAL_MEM, + .flags = HCD_USB11 | HCD_MEMORY, /* * basic lifecycle operations @@ -153,7 +153,8 @@ static int ohci_hcd_sm501_drv_probe(struct platform_device *pdev) * fine. This is however not always the case - buffers may be allocated * using kmalloc() - so the usb core needs to be told that it must copy * data into our local memory if the buffers happen to be placed in - * regular memory. The HCD_LOCAL_MEM flag does just that. + * regular memory. A non-null hcd->localmem_pool initialized by the + * the call to usb_hcd_setup_local_mem() below does just that. */ if (usb_hcd_setup_local_mem(hcd, mem->start, diff --git a/drivers/usb/host/ohci-tmio.c b/drivers/usb/host/ohci-tmio.c index 3b84ce0c3f29..d5a293a707b6 100644 --- a/drivers/usb/host/ohci-tmio.c +++ b/drivers/usb/host/ohci-tmio.c @@ -153,7 +153,7 @@ static const struct hc_driver ohci_tmio_hc_driver = { /* generic hardware linkage */ .irq = ohci_irq, - .flags = HCD_USB11 | HCD_MEMORY | HCD_LOCAL_MEM, + .flags = HCD_USB11 | HCD_MEMORY, /* basic lifecycle operations */ .start = ohci_tmio_start, diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c index 98deb5f64268..03bc59755123 100644 --- a/drivers/usb/host/uhci-hcd.c +++ b/drivers/usb/host/uhci-hcd.c @@ -581,7 +581,7 @@ static int uhci_start(struct usb_hcd *hcd) hcd->uses_new_polling = 1; /* Accept arbitrarily long scatter-gather lists */ - if (!(hcd->driver->flags & HCD_LOCAL_MEM)) + if (!hcd->localmem_pool) hcd->self.sg_tablesize = ~0; spin_lock_init(&uhci->lock); diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 127560a4bfa0..bab27ccc8ff5 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -256,7 +256,6 @@ struct hc_driver { int flags; #define HCD_MEMORY 0x0001 /* HC regs use memory (else I/O) */ -#define HCD_LOCAL_MEM 0x0002 /* HC needs local memory */ #define HCD_SHARED 0x0004 /* Two (or more) usb_hcds share HW */ #define HCD_USB11 0x0010 /* USB 1.1 */ #define HCD_USB2 0x0020 /* USB 2.0 */ -- cgit v1.2.3 From c30700db9eaabb35e0b123301df35a6846e6b6b4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Jun 2019 08:43:51 +0200 Subject: dma-direct: provide generic support for uncached kernel segments A few architectures support uncached kernel segments. In that case we get an uncached mapping for a given physica address by using an offset in the uncached segement. Implement support for this scheme in the generic dma-direct code instead of duplicating it in arch hooks. Signed-off-by: Christoph Hellwig --- arch/Kconfig | 8 ++++++++ include/linux/dma-noncoherent.h | 3 +++ kernel/dma/direct.c | 17 +++++++++++++++-- 3 files changed, 26 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index c47b328eada0..e8d19c3cb91f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -260,6 +260,14 @@ config ARCH_HAS_SET_MEMORY config ARCH_HAS_SET_DIRECT_MAP bool +# +# Select if arch has an uncached kernel segment and provides the +# uncached_kernel_address / cached_kernel_address symbols to use it +# +config ARCH_HAS_UNCACHED_SEGMENT + select ARCH_HAS_DMA_PREP_COHERENT + bool + # Select if arch init_task must go in the __init_task_data section config ARCH_TASK_STRUCT_ON_STACK bool diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index 9741767e400f..7e0126a04e02 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -80,4 +80,7 @@ static inline void arch_dma_prep_coherent(struct page *page, size_t size) } #endif /* CONFIG_ARCH_HAS_DMA_PREP_COHERENT */ +void *uncached_kernel_address(void *addr); +void *cached_kernel_address(void *addr); + #endif /* _LINUX_DMA_NONCOHERENT_H */ diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 0816c1e8b05a..b67f0aa08aa3 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -158,6 +158,13 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, *dma_handle = phys_to_dma(dev, page_to_phys(page)); } memset(ret, 0, size); + + if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) && + !dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_NON_CONSISTENT)) { + arch_dma_prep_coherent(page, size); + ret = uncached_kernel_address(ret); + } + return ret; } @@ -173,13 +180,18 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, if (force_dma_unencrypted()) set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); + + if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) && + !dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_NON_CONSISTENT)) + cpu_addr = cached_kernel_address(cpu_addr); __dma_direct_free_pages(dev, size, virt_to_page(cpu_addr)); } void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { - if (!dev_is_dma_coherent(dev)) + if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) && + !dev_is_dma_coherent(dev)) return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); } @@ -187,7 +199,8 @@ void *dma_direct_alloc(struct device *dev, size_t size, void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { - if (!dev_is_dma_coherent(dev)) + if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) && + !dev_is_dma_coherent(dev)) arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); else dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); -- cgit v1.2.3 From c9c2c27d7ceca8c2856c5008f2002bddb384f518 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 16 Apr 2019 15:46:55 +0200 Subject: debugfs: make debugfs_create_u32_array() return void The single user of debugfs_create_u32_array() does not care about the return value of it, so make it return void as there is no need to do anything with the return value. Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/debugfs.txt | 2 +- fs/debugfs/file.c | 14 ++++---------- include/linux/debugfs.h | 12 +++++------- 3 files changed, 10 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt index 4a0a9c3f4af6..9e27c843d00e 100644 --- a/Documentation/filesystems/debugfs.txt +++ b/Documentation/filesystems/debugfs.txt @@ -169,7 +169,7 @@ byte offsets over a base for the register block. If you want to dump an u32 array in debugfs, you can create file with: - struct dentry *debugfs_create_u32_array(const char *name, umode_t mode, + void debugfs_create_u32_array(const char *name, umode_t mode, struct dentry *parent, u32 *array, u32 elements); diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index ddd708b09fa1..93e4ca6b2ad7 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -997,25 +997,19 @@ static const struct file_operations u32_array_fops = { * @array as data. If the @mode variable is so set it can be read from. * Writing is not supported. Seek within the file is also not supported. * Once array is created its size can not be changed. - * - * The function returns a pointer to dentry on success. If an error occurs, - * %ERR_PTR(-ERROR) or NULL will be returned. If debugfs is not enabled in - * the kernel, the value %ERR_PTR(-ENODEV) will be returned. */ -struct dentry *debugfs_create_u32_array(const char *name, umode_t mode, - struct dentry *parent, - u32 *array, u32 elements) +void debugfs_create_u32_array(const char *name, umode_t mode, + struct dentry *parent, u32 *array, u32 elements) { struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL); if (data == NULL) - return NULL; + return; data->array = array; data->elements = elements; - return debugfs_create_file_unsafe(name, mode, parent, data, - &u32_array_fops); + debugfs_create_file_unsafe(name, mode, parent, data, &u32_array_fops); } EXPORT_SYMBOL_GPL(debugfs_create_u32_array); diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 3b0ba54cc4d5..58424eb3b329 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -133,9 +133,8 @@ struct dentry *debugfs_create_regset32(const char *name, umode_t mode, void debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs, int nregs, void __iomem *base, char *prefix); -struct dentry *debugfs_create_u32_array(const char *name, umode_t mode, - struct dentry *parent, - u32 *array, u32 elements); +void debugfs_create_u32_array(const char *name, umode_t mode, + struct dentry *parent, u32 *array, u32 elements); struct dentry *debugfs_create_devm_seqfile(struct device *dev, const char *name, struct dentry *parent, @@ -353,11 +352,10 @@ static inline bool debugfs_initialized(void) return false; } -static inline struct dentry *debugfs_create_u32_array(const char *name, umode_t mode, - struct dentry *parent, - u32 *array, u32 elements) +static inline void debugfs_create_u32_array(const char *name, umode_t mode, + struct dentry *parent, u32 *array, + u32 elements) { - return ERR_PTR(-ENODEV); } static inline struct dentry *debugfs_create_devm_seqfile(struct device *dev, -- cgit v1.2.3 From a09db883e5d938b525a86a4630fc04f98ff1063d Mon Sep 17 00:00:00 2001 From: Uma Shankar Date: Tue, 4 Jun 2019 16:47:02 +0530 Subject: drm: Fix docbook warnings in hdr metadata helper structures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following warnings: ./include/drm/drm_mode_config.h:841: warning: Incorrect use of kernel-doc format: * hdr_output_metadata_property: Connector property containing hdr ./include/drm/drm_mode_config.h:918: warning: Function parameter or member 'hdr_output_metadata_property' not described in 'drm_mode_config' ./include/drm/drm_connector.h:1251: warning: Function parameter or member 'hdr_output_metadata' not described in 'drm_connector' ./include/drm/drm_connector.h:1251: warning: Function parameter or member 'hdr_sink_metadata' not described in 'drm_connector' Also adds some property documentation for HDR Metadata Connector Property in connector property create function. v2: Fixed Sean Paul's review comments. v3: Fixed Daniel Vetter's review comments, added the UAPI structure definition section in kernel docs. v4: Fixed Daniel Vetter's review comments. v5: Added structure member references as per Daniel's suggestion. Cc: Shashank Sharma Cc: Ville Syrjä Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Sean Paul Cc: David Airlie Cc: Daniel Vetter Cc: Bartlomiej Zolnierkiewicz Cc: "Ville Syrjä" Cc: Hans Verkuil Cc: dri-devel@lists.freedesktop.org Cc: linux-fbdev@vger.kernel.org Reviewed-by: Sean Paul (v1) Signed-off-by: Uma Shankar [danvet: Fix up markup: () for functions, & for structs. Style guide also recommends to prepend struct for structures.] Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1559647022-7336-1-git-send-email-uma.shankar@intel.com --- drivers/gpu/drm/drm_connector.c | 40 ++++++++++++++++++++++ include/drm/drm_connector.h | 1 + include/drm/drm_mode_config.h | 4 +-- include/linux/hdmi.h | 12 +++++++ include/uapi/drm/drm_mode.h | 74 ++++++++++++++++++++++++++++++++++++++++- 5 files changed, 128 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index c9ac8b9e83ea..e17586aaa80f 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -956,6 +956,46 @@ static const struct drm_prop_enum_list hdmi_colorspaces[] = { * is no longer protected and userspace should take appropriate action * (whatever that might be). * + * HDR_OUTPUT_METADATA: + * Connector property to enable userspace to send HDR Metadata to + * driver. This metadata is based on the composition and blending + * policies decided by user, taking into account the hardware and + * sink capabilities. The driver gets this metadata and creates a + * Dynamic Range and Mastering Infoframe (DRM) in case of HDMI, + * SDP packet (Non-audio INFOFRAME SDP v1.3) for DP. This is then + * sent to sink. This notifies the sink of the upcoming frame's Color + * Encoding and Luminance parameters. + * + * Userspace first need to detect the HDR capabilities of sink by + * reading and parsing the EDID. Details of HDR metadata for HDMI + * are added in CTA 861.G spec. For DP , its defined in VESA DP + * Standard v1.4. It needs to then get the metadata information + * of the video/game/app content which are encoded in HDR (basically + * using HDR transfer functions). With this information it needs to + * decide on a blending policy and compose the relevant + * layers/overlays into a common format. Once this blending is done, + * userspace will be aware of the metadata of the composed frame to + * be send to sink. It then uses this property to communicate this + * metadata to driver which then make a Infoframe packet and sends + * to sink based on the type of encoder connected. + * + * Userspace will be responsible to do Tone mapping operation in case: + * - Some layers are HDR and others are SDR + * - HDR layers luminance is not same as sink + * It will even need to do colorspace conversion and get all layers + * to one common colorspace for blending. It can use either GL, Media + * or display engine to get this done based on the capabilties of the + * associated hardware. + * + * Driver expects metadata to be put in &struct hdr_output_metadata + * structure from userspace. This is received as blob and stored in + * &drm_connector_state.hdr_output_metadata. It parses EDID and saves the + * sink metadata in &struct hdr_sink_metadata, as + * &drm_connector.hdr_sink_metadata. Driver uses + * drm_hdmi_infoframe_set_hdr_metadata() helper to set the HDR metadata, + * hdmi_drm_infoframe_pack() to pack the infoframe as per spec, in case of + * HDMI encoder. + * * max bpc: * This range property is used by userspace to limit the bit depth. When * used the driver would limit the bpc in accordance with the valid range diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 547656173c74..47e749b74e5f 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -1244,6 +1244,7 @@ struct drm_connector { */ struct llist_node free_node; + /** @hdr_sink_metadata: HDR Metadata Information read from sink */ struct hdr_sink_metadata hdr_sink_metadata; }; diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 4f88cc972407..759d462d028b 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -837,8 +837,8 @@ struct drm_mode_config { struct drm_property *writeback_out_fence_ptr_property; /** - * hdr_output_metadata_property: Connector property containing hdr - * metatda. This will be provided by userspace compositors based + * @hdr_output_metadata_property: Connector property containing hdr + * metatada. This will be provided by userspace compositors based * on HDR content */ struct drm_property *hdr_output_metadata_property; diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index ee55ba589cdc..9918a6c910c5 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -367,8 +367,19 @@ struct hdr_static_metadata { __u16 min_cll; }; +/** + * struct hdr_sink_metadata - HDR sink metadata + * + * Metadata Information read from Sink's EDID + */ struct hdr_sink_metadata { + /** + * @metadata_type: Static_Metadata_Descriptor_ID. + */ __u32 metadata_type; + /** + * @hdmi_type1: HDR Metadata Infoframe. + */ union { struct hdr_static_metadata hdmi_type1; }; @@ -398,6 +409,7 @@ union hdmi_vendor_any_infoframe { * @spd: spd infoframe * @vendor: union of all vendor infoframes * @audio: audio infoframe + * @drm: Dynamic Range and Mastering infoframe * * This is used by the generic pack function. This works since all infoframes * have the same header which also indicates which type of infoframe should be diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 19b5cf368cff..5ab331e5dc23 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -33,6 +33,15 @@ extern "C" { #endif +/** + * DOC: overview + * + * DRM exposes many UAPI and structure definition to have a consistent + * and standardized interface with user. + * Userspace can refer to these structure definitions and UAPI formats + * to communicate to driver + */ + #define DRM_CONNECTOR_NAME_LEN 32 #define DRM_DISPLAY_MODE_LEN 32 #define DRM_PROP_NAME_LEN 32 @@ -630,24 +639,87 @@ struct drm_color_lut { __u16 reserved; }; -/* HDR Metadata Infoframe as per 861.G spec */ +/** + * struct hdr_metadata_infoframe - HDR Metadata Infoframe Data. + * + * HDR Metadata Infoframe as per CTA 861.G spec. This is expected + * to match exactly with the spec. + * + * Userspace is expected to pass the metadata information as per + * the format described in this structure. + */ struct hdr_metadata_infoframe { + /** + * @eotf: Electro-Optical Transfer Function (EOTF) + * used in the stream. + */ __u8 eotf; + /** + * @metadata_type: Static_Metadata_Descriptor_ID. + */ __u8 metadata_type; + /** + * @display_primaries: Color Primaries of the Data. + * These are coded as unsigned 16-bit values in units of + * 0.00002, where 0x0000 represents zero and 0xC350 + * represents 1.0000. + * @display_primaries.x: X cordinate of color primary. + * @display_primaries.y: Y cordinate of color primary. + */ struct { __u16 x, y; } display_primaries[3]; + /** + * @white_point: White Point of Colorspace Data. + * These are coded as unsigned 16-bit values in units of + * 0.00002, where 0x0000 represents zero and 0xC350 + * represents 1.0000. + * @white_point.x: X cordinate of whitepoint of color primary. + * @white_point.y: Y cordinate of whitepoint of color primary. + */ struct { __u16 x, y; } white_point; + /** + * @max_display_mastering_luminance: Max Mastering Display Luminance. + * This value is coded as an unsigned 16-bit value in units of 1 cd/m2, + * where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2. + */ __u16 max_display_mastering_luminance; + /** + * @min_display_mastering_luminance: Min Mastering Display Luminance. + * This value is coded as an unsigned 16-bit value in units of + * 0.0001 cd/m2, where 0x0001 represents 0.0001 cd/m2 and 0xFFFF + * represents 6.5535 cd/m2. + */ __u16 min_display_mastering_luminance; + /** + * @max_cll: Max Content Light Level. + * This value is coded as an unsigned 16-bit value in units of 1 cd/m2, + * where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2. + */ __u16 max_cll; + /** + * @max_fall: Max Frame Average Light Level. + * This value is coded as an unsigned 16-bit value in units of 1 cd/m2, + * where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2. + */ __u16 max_fall; }; +/** + * struct hdr_output_metadata - HDR output metadata + * + * Metadata Information to be passed from userspace + */ struct hdr_output_metadata { + /** + * @metadata_type: Static_Metadata_Descriptor_ID. + */ __u32 metadata_type; + /** + * @hdmi_metadata_type1: HDR Metadata Infoframe. + */ union { struct hdr_metadata_infoframe hdmi_metadata_type1; }; -- cgit v1.2.3 From 9a83c84c3a491cbe7fc9dea3c43e26a8e67204d2 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Tue, 28 May 2019 10:16:53 +0800 Subject: drivers: base: cacheinfo: Add variable to record max cache line size Add coherency_max_size variable to record the maximum cache line size for different cache levels. If it is available, we will synchronize it as cache line size, otherwise we will use CTR_EL0.CWG reporting in cache_line_size() for arm64. Cc: "Rafael J. Wysocki" Cc: Jeremy Linton Cc: Will Deacon Reviewed-by: Sudeep Holla Reviewed-by: Greg Kroah-Hartman Signed-off-by: Shaokun Zhang Signed-off-by: Catalin Marinas --- drivers/base/cacheinfo.c | 5 +++++ include/linux/cacheinfo.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index a7359535caf5..8827c60f51e2 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -213,6 +213,8 @@ int __weak cache_setup_acpi(unsigned int cpu) return -ENOTSUPP; } +unsigned int coherency_max_size; + static int cache_shared_cpu_map_setup(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -251,6 +253,9 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) cpumask_set_cpu(i, &this_leaf->shared_cpu_map); } } + /* record the maximum cache line size */ + if (this_leaf->coherency_line_size > coherency_max_size) + coherency_max_size = this_leaf->coherency_line_size; } return 0; diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 70e19bc6cc9f..46b92cd61d0c 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -17,6 +17,8 @@ enum cache_type { CACHE_TYPE_UNIFIED = BIT(2), }; +extern unsigned int coherency_max_size; + /** * struct cacheinfo - represent a cache leaf node * @id: This cache's id. It is unique among caches with the same (type, level). -- cgit v1.2.3 From f257d6dcda0187693407e0c2e5dab69bdab3223f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Apr 2019 22:18:17 -0700 Subject: KVM: Directly return result from kvm_arch_check_processor_compat() Add a wrapper to invoke kvm_arch_check_processor_compat() so that the boilerplate ugliness of checking virtualization support on all CPUs is hidden from the arch specific code. x86's implementation in particular is quite heinous, as it unnecessarily propagates the out-param pattern into kvm_x86_ops. While the x86 specific issue could be resolved solely by changing kvm_x86_ops, make the change for all architectures as returning a value directly is prettier and technically more robust, e.g. s390 doesn't set the out param, which could lead to subtle breakage in the (highly unlikely) scenario where the out-param was not pre-initialized by the caller. Opportunistically annotate svm_check_processor_compat() with __init. Signed-off-by: Sean Christopherson Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 4 ++-- arch/powerpc/kvm/powerpc.c | 4 ++-- arch/s390/include/asm/kvm_host.h | 1 - arch/s390/kvm/kvm-s390.c | 5 +++++ arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/vmx/vmx.c | 8 ++++---- arch/x86/kvm/x86.c | 4 ++-- include/linux/kvm_host.h | 2 +- virt/kvm/arm/arm.c | 4 ++-- virt/kvm/kvm_main.c | 9 ++++++--- 11 files changed, 27 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 0369f26ab96d..2cfe839f0b3a 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -123,9 +123,9 @@ int kvm_arch_hardware_setup(void) return 0; } -void kvm_arch_check_processor_compat(void *rtn) +int kvm_arch_check_processor_compat(void) { - *(int *)rtn = 0; + return 0; } int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index aa3a678711be..628d3c791ad7 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -425,9 +425,9 @@ int kvm_arch_hardware_setup(void) return 0; } -void kvm_arch_check_processor_compat(void *rtn) +int kvm_arch_check_processor_compat(void) { - *(int *)rtn = kvmppc_core_check_processor_compat(); + return kvmppc_core_check_processor_compat(); } int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 2b00a3ebee08..da5825a3c16b 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -905,7 +905,6 @@ extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc); extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc); static inline void kvm_arch_hardware_disable(void) {} -static inline void kvm_arch_check_processor_compat(void *rtn) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 28ebd647784c..7936af0a971f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -227,6 +227,11 @@ int kvm_arch_hardware_enable(void) return 0; } +int kvm_arch_check_processor_compat(void) +{ + return 0; +} + static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, unsigned long end); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 450d69a1e6fa..d5457c7bb243 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -999,7 +999,7 @@ struct kvm_x86_ops { int (*disabled_by_bios)(void); /* __init */ int (*hardware_enable)(void); void (*hardware_disable)(void); - void (*check_processor_compatibility)(void *rtn); + int (*check_processor_compatibility)(void);/* __init */ int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ bool (*cpu_has_accelerated_tpr)(void); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c56f40d430e5..302cb409d452 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5871,9 +5871,9 @@ svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) hypercall[2] = 0xd9; } -static void svm_check_processor_compat(void *rtn) +static int __init svm_check_processor_compat(void) { - *(int *)rtn = 0; + return 0; } static bool svm_cpu_has_accelerated_tpr(void) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b35b3800a3c0..0861c71a4379 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6733,22 +6733,22 @@ static int vmx_vm_init(struct kvm *kvm) return 0; } -static void __init vmx_check_processor_compat(void *rtn) +static int __init vmx_check_processor_compat(void) { struct vmcs_config vmcs_conf; struct vmx_capability vmx_cap; - *(int *)rtn = 0; if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) - *(int *)rtn = -EIO; + return -EIO; if (nested) nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept, enable_apicv); if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", smp_processor_id()); - *(int *)rtn = -EIO; + return -EIO; } + return 0; } static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2a713a74ca2e..5cb9ac9b61ab 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9082,9 +9082,9 @@ void kvm_arch_hardware_unsetup(void) kvm_x86_ops->hardware_unsetup(); } -void kvm_arch_check_processor_compat(void *rtn) +int kvm_arch_check_processor_compat(void) { - kvm_x86_ops->check_processor_compatibility(rtn); + return kvm_x86_ops->check_processor_compatibility(); } bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 79fa4426509c..5e9fd7ad8018 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -870,7 +870,7 @@ int kvm_arch_hardware_enable(void); void kvm_arch_hardware_disable(void); int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); -void kvm_arch_check_processor_compat(void *rtn); +int kvm_arch_check_processor_compat(void); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 7eeebe5e9da2..d2389033e9d6 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -105,9 +105,9 @@ int kvm_arch_hardware_setup(void) return 0; } -void kvm_arch_check_processor_compat(void *rtn) +int kvm_arch_check_processor_compat(void) { - *(int *)rtn = 0; + return 0; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ca54b09adf5b..b2579841263f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4224,6 +4224,11 @@ static void kvm_sched_out(struct preempt_notifier *pn, kvm_arch_vcpu_put(vcpu); } +static void check_processor_compat(void *rtn) +{ + *(int *)rtn = kvm_arch_check_processor_compat(); +} + int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, struct module *module) { @@ -4255,9 +4260,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, goto out_free_0a; for_each_online_cpu(cpu) { - smp_call_function_single(cpu, - kvm_arch_check_processor_compat, - &r, 1); + smp_call_function_single(cpu, check_processor_compat, &r, 1); if (r < 0) goto out_free_1; } -- cgit v1.2.3 From da29e4b466e6916a52e0e2f60054f855c324a9c2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 3 Jun 2019 15:16:58 -0700 Subject: net/tls: fully initialize the msg wrapper skb If strparser gets cornered into starting a new message from an sk_buff which already has frags, it will allocate a new skb to become the "wrapper" around the fragments of the message. This new skb does not inherit any metadata fields. In case of TLS offload this may lead to unnecessarily re-encrypting the message, as skb->decrypted is not set for the wrapper skb. Try to be conservative and copy all fields of old skb strparser's user may reasonably need. Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + net/core/skbuff.c | 25 +++++++++++++++++++++++++ net/strparser/strparser.c | 8 ++------ 3 files changed, 28 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2ee5e63195c0..98ff5ac98caa 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1063,6 +1063,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, int max_page_order, int *errcode, gfp_t gfp_mask); +struct sk_buff *alloc_skb_for_msg(struct sk_buff *first); /* Layout of fast clones : [skb1][skb2][fclone_ref] */ struct sk_buff_fclones { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4a712a00243a..b50a5e3ac4e4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -913,6 +913,31 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) #undef C } +/** + * alloc_skb_for_msg() - allocate sk_buff to wrap frag list forming a msg + * @first: first sk_buff of the msg + */ +struct sk_buff *alloc_skb_for_msg(struct sk_buff *first) +{ + struct sk_buff *n; + + n = alloc_skb(0, GFP_ATOMIC); + if (!n) + return NULL; + + n->len = first->len; + n->data_len = first->len; + n->truesize = first->truesize; + + skb_shinfo(n)->frag_list = first; + + __copy_skb_header(n, first); + n->destructor = NULL; + + return n; +} +EXPORT_SYMBOL_GPL(alloc_skb_for_msg); + /** * skb_morph - morph one skb into another * @dst: the skb to receive the contents diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index e137698e8aef..3fe541b746b0 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -160,18 +160,14 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, return 0; } - skb = alloc_skb(0, GFP_ATOMIC); + skb = alloc_skb_for_msg(head); if (!skb) { STRP_STATS_INCR(strp->stats.mem_fail); desc->error = -ENOMEM; return 0; } - skb->len = head->len; - skb->data_len = head->len; - skb->truesize = head->truesize; - *_strp_msg(skb) = *_strp_msg(head); + strp->skb_nextp = &head->next; - skb_shinfo(skb)->frag_list = head; strp->skb_head = skb; head = skb; } else { -- cgit v1.2.3 From 1210d1e6bad1e7ccccb19627b880a50d7c15dd51 Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Tue, 21 May 2019 13:20:45 -0600 Subject: platform/chrome: wilco_ec: Add telemetry char device interface The Wilco Embedded Controller is able to send telemetry data which is useful for enterprise applications. A daemon running on the OS sends a command to the EC via a write() to a char device, and can read the response with a read(). The write() request is verified by the driver to ensure that it is performing only one of the whitelisted commands, and that no extraneous data is being transmitted to the EC. The response is passed directly back to the reader with no modification. The character device will appear as /dev/wilco_telemN, where N is some small non-negative integer, starting with 0. Only one process may have the file descriptor open at a time. The calling userspace program needs to keep the device file descriptor open between the calls to write() and read() in order to preserve the response. Up to 32 bytes will be available for reading. For testing purposes, try requesting the EC's firmware build date, by sending the WILCO_EC_TELEM_GET_VERSION command with argument index=3. i.e. write [0x38, 0x00, 0x03] to the device node. An ASCII string of the build date is returned. Signed-off-by: Nick Crews Signed-off-by: Enric Balletbo i Serra --- drivers/platform/chrome/wilco_ec/Kconfig | 7 + drivers/platform/chrome/wilco_ec/Makefile | 2 + drivers/platform/chrome/wilco_ec/core.c | 13 + drivers/platform/chrome/wilco_ec/debugfs.c | 2 +- drivers/platform/chrome/wilco_ec/telemetry.c | 450 +++++++++++++++++++++++++++ include/linux/platform_data/wilco-ec.h | 2 + 6 files changed, 475 insertions(+), 1 deletion(-) create mode 100644 drivers/platform/chrome/wilco_ec/telemetry.c (limited to 'include/linux') diff --git a/drivers/platform/chrome/wilco_ec/Kconfig b/drivers/platform/chrome/wilco_ec/Kconfig index 2c8f6f15f28f..90336874af59 100644 --- a/drivers/platform/chrome/wilco_ec/Kconfig +++ b/drivers/platform/chrome/wilco_ec/Kconfig @@ -27,3 +27,10 @@ config WILCO_EC_EVENTS (such as power state changes) to userspace. The EC sends the events over ACPI, and a driver queues up the events to be read by a userspace daemon from /dev/wilco_event using read() and poll(). + +config WILCO_EC_TELEMETRY + tristate "Enable querying telemetry data from EC" + depends on WILCO_EC + help + If you say Y here, you get support to query EC telemetry data from + /dev/wilco_telem0 using write() and then read(). diff --git a/drivers/platform/chrome/wilco_ec/Makefile b/drivers/platform/chrome/wilco_ec/Makefile index 4d8a5f068f8b..bc817164596e 100644 --- a/drivers/platform/chrome/wilco_ec/Makefile +++ b/drivers/platform/chrome/wilco_ec/Makefile @@ -6,3 +6,5 @@ wilco_ec_debugfs-objs := debugfs.o obj-$(CONFIG_WILCO_EC_DEBUGFS) += wilco_ec_debugfs.o wilco_ec_events-objs := event.o obj-$(CONFIG_WILCO_EC_EVENTS) += wilco_ec_events.o +wilco_ec_telem-objs := telemetry.o +obj-$(CONFIG_WILCO_EC_TELEMETRY) += wilco_ec_telem.o diff --git a/drivers/platform/chrome/wilco_ec/core.c b/drivers/platform/chrome/wilco_ec/core.c index 45cf3a5ed062..3724bf4b77c6 100644 --- a/drivers/platform/chrome/wilco_ec/core.c +++ b/drivers/platform/chrome/wilco_ec/core.c @@ -93,8 +93,20 @@ static int wilco_ec_probe(struct platform_device *pdev) goto unregister_rtc; } + /* Register child device that will be found by the telemetry driver. */ + ec->telem_pdev = platform_device_register_data(dev, "wilco_telem", + PLATFORM_DEVID_AUTO, + ec, sizeof(*ec)); + if (IS_ERR(ec->telem_pdev)) { + dev_err(dev, "Failed to create telemetry platform device\n"); + ret = PTR_ERR(ec->telem_pdev); + goto remove_sysfs; + } + return 0; +remove_sysfs: + wilco_ec_remove_sysfs(ec); unregister_rtc: platform_device_unregister(ec->rtc_pdev); unregister_debugfs: @@ -109,6 +121,7 @@ static int wilco_ec_remove(struct platform_device *pdev) struct wilco_ec_device *ec = platform_get_drvdata(pdev); wilco_ec_remove_sysfs(ec); + platform_device_unregister(ec->telem_pdev); platform_device_unregister(ec->rtc_pdev); if (ec->debugfs_pdev) platform_device_unregister(ec->debugfs_pdev); diff --git a/drivers/platform/chrome/wilco_ec/debugfs.c b/drivers/platform/chrome/wilco_ec/debugfs.c index 281ec595e8e0..8d65a1e2f1a3 100644 --- a/drivers/platform/chrome/wilco_ec/debugfs.c +++ b/drivers/platform/chrome/wilco_ec/debugfs.c @@ -16,7 +16,7 @@ #define DRV_NAME "wilco-ec-debugfs" -/* The 256 raw bytes will take up more space when represented as a hex string */ +/* The raw bytes will take up more space when represented as a hex string */ #define FORMATTED_BUFFER_SIZE (EC_MAILBOX_DATA_SIZE * 4) struct wilco_ec_debugfs { diff --git a/drivers/platform/chrome/wilco_ec/telemetry.c b/drivers/platform/chrome/wilco_ec/telemetry.c new file mode 100644 index 000000000000..94cdc166c840 --- /dev/null +++ b/drivers/platform/chrome/wilco_ec/telemetry.c @@ -0,0 +1,450 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Telemetry communication for Wilco EC + * + * Copyright 2019 Google LLC + * + * The Wilco Embedded Controller is able to send telemetry data + * which is useful for enterprise applications. A daemon running on + * the OS sends a command to the EC via a write() to a char device, + * and can read the response with a read(). The write() request is + * verified by the driver to ensure that it is performing only one + * of the whitelisted commands, and that no extraneous data is + * being transmitted to the EC. The response is passed directly + * back to the reader with no modification. + * + * The character device will appear as /dev/wilco_telemN, where N + * is some small non-negative integer, starting with 0. Only one + * process may have the file descriptor open at a time. The calling + * userspace program needs to keep the device file descriptor open + * between the calls to write() and read() in order to preserve the + * response. Up to 32 bytes will be available for reading. + * + * For testing purposes, try requesting the EC's firmware build + * date, by sending the WILCO_EC_TELEM_GET_VERSION command with + * argument index=3. i.e. write [0x38, 0x00, 0x03] + * to the device node. An ASCII string of the build date is + * returned. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define TELEM_DEV_NAME "wilco_telem" +#define TELEM_CLASS_NAME TELEM_DEV_NAME +#define DRV_NAME TELEM_DEV_NAME +#define TELEM_DEV_NAME_FMT (TELEM_DEV_NAME "%d") +static struct class telem_class = { + .owner = THIS_MODULE, + .name = TELEM_CLASS_NAME, +}; + +/* Keep track of all the device numbers used. */ +#define TELEM_MAX_DEV 128 +static int telem_major; +static DEFINE_IDA(telem_ida); + +/* EC telemetry command codes */ +#define WILCO_EC_TELEM_GET_LOG 0x99 +#define WILCO_EC_TELEM_GET_VERSION 0x38 +#define WILCO_EC_TELEM_GET_FAN_INFO 0x2E +#define WILCO_EC_TELEM_GET_DIAG_INFO 0xFA +#define WILCO_EC_TELEM_GET_TEMP_INFO 0x95 +#define WILCO_EC_TELEM_GET_TEMP_READ 0x2C +#define WILCO_EC_TELEM_GET_BATT_EXT_INFO 0x07 + +#define TELEM_ARGS_SIZE_MAX 30 + +/** + * struct wilco_ec_telem_request - Telemetry command and arguments sent to EC. + * @command: One of WILCO_EC_TELEM_GET_* command codes. + * @reserved: Must be 0. + * @args: The first N bytes are one of telem_args_get_* structs, the rest is 0. + */ +struct wilco_ec_telem_request { + u8 command; + u8 reserved; + u8 args[TELEM_ARGS_SIZE_MAX]; +} __packed; + +/* + * The following telem_args_get_* structs are embedded within the |args| field + * of wilco_ec_telem_request. + */ + +struct telem_args_get_log { + u8 log_type; + u8 log_index; +} __packed; + +/* + * Get a piece of info about the EC firmware version: + * 0 = label + * 1 = svn_rev + * 2 = model_no + * 3 = build_date + * 4 = frio_version + */ +struct telem_args_get_version { + u8 index; +} __packed; + +struct telem_args_get_fan_info { + u8 command; + u8 fan_number; + u8 arg; +} __packed; + +struct telem_args_get_diag_info { + u8 type; + u8 sub_type; +} __packed; + +struct telem_args_get_temp_info { + u8 command; + u8 index; + u8 field; + u8 zone; +} __packed; + +struct telem_args_get_temp_read { + u8 sensor_index; +} __packed; + +struct telem_args_get_batt_ext_info { + u8 var_args[5]; +} __packed; + +/** + * check_telem_request() - Ensure that a request from userspace is valid. + * @rq: Request buffer copied from userspace. + * @size: Number of bytes copied from userspace. + * + * Return: 0 if valid, -EINVAL if bad command or reserved byte is non-zero, + * -EMSGSIZE if the request is too long. + * + * We do not want to allow userspace to send arbitrary telemetry commands to + * the EC. Therefore we check to ensure that + * 1. The request follows the format of struct wilco_ec_telem_request. + * 2. The supplied command code is one of the whitelisted commands. + * 3. The request only contains the necessary data for the header and arguments. + */ +static int check_telem_request(struct wilco_ec_telem_request *rq, + size_t size) +{ + size_t max_size = offsetof(struct wilco_ec_telem_request, args); + + if (rq->reserved) + return -EINVAL; + + switch (rq->command) { + case WILCO_EC_TELEM_GET_LOG: + max_size += sizeof(struct telem_args_get_log); + break; + case WILCO_EC_TELEM_GET_VERSION: + max_size += sizeof(struct telem_args_get_version); + break; + case WILCO_EC_TELEM_GET_FAN_INFO: + max_size += sizeof(struct telem_args_get_fan_info); + break; + case WILCO_EC_TELEM_GET_DIAG_INFO: + max_size += sizeof(struct telem_args_get_diag_info); + break; + case WILCO_EC_TELEM_GET_TEMP_INFO: + max_size += sizeof(struct telem_args_get_temp_info); + break; + case WILCO_EC_TELEM_GET_TEMP_READ: + max_size += sizeof(struct telem_args_get_temp_read); + break; + case WILCO_EC_TELEM_GET_BATT_EXT_INFO: + max_size += sizeof(struct telem_args_get_batt_ext_info); + break; + default: + return -EINVAL; + } + + return (size <= max_size) ? 0 : -EMSGSIZE; +} + +/** + * struct telem_device_data - Data for a Wilco EC device that queries telemetry. + * @cdev: Char dev that userspace reads and polls from. + * @dev: Device associated with the %cdev. + * @ec: Wilco EC that we will be communicating with using the mailbox interface. + * @available: Boolean of if the device can be opened. + */ +struct telem_device_data { + struct device dev; + struct cdev cdev; + struct wilco_ec_device *ec; + atomic_t available; +}; + +#define TELEM_RESPONSE_SIZE EC_MAILBOX_DATA_SIZE + +/** + * struct telem_session_data - Data that exists between open() and release(). + * @dev_data: Pointer to get back to the device data and EC. + * @request: Command and arguments sent to EC. + * @response: Response buffer of data from EC. + * @has_msg: Is there data available to read from a previous write? + */ +struct telem_session_data { + struct telem_device_data *dev_data; + struct wilco_ec_telem_request request; + u8 response[TELEM_RESPONSE_SIZE]; + bool has_msg; +}; + +/** + * telem_open() - Callback for when the device node is opened. + * @inode: inode for this char device node. + * @filp: file for this char device node. + * + * We need to ensure that after writing a command to the device, + * the same userspace process reads the corresponding result. + * Therefore, we increment a refcount on opening the device, so that + * only one process can communicate with the EC at a time. + * + * Return: 0 on success, or negative error code on failure. + */ +static int telem_open(struct inode *inode, struct file *filp) +{ + struct telem_device_data *dev_data; + struct telem_session_data *sess_data; + + /* Ensure device isn't already open */ + dev_data = container_of(inode->i_cdev, struct telem_device_data, cdev); + if (atomic_cmpxchg(&dev_data->available, 1, 0) == 0) + return -EBUSY; + + get_device(&dev_data->dev); + + sess_data = kzalloc(sizeof(*sess_data), GFP_KERNEL); + if (!sess_data) { + atomic_set(&dev_data->available, 1); + return -ENOMEM; + } + sess_data->dev_data = dev_data; + sess_data->has_msg = false; + + nonseekable_open(inode, filp); + filp->private_data = sess_data; + + return 0; +} + +static ssize_t telem_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct telem_session_data *sess_data = filp->private_data; + struct wilco_ec_message msg = {}; + int ret; + + if (count > sizeof(sess_data->request)) + return -EMSGSIZE; + if (copy_from_user(&sess_data->request, buf, count)) + return -EFAULT; + ret = check_telem_request(&sess_data->request, count); + if (ret < 0) + return ret; + + memset(sess_data->response, 0, sizeof(sess_data->response)); + msg.type = WILCO_EC_MSG_TELEMETRY; + msg.request_data = &sess_data->request; + msg.request_size = sizeof(sess_data->request); + msg.response_data = sess_data->response; + msg.response_size = sizeof(sess_data->response); + + ret = wilco_ec_mailbox(sess_data->dev_data->ec, &msg); + if (ret < 0) + return ret; + if (ret != sizeof(sess_data->response)) + return -EMSGSIZE; + + sess_data->has_msg = true; + + return count; +} + +static ssize_t telem_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct telem_session_data *sess_data = filp->private_data; + + if (!sess_data->has_msg) + return -ENODATA; + if (count > sizeof(sess_data->response)) + return -EINVAL; + + if (copy_to_user(buf, sess_data->response, count)) + return -EFAULT; + + sess_data->has_msg = false; + + return count; +} + +static int telem_release(struct inode *inode, struct file *filp) +{ + struct telem_session_data *sess_data = filp->private_data; + + atomic_set(&sess_data->dev_data->available, 1); + put_device(&sess_data->dev_data->dev); + kfree(sess_data); + + return 0; +} + +static const struct file_operations telem_fops = { + .open = telem_open, + .write = telem_write, + .read = telem_read, + .release = telem_release, + .llseek = no_llseek, + .owner = THIS_MODULE, +}; + +/** + * telem_device_free() - Callback to free the telem_device_data structure. + * @d: The device embedded in our device data, which we have been ref counting. + * + * Once all open file descriptors are closed and the device has been removed, + * the refcount of the device will fall to 0 and this will be called. + */ +static void telem_device_free(struct device *d) +{ + struct telem_device_data *dev_data; + + dev_data = container_of(d, struct telem_device_data, dev); + kfree(dev_data); +} + +/** + * telem_device_probe() - Callback when creating a new device. + * @pdev: platform device that we will be receiving telems from. + * + * This finds a free minor number for the device, allocates and initializes + * some device data, and creates a new device and char dev node. + * + * Return: 0 on success, negative error code on failure. + */ +static int telem_device_probe(struct platform_device *pdev) +{ + struct telem_device_data *dev_data; + int error, minor; + + /* Get the next available device number */ + minor = ida_alloc_max(&telem_ida, TELEM_MAX_DEV-1, GFP_KERNEL); + if (minor < 0) { + error = minor; + dev_err(&pdev->dev, "Failed to find minor number: %d", error); + return error; + } + + dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); + if (!dev_data) { + ida_simple_remove(&telem_ida, minor); + return -ENOMEM; + } + + /* Initialize the device data */ + dev_data->ec = dev_get_platdata(&pdev->dev); + atomic_set(&dev_data->available, 1); + platform_set_drvdata(pdev, dev_data); + + /* Initialize the device */ + dev_data->dev.devt = MKDEV(telem_major, minor); + dev_data->dev.class = &telem_class; + dev_data->dev.release = telem_device_free; + dev_set_name(&dev_data->dev, TELEM_DEV_NAME_FMT, minor); + device_initialize(&dev_data->dev); + + /* Initialize the character device and add it to userspace */; + cdev_init(&dev_data->cdev, &telem_fops); + error = cdev_device_add(&dev_data->cdev, &dev_data->dev); + if (error) { + put_device(&dev_data->dev); + ida_simple_remove(&telem_ida, minor); + return error; + } + + return 0; +} + +static int telem_device_remove(struct platform_device *pdev) +{ + struct telem_device_data *dev_data = platform_get_drvdata(pdev); + + cdev_device_del(&dev_data->cdev, &dev_data->dev); + put_device(&dev_data->dev); + ida_simple_remove(&telem_ida, MINOR(dev_data->dev.devt)); + + return 0; +} + +static struct platform_driver telem_driver = { + .probe = telem_device_probe, + .remove = telem_device_remove, + .driver = { + .name = DRV_NAME, + }, +}; + +static int __init telem_module_init(void) +{ + dev_t dev_num = 0; + int ret; + + ret = class_register(&telem_class); + if (ret) { + pr_err(DRV_NAME ": Failed registering class: %d", ret); + return ret; + } + + /* Request the kernel for device numbers, starting with minor=0 */ + ret = alloc_chrdev_region(&dev_num, 0, TELEM_MAX_DEV, TELEM_DEV_NAME); + if (ret) { + pr_err(DRV_NAME ": Failed allocating dev numbers: %d", ret); + goto destroy_class; + } + telem_major = MAJOR(dev_num); + + ret = platform_driver_register(&telem_driver); + if (ret < 0) { + pr_err(DRV_NAME ": Failed registering driver: %d\n", ret); + goto unregister_region; + } + + return 0; + +unregister_region: + unregister_chrdev_region(MKDEV(telem_major, 0), TELEM_MAX_DEV); +destroy_class: + class_unregister(&telem_class); + ida_destroy(&telem_ida); + return ret; +} + +static void __exit telem_module_exit(void) +{ + platform_driver_unregister(&telem_driver); + unregister_chrdev_region(MKDEV(telem_major, 0), TELEM_MAX_DEV); + class_unregister(&telem_class); + ida_destroy(&telem_ida); +} + +module_init(telem_module_init); +module_exit(telem_module_exit); + +MODULE_AUTHOR("Nick Crews "); +MODULE_DESCRIPTION("Wilco EC telemetry driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/include/linux/platform_data/wilco-ec.h b/include/linux/platform_data/wilco-ec.h index e3ce9ce49b11..ad03b586a095 100644 --- a/include/linux/platform_data/wilco-ec.h +++ b/include/linux/platform_data/wilco-ec.h @@ -29,6 +29,7 @@ * @data_size: Size of the data buffer used for EC communication. * @debugfs_pdev: The child platform_device used by the debugfs sub-driver. * @rtc_pdev: The child platform_device used by the RTC sub-driver. + * @telem_pdev: The child platform_device used by the telemetry sub-driver. */ struct wilco_ec_device { struct device *dev; @@ -40,6 +41,7 @@ struct wilco_ec_device { size_t data_size; struct platform_device *debugfs_pdev; struct platform_device *rtc_pdev; + struct platform_device *telem_pdev; }; /** -- cgit v1.2.3 From 4998f1efd1904dd21697aeeead270e3eb97691dd Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Mon, 3 Jun 2019 18:53:43 +0800 Subject: usb: Add devaddr in struct usb_device The Clear_TT_Buffer request sent to the hub includes the address of the LS/FS child device in wValue field. usb_hub_clear_tt_buffer() uses udev->devnum to set the address wValue. This won't work for devices connected to xHC. For other host controllers udev->devnum is the same as the address of the usb device, chosen and set by usb core. With xHC the controller hardware assigns the address, and won't be the same as devnum. Here we add devaddr in "struct usb_device" for usb_hub_clear_tt_buffer() to use. Signed-off-by: Jim Lin Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 4 +++- drivers/usb/host/xhci.c | 2 ++ include/linux/usb.h | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 572e8c26a129..82cc3766cb23 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -873,7 +873,7 @@ int usb_hub_clear_tt_buffer(struct urb *urb) /* info that CLEAR_TT_BUFFER needs */ clear->tt = tt->multi ? udev->ttport : 1; clear->devinfo = usb_pipeendpoint (pipe); - clear->devinfo |= udev->devnum << 4; + clear->devinfo |= ((u16)udev->devaddr) << 4; clear->devinfo |= usb_pipecontrol(pipe) ? (USB_ENDPOINT_XFER_CONTROL << 11) : (USB_ENDPOINT_XFER_BULK << 11); @@ -2125,6 +2125,8 @@ static void update_devnum(struct usb_device *udev, int devnum) /* The address for a WUSB device is managed by wusbcore. */ if (!udev->wusb) udev->devnum = devnum; + if (!udev->devaddr) + udev->devaddr = (u8)devnum; } static void hub_free_dev(struct usb_device *udev) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 20db378a6012..4f92643e3a4c 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4125,6 +4125,8 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, /* Zero the input context control for later use */ ctrl_ctx->add_flags = 0; ctrl_ctx->drop_flags = 0; + slot_ctx = xhci_get_slot_ctx(xhci, virt_dev->out_ctx); + udev->devaddr = (u8)(le32_to_cpu(slot_ctx->dev_state) & DEV_ADDR_MASK); xhci_dbg_trace(xhci, trace_xhci_dbg_address, "Internal device address = %d", diff --git a/include/linux/usb.h b/include/linux/usb.h index ae82d9d1112b..83d35d993e8c 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -578,6 +578,7 @@ struct usb3_lpm_parameters { * @bus_mA: Current available from the bus * @portnum: parent port number (origin 1) * @level: number of USB hub ancestors + * @devaddr: device address, XHCI: assigned by HW, others: same as devnum * @can_submit: URBs may be submitted * @persist_enabled: USB_PERSIST enabled for this device * @have_langid: whether string_langid is valid @@ -661,6 +662,7 @@ struct usb_device { unsigned short bus_mA; u8 portnum; u8 level; + u8 devaddr; unsigned can_submit:1; unsigned persist_enabled:1; -- cgit v1.2.3 From 32a6cfdfd168982cd7cd2898372da5eb49e56daf Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 5 Jun 2019 15:16:21 +0900 Subject: usb: renesas_usbhs: remove sudmac support SUDMAC feature was supported in v3.10, but was never used by any platform. So, this patch removes it. Signed-off-by: Yoshihiro Shimoda Reviewed-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/fifo.c | 6 +----- include/linux/usb/renesas_usbhs.h | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 452b456ac24e..e84d2ac2a30a 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -12,7 +12,6 @@ #include "pipe.h" #define usbhsf_get_cfifo(p) (&((p)->fifo_info.cfifo)) -#define usbhsf_is_cfifo(p, f) (usbhsf_get_cfifo(p) == f) #define usbhsf_fifo_is_busy(f) ((f)->pipe) /* see usbhs_pipe_select_fifo */ @@ -325,10 +324,7 @@ static int usbhsf_fifo_select(struct usbhs_pipe *pipe, } /* "base" will be used below */ - if (usbhs_get_dparam(priv, has_sudmac) && !usbhsf_is_cfifo(priv, fifo)) - usbhs_write(priv, fifo->sel, base); - else - usbhs_write(priv, fifo->sel, base | MBW_32); + usbhs_write(priv, fifo->sel, base | MBW_32); /* check ISEL and CURPIPE value */ while (timeout--) { diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index 3f53043fb56b..a2481f4da841 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -187,7 +187,6 @@ struct renesas_usbhs_driver_param { * option: */ u32 has_otg:1; /* for controlling PWEN/EXTLP */ - u32 has_sudmac:1; /* for SUDMAC */ u32 has_usb_dmac:1; /* for USB-DMAC */ u32 runtime_pwctrl:1; u32 has_cnen:1; -- cgit v1.2.3 From e60e982375244026ca46feeba0fb5bb4d51b5a67 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 5 Jun 2019 15:16:22 +0900 Subject: usb: renesas_usbhs: remove controlling PWEN/EXTLP support Controlling PWMEN/EXTLP (named as "has_otg") was supported in v3.2, but the last user (kzm9g) was removed by the commit 30f8925a57d8ad49 ("ARM: shmobile: Remove legacy board code for KZM-A9-GT"). So, this patch remove it. Signed-off-by: Yoshihiro Shimoda Reviewed-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/common.c | 4 ---- include/linux/usb/renesas_usbhs.h | 1 - 2 files changed, 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index c7c9c5d75a56..a501ea609019 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -95,10 +95,6 @@ void usbhs_sys_host_ctrl(struct usbhs_priv *priv, int enable) { u16 mask = DCFM | DRPD | DPRPU | HSE | USBE; u16 val = DCFM | DRPD | HSE | USBE; - int has_otg = usbhs_get_dparam(priv, has_otg); - - if (has_otg) - usbhs_bset(priv, DVSTCTR, (EXTLP | PWEN), (EXTLP | PWEN)); /* * if enable diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index a2481f4da841..b2cba7c74444 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -186,7 +186,6 @@ struct renesas_usbhs_driver_param { /* * option: */ - u32 has_otg:1; /* for controlling PWEN/EXTLP */ u32 has_usb_dmac:1; /* for USB-DMAC */ u32 runtime_pwctrl:1; u32 has_cnen:1; -- cgit v1.2.3 From 0d9ce162cf46c99628cc5da9510b959c7976735b Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Thu, 3 Jan 2019 17:14:28 -0800 Subject: kvm: Convert kvm_lock to a mutex It doesn't seem as if there is any particular need for kvm_lock to be a spinlock, so convert the lock to a mutex so that sleepable functions (in particular cond_resched()) can be called while holding it. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/locking.txt | 4 +--- arch/s390/kvm/kvm-s390.c | 4 ++-- arch/x86/kvm/mmu.c | 4 ++-- arch/x86/kvm/x86.c | 14 +++++++------- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 30 +++++++++++++++--------------- 6 files changed, 28 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt index 1bb8bcaf8497..635cd6eaf714 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt @@ -15,8 +15,6 @@ The acquisition orders for mutexes are as follows: On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock. -For spinlocks, kvm_lock is taken outside kvm->mmu_lock. - Everything else is a leaf: no other lock is taken inside the critical sections. @@ -169,7 +167,7 @@ which time it will be set using the Dirty tracking mechanism described above. ------------ Name: kvm_lock -Type: spinlock_t +Type: mutex Arch: any Protects: - vm_list diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7936af0a971f..0fef9192f6ac 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2423,13 +2423,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); if (!kvm->arch.sca) goto out_err; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); sca_offset += 16; if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) sca_offset = 0; kvm->arch.sca = (struct bsca_block *) ((char *) kvm->arch.sca + sca_offset); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); sprintf(debug_name, "kvm-%u", current->pid); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 95ac393e2959..3384c539d150 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5956,7 +5956,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) int nr_to_scan = sc->nr_to_scan; unsigned long freed = 0; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { int idx; @@ -5998,7 +5998,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) break; } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return freed; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 10feed6a01eb..6200d5a51f13 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6719,7 +6719,7 @@ static void kvm_hyperv_tsc_notifier(void) struct kvm_vcpu *vcpu; int cpu; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_make_mclock_inprogress_request(kvm); @@ -6745,7 +6745,7 @@ static void kvm_hyperv_tsc_notifier(void) spin_unlock(&ka->pvclock_gtod_sync_lock); } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); } #endif @@ -6796,17 +6796,17 @@ static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu) smp_call_function_single(cpu, tsc_khz_changed, freq, 1); - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->cpu != cpu) continue; kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); - if (vcpu->cpu != smp_processor_id()) + if (vcpu->cpu != raw_smp_processor_id()) send_ipi = 1; } } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); if (freq->old < freq->new && send_ipi) { /* @@ -6929,12 +6929,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work) struct kvm_vcpu *vcpu; int i; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); atomic_set(&kvm_guest_has_master_clock, 0); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); } static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5e9fd7ad8018..abafddb9fe2c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -162,7 +162,7 @@ static inline bool is_error_page(struct page *page) extern struct kmem_cache *kvm_vcpu_cache; -extern spinlock_t kvm_lock; +extern struct mutex kvm_lock; extern struct list_head vm_list; struct kvm_io_range { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b2579841263f..9613987ef4c8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -98,7 +98,7 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink); * kvm->lock --> kvm->slots_lock --> kvm->irq_lock */ -DEFINE_SPINLOCK(kvm_lock); +DEFINE_MUTEX(kvm_lock); static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); @@ -683,9 +683,9 @@ static struct kvm *kvm_create_vm(unsigned long type) if (r) goto out_err; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); preempt_notifier_inc(); @@ -731,9 +731,9 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm); kvm_destroy_vm_debugfs(kvm); kvm_arch_sync_events(kvm); - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_del(&kvm->vm_list); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { struct kvm_io_bus *bus = kvm_get_bus(kvm, i); @@ -4034,13 +4034,13 @@ static int vm_stat_get(void *_offset, u64 *val) u64 tmp_val; *val = 0; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val); *val += tmp_val; } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -4053,12 +4053,12 @@ static int vm_stat_clear(void *_offset, u64 val) if (val) return -EINVAL; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vm_stat_clear_per_vm((void *)&stat_tmp, 0); } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -4073,13 +4073,13 @@ static int vcpu_stat_get(void *_offset, u64 *val) u64 tmp_val; *val = 0; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val); *val += tmp_val; } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -4092,12 +4092,12 @@ static int vcpu_stat_clear(void *_offset, u64 val) if (val) return -EINVAL; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vcpu_stat_clear_per_vm((void *)&stat_tmp, 0); } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -4118,7 +4118,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) if (!kvm_dev.this_device || !kvm) return; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); if (type == KVM_EVENT_CREATE_VM) { kvm_createvm_count++; kvm_active_vms++; @@ -4127,7 +4127,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) } created = kvm_createvm_count; active = kvm_active_vms; - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); env = kzalloc(sizeof(*env), GFP_KERNEL_ACCOUNT); if (!env) -- cgit v1.2.3 From 1e390478cfb527e34c9ab89ba57212cb05c33c51 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 5 Jun 2019 10:46:05 +0200 Subject: gpu: host1x: Increase maximum DMA segment size Recent versions of the DMA API debug code have started to warn about violations of the maximum DMA segment size. This is because the segment size defaults to 64 KiB, which can easily be exceeded in large buffer allocations such as used in DRM/KMS for framebuffers. Technically the Tegra SMMU and ARM SMMU don't have a maximum segment size (they map individual pages irrespective of whether they are contiguous or not), so the choice of 4 MiB is a bit arbitrary here. The maximum segment size is a 32-bit unsigned integer, though, so we can't set it to the correct maximum size, which would be the size of the aperture. Signed-off-by: Thierry Reding --- drivers/gpu/host1x/bus.c | 3 +++ include/linux/host1x.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index 103fffc1904b..c9a637d9417e 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -425,6 +425,9 @@ static int host1x_device_add(struct host1x *host1x, of_dma_configure(&device->dev, host1x->dev->of_node, true); + device->dev.dma_parms = &device->dma_parms; + dma_set_max_seg_size(&device->dev, SZ_4M); + err = host1x_device_parse_dt(device, driver); if (err < 0) { kfree(device); diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 89110d896d72..aef6e2f73802 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -310,6 +310,8 @@ struct host1x_device { struct list_head clients; bool registered; + + struct device_dma_parameters dma_parms; }; static inline struct host1x_device *to_host1x_device(struct device *dev) -- cgit v1.2.3 From 2bd229df5e2ecbc13909f71dbd196fced1d533ca Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 4 Jun 2019 23:02:34 +0200 Subject: net: phy: remove state PHY_FORCING In the early days of phylib we had a functionality that changed to the next lower speed in fixed mode if no link was established after a certain period of time. This functionality has been removed years ago, and state PHY_FORCING isn't needed any longer. Instead we can go from UP to RUNNING or NOLINK directly (same as in autoneg mode). Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 26 ++------------------------ include/linux/phy.h | 11 ----------- 2 files changed, 2 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 0084220d10dc..d9150765009e 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -43,7 +43,6 @@ static const char *phy_state_to_str(enum phy_state st) PHY_STATE_STR(UP) PHY_STATE_STR(RUNNING) PHY_STATE_STR(NOLINK) - PHY_STATE_STR(FORCING) PHY_STATE_STR(HALTED) } @@ -577,15 +576,8 @@ int phy_start_aneg(struct phy_device *phydev) if (err < 0) goto out_unlock; - if (phy_is_started(phydev)) { - if (phydev->autoneg == AUTONEG_ENABLE) { - err = phy_check_link_status(phydev); - } else { - phydev->state = PHY_FORCING; - phydev->link_timeout = PHY_FORCE_TIMEOUT; - } - } - + if (phy_is_started(phydev)) + err = phy_check_link_status(phydev); out_unlock: mutex_unlock(&phydev->lock); @@ -951,20 +943,6 @@ void phy_state_machine(struct work_struct *work) case PHY_RUNNING: err = phy_check_link_status(phydev); break; - case PHY_FORCING: - err = genphy_update_link(phydev); - if (err) - break; - - if (phydev->link) { - phydev->state = PHY_RUNNING; - phy_link_up(phydev); - } else { - if (0 == phydev->link_timeout--) - needs_aneg = true; - phy_link_down(phydev, false); - } - break; case PHY_HALTED: if (phydev->link) { phydev->link = 0; diff --git a/include/linux/phy.h b/include/linux/phy.h index dc4b51060ebc..8caaf76685cd 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -297,12 +297,6 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); * - irq or timer will set RUNNING if link comes back * - phy_stop moves to HALTED * - * FORCING: PHY is being configured with forced settings - * - if link is up, move to RUNNING - * - If link is down, we drop to the next highest setting, and - * retry (FORCING) after a timeout - * - phy_stop moves to HALTED - * * RUNNING: PHY is currently up, running, and possibly sending * and/or receiving packets * - irq or timer will set NOLINK if link goes down @@ -319,7 +313,6 @@ enum phy_state { PHY_UP, PHY_RUNNING, PHY_NOLINK, - PHY_FORCING, }; /** @@ -347,8 +340,6 @@ struct phy_c45_device_ids { * loopback_enabled: Set true if this phy has been loopbacked successfully. * state: state of the PHY for management purposes * dev_flags: Device-specific flags used by the PHY driver. - * link_timeout: The number of timer firings to wait before the - * giving up on the current attempt at acquiring a link * irq: IRQ number of the PHY's interrupt (-1 if none) * phy_timer: The timer for handling the state machine * attached_dev: The attached enet driver's device instance ptr @@ -416,8 +407,6 @@ struct phy_device { /* Energy efficient ethernet modes which should be prohibited */ u32 eee_broken_modes; - int link_timeout; - #ifdef CONFIG_LED_TRIGGER_PHY struct phy_led_trigger *phy_led_triggers; unsigned int phy_num_led_triggers; -- cgit v1.2.3 From 9c3cef54c50d93871eaa46c28a06de8bd03fab63 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 Jun 2019 11:34:28 +0200 Subject: VMCI: Fixup atomic64_t abuse The VMCI driver is abusing atomic64_t and atomic_t, there is no actual atomic RmW operations around. Rewrite the code to use a regular u64 with READ_ONCE() and WRITE_ONCE() and a cast to 'unsigned long'. This fully preserves whatever broken there was (it's not endian-safe for starters, and also looks to be missing ordering). Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Greg Kroah-Hartman --- include/linux/vmw_vmci_defs.h | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index 0c06178e4985..8ee472118f54 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -438,8 +438,8 @@ enum { struct vmci_queue_header { /* All fields are 64bit and aligned. */ struct vmci_handle handle; /* Identifier. */ - atomic64_t producer_tail; /* Offset in this queue. */ - atomic64_t consumer_head; /* Offset in peer queue. */ + u64 producer_tail; /* Offset in this queue. */ + u64 consumer_head; /* Offset in peer queue. */ }; /* @@ -740,13 +740,9 @@ static inline void *vmci_event_data_payload(struct vmci_event_data *ev_data) * prefix will be used, so correctness isn't an issue, but using a * 64bit operation still adds unnecessary overhead. */ -static inline u64 vmci_q_read_pointer(atomic64_t *var) +static inline u64 vmci_q_read_pointer(u64 *var) { -#if defined(CONFIG_X86_32) - return atomic_read((atomic_t *)var); -#else - return atomic64_read(var); -#endif + return READ_ONCE(*(unsigned long *)var); } /* @@ -755,23 +751,17 @@ static inline u64 vmci_q_read_pointer(atomic64_t *var) * never exceeds a 32bit value in this case. On 32bit SMP, using a * locked cmpxchg8b adds unnecessary overhead. */ -static inline void vmci_q_set_pointer(atomic64_t *var, - u64 new_val) +static inline void vmci_q_set_pointer(u64 *var, u64 new_val) { -#if defined(CONFIG_X86_32) - return atomic_set((atomic_t *)var, (u32)new_val); -#else - return atomic64_set(var, new_val); -#endif + /* XXX buggered on big-endian */ + WRITE_ONCE(*(unsigned long *)var, (unsigned long)new_val); } /* * Helper to add a given offset to a head or tail pointer. Wraps the * value of the pointer around the max size of the queue. */ -static inline void vmci_qp_add_pointer(atomic64_t *var, - size_t add, - u64 size) +static inline void vmci_qp_add_pointer(u64 *var, size_t add, u64 size) { u64 new_val = vmci_q_read_pointer(var); @@ -848,8 +838,8 @@ static inline void vmci_q_header_init(struct vmci_queue_header *q_header, const struct vmci_handle handle) { q_header->handle = handle; - atomic64_set(&q_header->producer_tail, 0); - atomic64_set(&q_header->consumer_head, 0); + q_header->producer_tail = 0; + q_header->consumer_head = 0; } /* -- cgit v1.2.3 From 9c523efb749de01f0ec172d1160bb6ef8d1620a4 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Wed, 5 Jun 2019 07:56:31 -0500 Subject: mfd: ti-lmu: Add LM36274 support to the ti-lmu Add the LM36274 register support to the ti-lmu MFD driver. Signed-off-by: Dan Murphy Acked-by: Lee Jones Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- drivers/mfd/Kconfig | 5 ++--- drivers/mfd/ti-lmu.c | 14 ++++++++++++++ include/linux/mfd/ti-lmu-register.h | 23 +++++++++++++++++++++++ include/linux/mfd/ti-lmu.h | 4 ++++ 4 files changed, 43 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 8933485b28e7..a69aca3c2dab 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1335,9 +1335,8 @@ config MFD_TI_LMU select REGMAP_I2C help Say yes here to enable support for TI LMU chips. - - TI LMU MFD supports LM3532, LM3631, LM3632, LM3633, and LM3695. - It consists of backlight, LED and regulator driver. + TI LMU MFD supports LM3532, LM3631, LM3632, LM3633, LM3695 and + LM36274. It consists of backlight, LED and regulator driver. It provides consistent device controls for lighting functions. config MFD_OMAP_USB_HOST diff --git a/drivers/mfd/ti-lmu.c b/drivers/mfd/ti-lmu.c index 89b1c5b584af..691ab9dd6236 100644 --- a/drivers/mfd/ti-lmu.c +++ b/drivers/mfd/ti-lmu.c @@ -111,6 +111,17 @@ static const struct mfd_cell lm3695_devices[] = { }, }; +static const struct mfd_cell lm36274_devices[] = { + LM363X_REGULATOR(LM36274_BOOST), + LM363X_REGULATOR(LM36274_LDO_POS), + LM363X_REGULATOR(LM36274_LDO_NEG), + { + .name = "lm36274-leds", + .id = LM36274, + .of_compatible = "ti,lm36274-backlight", + }, +}; + #define TI_LMU_DATA(chip, max_reg) \ static const struct ti_lmu_data chip##_data = \ { \ @@ -123,6 +134,7 @@ TI_LMU_DATA(lm3631, LM3631_MAX_REG); TI_LMU_DATA(lm3632, LM3632_MAX_REG); TI_LMU_DATA(lm3633, LM3633_MAX_REG); TI_LMU_DATA(lm3695, LM3695_MAX_REG); +TI_LMU_DATA(lm36274, LM36274_MAX_REG); static int ti_lmu_probe(struct i2c_client *cl, const struct i2c_device_id *id) { @@ -191,6 +203,7 @@ static const struct of_device_id ti_lmu_of_match[] = { { .compatible = "ti,lm3632", .data = &lm3632_data }, { .compatible = "ti,lm3633", .data = &lm3633_data }, { .compatible = "ti,lm3695", .data = &lm3695_data }, + { .compatible = "ti,lm36274", .data = &lm36274_data }, { } }; MODULE_DEVICE_TABLE(of, ti_lmu_of_match); @@ -200,6 +213,7 @@ static const struct i2c_device_id ti_lmu_ids[] = { { "lm3632", LM3632 }, { "lm3633", LM3633 }, { "lm3695", LM3695 }, + { "lm36274", LM36274 }, { } }; MODULE_DEVICE_TABLE(i2c, ti_lmu_ids); diff --git a/include/linux/mfd/ti-lmu-register.h b/include/linux/mfd/ti-lmu-register.h index 76998b01764b..076d8dea38fd 100644 --- a/include/linux/mfd/ti-lmu-register.h +++ b/include/linux/mfd/ti-lmu-register.h @@ -189,4 +189,27 @@ #define LM3695_REG_BRT_MSB 0x14 #define LM3695_MAX_REG 0x14 + +/* LM36274 */ +#define LM36274_REG_REV 0x01 +#define LM36274_REG_BL_CFG_1 0x02 +#define LM36274_REG_BL_CFG_2 0x03 +#define LM36274_REG_BRT_LSB 0x04 +#define LM36274_REG_BRT_MSB 0x05 +#define LM36274_REG_BL_EN 0x08 + +#define LM36274_REG_BIAS_CONFIG_1 0x09 +#define LM36274_EXT_EN_MASK BIT(0) +#define LM36274_EN_VNEG_MASK BIT(1) +#define LM36274_EN_VPOS_MASK BIT(2) + +#define LM36274_REG_BIAS_CONFIG_2 0x0a +#define LM36274_REG_BIAS_CONFIG_3 0x0b +#define LM36274_REG_VOUT_BOOST 0x0c +#define LM36274_REG_VOUT_POS 0x0d +#define LM36274_REG_VOUT_NEG 0x0e +#define LM36274_VOUT_MASK 0x3F + +#define LM36274_MAX_REG 0x13 + #endif diff --git a/include/linux/mfd/ti-lmu.h b/include/linux/mfd/ti-lmu.h index 54e9d272e81c..0957598c7d41 100644 --- a/include/linux/mfd/ti-lmu.h +++ b/include/linux/mfd/ti-lmu.h @@ -26,6 +26,7 @@ enum ti_lmu_id { LM3632, LM3633, LM3695, + LM36274, LMU_MAX_ID, }; @@ -67,6 +68,9 @@ enum lm363x_regulator_id { LM3632_BOOST, /* Boost output */ LM3632_LDO_POS, /* Positive display bias output */ LM3632_LDO_NEG, /* Negative display bias output */ + LM36274_BOOST, /* Boost output */ + LM36274_LDO_POS, /* Positive display bias output */ + LM36274_LDO_NEG, /* Negative display bias output */ }; /** -- cgit v1.2.3 From 2076e5c0451ca943ff8ecc6def7239c84c77e070 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Mon, 6 May 2019 16:29:38 -0700 Subject: mm/hmm: update HMM documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the HMM documentation to reflect the latest API and make a few minor wording changes. Cc: John Hubbard Cc: Ira Weiny Cc: Dan Williams Cc: Arnd Bergmann Cc: Balbir Singh Cc: Dan Carpenter Cc: Matthew Wilcox Cc: Souptick Joarder Cc: Andrew Morton Signed-off-by: Ralph Campbell Reviewed-by: Jérôme Glisse Signed-off-by: Jason Gunthorpe --- Documentation/vm/hmm.rst | 141 +++++++++++++++++++++++++---------------------- include/linux/hmm.h | 7 ++- 2 files changed, 78 insertions(+), 70 deletions(-) (limited to 'include/linux') diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst index 7cdf7282e022..7b6eeda5a7c0 100644 --- a/Documentation/vm/hmm.rst +++ b/Documentation/vm/hmm.rst @@ -10,7 +10,7 @@ of this being specialized struct page for such memory (see sections 5 to 7 of this document). HMM also provides optional helpers for SVM (Share Virtual Memory), i.e., -allowing a device to transparently access program address coherently with +allowing a device to transparently access program addresses coherently with the CPU meaning that any valid pointer on the CPU is also a valid pointer for the device. This is becoming mandatory to simplify the use of advanced heterogeneous computing where GPU, DSP, or FPGA are used to perform various @@ -22,8 +22,8 @@ expose the hardware limitations that are inherent to many platforms. The third section gives an overview of the HMM design. The fourth section explains how CPU page-table mirroring works and the purpose of HMM in this context. The fifth section deals with how device memory is represented inside the kernel. -Finally, the last section presents a new migration helper that allows lever- -aging the device DMA engine. +Finally, the last section presents a new migration helper that allows +leveraging the device DMA engine. .. contents:: :local: @@ -39,20 +39,20 @@ address space. I use shared address space to refer to the opposite situation: i.e., one in which any application memory region can be used by a device transparently. -Split address space happens because device can only access memory allocated -through device specific API. This implies that all memory objects in a program +Split address space happens because devices can only access memory allocated +through a device specific API. This implies that all memory objects in a program are not equal from the device point of view which complicates large programs that rely on a wide set of libraries. -Concretely this means that code that wants to leverage devices like GPUs needs -to copy object between generically allocated memory (malloc, mmap private, mmap +Concretely, this means that code that wants to leverage devices like GPUs needs +to copy objects between generically allocated memory (malloc, mmap private, mmap share) and memory allocated through the device driver API (this still ends up with an mmap but of the device file). For flat data sets (array, grid, image, ...) this isn't too hard to achieve but -complex data sets (list, tree, ...) are hard to get right. Duplicating a +for complex data sets (list, tree, ...) it's hard to get right. Duplicating a complex data set needs to re-map all the pointer relations between each of its -elements. This is error prone and program gets harder to debug because of the +elements. This is error prone and programs get harder to debug because of the duplicate data set and addresses. Split address space also means that libraries cannot transparently use data @@ -77,12 +77,12 @@ I/O bus, device memory characteristics I/O buses cripple shared address spaces due to a few limitations. Most I/O buses only allow basic memory access from device to main memory; even cache -coherency is often optional. Access to device memory from CPU is even more +coherency is often optional. Access to device memory from a CPU is even more limited. More often than not, it is not cache coherent. If we only consider the PCIE bus, then a device can access main memory (often through an IOMMU) and be cache coherent with the CPUs. However, it only allows -a limited set of atomic operations from device on main memory. This is worse +a limited set of atomic operations from the device on main memory. This is worse in the other direction: the CPU can only access a limited range of the device memory and cannot perform atomic operations on it. Thus device memory cannot be considered the same as regular memory from the kernel point of view. @@ -93,20 +93,20 @@ The final limitation is latency. Access to main memory from the device has an order of magnitude higher latency than when the device accesses its own memory. Some platforms are developing new I/O buses or additions/modifications to PCIE -to address some of these limitations (OpenCAPI, CCIX). They mainly allow two- -way cache coherency between CPU and device and allow all atomic operations the +to address some of these limitations (OpenCAPI, CCIX). They mainly allow +two-way cache coherency between CPU and device and allow all atomic operations the architecture supports. Sadly, not all platforms are following this trend and some major architectures are left without hardware solutions to these problems. So for shared address space to make sense, not only must we allow devices to access any memory but we must also permit any memory to be migrated to device -memory while device is using it (blocking CPU access while it happens). +memory while the device is using it (blocking CPU access while it happens). Shared address space and migration ================================== -HMM intends to provide two main features. First one is to share the address +HMM intends to provide two main features. The first one is to share the address space by duplicating the CPU page table in the device page table so the same address points to the same physical memory for any valid main memory address in the process address space. @@ -121,14 +121,14 @@ why HMM provides helpers to factor out everything that can be while leaving the hardware specific details to the device driver. The second mechanism HMM provides is a new kind of ZONE_DEVICE memory that -allows allocating a struct page for each page of the device memory. Those pages +allows allocating a struct page for each page of device memory. Those pages are special because the CPU cannot map them. However, they allow migrating main memory to device memory using existing migration mechanisms and everything -looks like a page is swapped out to disk from the CPU point of view. Using a -struct page gives the easiest and cleanest integration with existing mm mech- -anisms. Here again, HMM only provides helpers, first to hotplug new ZONE_DEVICE +looks like a page that is swapped out to disk from the CPU point of view. Using a +struct page gives the easiest and cleanest integration with existing mm +mechanisms. Here again, HMM only provides helpers, first to hotplug new ZONE_DEVICE memory for the device memory and second to perform migration. Policy decisions -of what and when to migrate things is left to the device driver. +of what and when to migrate is left to the device driver. Note that any CPU access to a device page triggers a page fault and a migration back to main memory. For example, when a page backing a given CPU address A is @@ -136,8 +136,8 @@ migrated from a main memory page to a device page, then any CPU access to address A triggers a page fault and initiates a migration back to main memory. With these two features, HMM not only allows a device to mirror process address -space and keeping both CPU and device page table synchronized, but also lever- -ages device memory by migrating the part of the data set that is actively being +space and keeps both CPU and device page tables synchronized, but also +leverages device memory by migrating the part of the data set that is actively being used by the device. @@ -151,21 +151,28 @@ registration of an hmm_mirror struct:: int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm); - int hmm_mirror_register_locked(struct hmm_mirror *mirror, - struct mm_struct *mm); - -The locked variant is to be used when the driver is already holding mmap_sem -of the mm in write mode. The mirror struct has a set of callbacks that are used +The mirror struct has a set of callbacks that are used to propagate CPU page tables:: struct hmm_mirror_ops { + /* release() - release hmm_mirror + * + * @mirror: pointer to struct hmm_mirror + * + * This is called when the mm_struct is being released. The callback + * must ensure that all access to any pages obtained from this mirror + * is halted before the callback returns. All future access should + * fault. + */ + void (*release)(struct hmm_mirror *mirror); + /* sync_cpu_device_pagetables() - synchronize page tables * * @mirror: pointer to struct hmm_mirror - * @update_type: type of update that occurred to the CPU page table - * @start: virtual start address of the range to update - * @end: virtual end address of the range to update + * @update: update information (see struct mmu_notifier_range) + * Return: -EAGAIN if update.blockable false and callback need to + * block, 0 otherwise. * * This callback ultimately originates from mmu_notifiers when the CPU * page table is updated. The device driver must update its page table @@ -176,14 +183,12 @@ to propagate CPU page tables:: * page tables are completely updated (TLBs flushed, etc); this is a * synchronous call. */ - void (*update)(struct hmm_mirror *mirror, - enum hmm_update action, - unsigned long start, - unsigned long end); + int (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror, + const struct hmm_update *update); }; The device driver must perform the update action to the range (mark range -read only, or fully unmap, ...). The device must be done with the update before +read only, or fully unmap, etc.). The device must complete the update before the driver callback returns. When the device driver wants to populate a range of virtual addresses, it can @@ -194,17 +199,18 @@ use either:: The first one (hmm_range_snapshot()) will only fetch present CPU page table entries and will not trigger a page fault on missing or non-present entries. -The second one does trigger a page fault on missing or read-only entry if the -write parameter is true. Page faults use the generic mm page fault code path -just like a CPU page fault. +The second one does trigger a page fault on missing or read-only entries if +write access is requested (see below). Page faults use the generic mm page +fault code path just like a CPU page fault. Both functions copy CPU page table entries into their pfns array argument. Each entry in that array corresponds to an address in the virtual range. HMM provides a set of flags to help the driver identify special CPU page table entries. -Locking with the update() callback is the most important aspect the driver must -respect in order to keep things properly synchronized. The usage pattern is:: +Locking within the sync_cpu_device_pagetables() callback is the most important +aspect the driver must respect in order to keep things properly synchronized. +The usage pattern is:: int driver_populate_range(...) { @@ -239,11 +245,11 @@ respect in order to keep things properly synchronized. The usage pattern is:: hmm_range_wait_until_valid(&range, TIMEOUT_IN_MSEC); goto again; } - hmm_mirror_unregister(&range); + hmm_range_unregister(&range); return ret; } take_lock(driver->update); - if (!range.valid) { + if (!hmm_range_valid(&range)) { release_lock(driver->update); up_read(&mm->mmap_sem); goto again; @@ -251,15 +257,15 @@ respect in order to keep things properly synchronized. The usage pattern is:: // Use pfns array content to update device page table - hmm_mirror_unregister(&range); + hmm_range_unregister(&range); release_lock(driver->update); up_read(&mm->mmap_sem); return 0; } The driver->update lock is the same lock that the driver takes inside its -update() callback. That lock must be held before checking the range.valid -field to avoid any race with a concurrent CPU page table update. +sync_cpu_device_pagetables() callback. That lock must be held before calling +hmm_range_valid() to avoid any race with a concurrent CPU page table update. HMM implements all this on top of the mmu_notifier API because we wanted a simpler API and also to be able to perform optimizations latter on like doing @@ -279,46 +285,47 @@ concurrently). Leverage default_flags and pfn_flags_mask ========================================= -The hmm_range struct has 2 fields default_flags and pfn_flags_mask that allows -to set fault or snapshot policy for a whole range instead of having to set them -for each entries in the range. +The hmm_range struct has 2 fields, default_flags and pfn_flags_mask, that specify +fault or snapshot policy for the whole range instead of having to set them +for each entry in the pfns array. + +For instance, if the device flags for range.flags are:: -For instance if the device flags for device entries are: - VALID (1 << 63) - WRITE (1 << 62) + range.flags[HMM_PFN_VALID] = (1 << 63); + range.flags[HMM_PFN_WRITE] = (1 << 62); -Now let say that device driver wants to fault with at least read a range then -it does set:: +and the device driver wants pages for a range with at least read permission, +it sets:: range->default_flags = (1 << 63); range->pfn_flags_mask = 0; -and calls hmm_range_fault() as described above. This will fill fault all page +and calls hmm_range_fault() as described above. This will fill fault all pages in the range with at least read permission. -Now let say driver wants to do the same except for one page in the range for -which its want to have write. Now driver set:: +Now let's say the driver wants to do the same except for one page in the range for +which it wants to have write permission. Now driver set:: range->default_flags = (1 << 63); range->pfn_flags_mask = (1 << 62); range->pfns[index_of_write] = (1 << 62); -With this HMM will fault in all page with at least read (ie valid) and for the +With this, HMM will fault in all pages with at least read (i.e., valid) and for the address == range->start + (index_of_write << PAGE_SHIFT) it will fault with -write permission ie if the CPU pte does not have write permission set then HMM +write permission i.e., if the CPU pte does not have write permission set then HMM will call handle_mm_fault(). -Note that HMM will populate the pfns array with write permission for any entry -that have write permission within the CPU pte no matter what are the values set +Note that HMM will populate the pfns array with write permission for any page +that is mapped with CPU write permission no matter what values are set in default_flags or pfn_flags_mask. Represent and manage device memory from core kernel point of view ================================================================= -Several different designs were tried to support device memory. First one used -a device specific data structure to keep information about migrated memory and -HMM hooked itself in various places of mm code to handle any access to +Several different designs were tried to support device memory. The first one +used a device specific data structure to keep information about migrated memory +and HMM hooked itself in various places of mm code to handle any access to addresses that were backed by device memory. It turns out that this ended up replicating most of the fields of struct page and also needed many kernel code paths to be updated to understand this new kind of memory. @@ -341,7 +348,7 @@ The hmm_devmem_ops is where most of the important things are:: struct hmm_devmem_ops { void (*free)(struct hmm_devmem *devmem, struct page *page); - int (*fault)(struct hmm_devmem *devmem, + vm_fault_t (*fault)(struct hmm_devmem *devmem, struct vm_area_struct *vma, unsigned long addr, struct page *page, @@ -417,9 +424,9 @@ willing to pay to keep all the code simpler. Memory cgroup (memcg) and rss accounting ======================================== -For now device memory is accounted as any regular page in rss counters (either +For now, device memory is accounted as any regular page in rss counters (either anonymous if device page is used for anonymous, file if device page is used for -file backed page or shmem if device page is used for shared memory). This is a +file backed page, or shmem if device page is used for shared memory). This is a deliberate choice to keep existing applications, that might start using device memory without knowing about it, running unimpacted. @@ -439,6 +446,6 @@ get more experience in how device memory is used and its impact on memory resource control. -Note that device memory can never be pinned by device driver nor through GUP +Note that device memory can never be pinned by a device driver nor through GUP and thus such memory is always free upon process exit. Or when last reference is dropped in case of shared memory or file backed memory. diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 044a36d7c3f8..740bb00853f5 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -418,9 +418,10 @@ struct hmm_mirror_ops { * * @mirror: pointer to struct hmm_mirror * - * This is called when the mm_struct is being released. - * The callback should make sure no references to the mirror occur - * after the callback returns. + * This is called when the mm_struct is being released. The callback + * must ensure that all access to any pages obtained from this mirror + * is halted before the callback returns. All future access should + * fault. */ void (*release)(struct hmm_mirror *mirror); -- cgit v1.2.3 From 085ea25064a9169eba5f2ed6484c111ab0f3ee79 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Mon, 6 May 2019 16:29:39 -0700 Subject: mm/hmm: clean up some coding style and comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no functional changes, just some coding style clean ups and minor comment changes. Cc: John Hubbard Cc: Ira Weiny Cc: Dan Williams Cc: Arnd Bergmann Cc: Balbir Singh Cc: Dan Carpenter Cc: Matthew Wilcox Cc: Souptick Joarder Cc: Andrew Morton Signed-off-by: Ralph Campbell Reviewed-by: Jérôme Glisse Signed-off-by: Jason Gunthorpe --- include/linux/hmm.h | 71 +++++++++++++++++++++++++++-------------------------- mm/hmm.c | 62 ++++++++++++++++++++++++---------------------- 2 files changed, 68 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 740bb00853f5..7007123842ba 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -21,8 +21,8 @@ * * HMM address space mirroring API: * - * Use HMM address space mirroring if you want to mirror range of the CPU page - * table of a process into a device page table. Here, "mirror" means "keep + * Use HMM address space mirroring if you want to mirror a range of the CPU + * page tables of a process into a device page table. Here, "mirror" means "keep * synchronized". Prerequisites: the device must provide the ability to write- * protect its page tables (at PAGE_SIZE granularity), and must be able to * recover from the resulting potential page faults. @@ -105,10 +105,11 @@ struct hmm { * HMM_PFN_WRITE: CPU page table has write permission set * HMM_PFN_DEVICE_PRIVATE: private device memory (ZONE_DEVICE) * - * The driver provide a flags array, if driver valid bit for an entry is bit - * 3 ie (entry & (1 << 3)) is true if entry is valid then driver must provide + * The driver provides a flags array for mapping page protections to device + * PTE bits. If the driver valid bit for an entry is bit 3, + * i.e., (entry & (1 << 3)), then the driver must provide * an array in hmm_range.flags with hmm_range.flags[HMM_PFN_VALID] == 1 << 3. - * Same logic apply to all flags. This is same idea as vm_page_prot in vma + * Same logic apply to all flags. This is the same idea as vm_page_prot in vma * except that this is per device driver rather than per architecture. */ enum hmm_pfn_flag_e { @@ -129,13 +130,13 @@ enum hmm_pfn_flag_e { * be mirrored by a device, because the entry will never have HMM_PFN_VALID * set and the pfn value is undefined. * - * Driver provide entry value for none entry, error entry and special entry, - * driver can alias (ie use same value for error and special for instance). It - * should not alias none and error or special. + * Driver provides values for none entry, error entry, and special entry. + * Driver can alias (i.e., use same value) error and special, but + * it should not alias none with error or special. * * HMM pfn value returned by hmm_vma_get_pfns() or hmm_vma_fault() will be: * hmm_range.values[HMM_PFN_ERROR] if CPU page table entry is poisonous, - * hmm_range.values[HMM_PFN_NONE] if there is no CPU page table + * hmm_range.values[HMM_PFN_NONE] if there is no CPU page table entry, * hmm_range.values[HMM_PFN_SPECIAL] if CPU page table entry is a special one */ enum hmm_pfn_value_e { @@ -158,6 +159,7 @@ enum hmm_pfn_value_e { * @values: pfn value for some special case (none, special, error, ...) * @default_flags: default flags for the range (write, read, ... see hmm doc) * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter + * @page_shift: device virtual address shift value (should be >= PAGE_SHIFT) * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT) * @valid: pfns array did not change since it has been fill by an HMM function */ @@ -180,7 +182,7 @@ struct hmm_range { /* * hmm_range_page_shift() - return the page shift for the range * @range: range being queried - * Returns: page shift (page size = 1 << page shift) for the range + * Return: page shift (page size = 1 << page shift) for the range */ static inline unsigned hmm_range_page_shift(const struct hmm_range *range) { @@ -190,7 +192,7 @@ static inline unsigned hmm_range_page_shift(const struct hmm_range *range) /* * hmm_range_page_size() - return the page size for the range * @range: range being queried - * Returns: page size for the range in bytes + * Return: page size for the range in bytes */ static inline unsigned long hmm_range_page_size(const struct hmm_range *range) { @@ -201,7 +203,7 @@ static inline unsigned long hmm_range_page_size(const struct hmm_range *range) * hmm_range_wait_until_valid() - wait for range to be valid * @range: range affected by invalidation to wait on * @timeout: time out for wait in ms (ie abort wait after that period of time) - * Returns: true if the range is valid, false otherwise. + * Return: true if the range is valid, false otherwise. */ static inline bool hmm_range_wait_until_valid(struct hmm_range *range, unsigned long timeout) @@ -222,7 +224,7 @@ static inline bool hmm_range_wait_until_valid(struct hmm_range *range, /* * hmm_range_valid() - test if a range is valid or not * @range: range - * Returns: true if the range is valid, false otherwise. + * Return: true if the range is valid, false otherwise. */ static inline bool hmm_range_valid(struct hmm_range *range) { @@ -233,7 +235,7 @@ static inline bool hmm_range_valid(struct hmm_range *range) * hmm_device_entry_to_page() - return struct page pointed to by a device entry * @range: range use to decode device entry value * @entry: device entry value to get corresponding struct page from - * Returns: struct page pointer if entry is a valid, NULL otherwise + * Return: struct page pointer if entry is a valid, NULL otherwise * * If the device entry is valid (ie valid flag set) then return the struct page * matching the entry value. Otherwise return NULL. @@ -256,7 +258,7 @@ static inline struct page *hmm_device_entry_to_page(const struct hmm_range *rang * hmm_device_entry_to_pfn() - return pfn value store in a device entry * @range: range use to decode device entry value * @entry: device entry to extract pfn from - * Returns: pfn value if device entry is valid, -1UL otherwise + * Return: pfn value if device entry is valid, -1UL otherwise */ static inline unsigned long hmm_device_entry_to_pfn(const struct hmm_range *range, uint64_t pfn) @@ -276,7 +278,7 @@ hmm_device_entry_to_pfn(const struct hmm_range *range, uint64_t pfn) * hmm_device_entry_from_page() - create a valid device entry for a page * @range: range use to encode HMM pfn value * @page: page for which to create the device entry - * Returns: valid device entry for the page + * Return: valid device entry for the page */ static inline uint64_t hmm_device_entry_from_page(const struct hmm_range *range, struct page *page) @@ -289,7 +291,7 @@ static inline uint64_t hmm_device_entry_from_page(const struct hmm_range *range, * hmm_device_entry_from_pfn() - create a valid device entry value from pfn * @range: range use to encode HMM pfn value * @pfn: pfn value for which to create the device entry - * Returns: valid device entry for the pfn + * Return: valid device entry for the pfn */ static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range, unsigned long pfn) @@ -394,7 +396,7 @@ enum hmm_update_event { }; /* - * struct hmm_update - HMM update informations for callback + * struct hmm_update - HMM update information for callback * * @start: virtual start address of the range to update * @end: virtual end address of the range to update @@ -428,8 +430,8 @@ struct hmm_mirror_ops { /* sync_cpu_device_pagetables() - synchronize page tables * * @mirror: pointer to struct hmm_mirror - * @update: update informations (see struct hmm_update) - * Returns: -EAGAIN if update.blockable false and callback need to + * @update: update information (see struct hmm_update) + * Return: -EAGAIN if update.blockable false and callback need to * block, 0 otherwise. * * This callback ultimately originates from mmu_notifiers when the CPU @@ -468,13 +470,13 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror); /* * hmm_mirror_mm_is_alive() - test if mm is still alive * @mirror: the HMM mm mirror for which we want to lock the mmap_sem - * Returns: false if the mm is dead, true otherwise + * Return: false if the mm is dead, true otherwise * - * This is an optimization it will not accurately always return -EINVAL if the - * mm is dead ie there can be false negative (process is being kill but HMM is - * not yet inform of that). It is only intented to be use to optimize out case - * where driver is about to do something time consuming and it would be better - * to skip it if the mm is dead. + * This is an optimization, it will not always accurately return false if the + * mm is dead; i.e., there can be false negatives (process is being killed but + * HMM is not yet informed of that). It is only intended to be used to optimize + * out cases where the driver is about to do something time consuming and it + * would be better to skip it if the mm is dead. */ static inline bool hmm_mirror_mm_is_alive(struct hmm_mirror *mirror) { @@ -489,7 +491,6 @@ static inline bool hmm_mirror_mm_is_alive(struct hmm_mirror *mirror) return true; } - /* * Please see Documentation/vm/hmm.rst for how to use the range API. */ @@ -562,7 +563,7 @@ static inline int hmm_vma_fault(struct hmm_range *range, bool block) ret = hmm_range_fault(range, block); if (ret <= 0) { if (ret == -EBUSY || !ret) { - /* Same as above drop mmap_sem to match old API. */ + /* Same as above, drop mmap_sem to match old API. */ up_read(&range->vma->vm_mm->mmap_sem); ret = -EBUSY; } else if (ret == -EAGAIN) @@ -629,7 +630,7 @@ struct hmm_devmem_ops { * @page: pointer to struct page backing virtual address (unreliable) * @flags: FAULT_FLAG_* (see include/linux/mm.h) * @pmdp: page middle directory - * Returns: VM_FAULT_MINOR/MAJOR on success or one of VM_FAULT_ERROR + * Return: VM_FAULT_MINOR/MAJOR on success or one of VM_FAULT_ERROR * on error * * The callback occurs whenever there is a CPU page fault or GUP on a @@ -637,14 +638,14 @@ struct hmm_devmem_ops { * page back to regular memory (CPU accessible). * * The device driver is free to migrate more than one page from the - * fault() callback as an optimization. However if device decide to - * migrate more than one page it must always priotirize the faulting + * fault() callback as an optimization. However if the device decides + * to migrate more than one page it must always priotirize the faulting * address over the others. * - * The struct page pointer is only given as an hint to allow quick + * The struct page pointer is only given as a hint to allow quick * lookup of internal device driver data. A concurrent migration - * might have already free that page and the virtual address might - * not longer be back by it. So it should not be modified by the + * might have already freed that page and the virtual address might + * no longer be backed by it. So it should not be modified by the * callback. * * Note that mmap semaphore is held in read mode at least when this @@ -671,7 +672,7 @@ struct hmm_devmem_ops { * @ref: per CPU refcount * @page_fault: callback when CPU fault on an unaddressable device page * - * This an helper structure for device drivers that do not wish to implement + * This is a helper structure for device drivers that do not wish to implement * the gory details related to hotplugging new memoy and allocating struct * pages. * diff --git a/mm/hmm.c b/mm/hmm.c index c62ae414a3a2..4db5dcf110ba 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -153,9 +153,8 @@ static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm) /* Wake-up everyone waiting on any range. */ mutex_lock(&hmm->lock); - list_for_each_entry(range, &hmm->ranges, list) { + list_for_each_entry(range, &hmm->ranges, list) range->valid = false; - } wake_up_all(&hmm->wq); mutex_unlock(&hmm->lock); @@ -166,9 +165,10 @@ static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm) list_del_init(&mirror->list); if (mirror->ops->release) { /* - * Drop mirrors_sem so callback can wait on any pending - * work that might itself trigger mmu_notifier callback - * and thus would deadlock with us. + * Drop mirrors_sem so the release callback can wait + * on any pending work that might itself trigger a + * mmu_notifier callback and thus would deadlock with + * us. */ up_write(&hmm->mirrors_sem); mirror->ops->release(mirror); @@ -223,11 +223,8 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn, int ret; ret = mirror->ops->sync_cpu_device_pagetables(mirror, &update); - if (!update.blockable && ret == -EAGAIN) { - up_read(&hmm->mirrors_sem); - ret = -EAGAIN; - goto out; - } + if (!update.blockable && ret == -EAGAIN) + break; } up_read(&hmm->mirrors_sem); @@ -271,6 +268,7 @@ static const struct mmu_notifier_ops hmm_mmu_notifier_ops = { * * @mirror: new mirror struct to register * @mm: mm to register against + * Return: 0 on success, -ENOMEM if no memory, -EINVAL if invalid arguments * * To start mirroring a process address space, the device driver must register * an HMM mirror struct. @@ -298,7 +296,7 @@ EXPORT_SYMBOL(hmm_mirror_register); /* * hmm_mirror_unregister() - unregister a mirror * - * @mirror: new mirror struct to register + * @mirror: mirror struct to unregister * * Stop mirroring a process address space, and cleanup. */ @@ -372,7 +370,7 @@ static int hmm_pfns_bad(unsigned long addr, * @fault: should we fault or not ? * @write_fault: write fault ? * @walk: mm_walk structure - * Returns: 0 on success, -EBUSY after page fault, or page fault error + * Return: 0 on success, -EBUSY after page fault, or page fault error * * This function will be called whenever pmd_none() or pte_none() returns true, * or whenever there is no page directory covering the virtual address range. @@ -911,6 +909,7 @@ int hmm_range_register(struct hmm_range *range, unsigned page_shift) { unsigned long mask = ((1UL << page_shift) - 1UL); + struct hmm *hmm; range->valid = false; range->hmm = NULL; @@ -924,28 +923,29 @@ int hmm_range_register(struct hmm_range *range, range->start = start; range->end = end; - range->hmm = hmm_get_or_create(mm); - if (!range->hmm) + hmm = hmm_get_or_create(mm); + if (!hmm) return -EFAULT; /* Check if hmm_mm_destroy() was call. */ - if (range->hmm->mm == NULL || range->hmm->dead) { - hmm_put(range->hmm); + if (hmm->mm == NULL || hmm->dead) { + hmm_put(hmm); return -EFAULT; } - /* Initialize range to track CPU page table update */ - mutex_lock(&range->hmm->lock); + /* Initialize range to track CPU page table updates. */ + mutex_lock(&hmm->lock); - list_add_rcu(&range->list, &range->hmm->ranges); + range->hmm = hmm; + list_add_rcu(&range->list, &hmm->ranges); /* * If there are any concurrent notifiers we have to wait for them for * the range to be valid (see hmm_range_wait_until_valid()). */ - if (!range->hmm->notifiers) + if (!hmm->notifiers) range->valid = true; - mutex_unlock(&range->hmm->lock); + mutex_unlock(&hmm->lock); return 0; } @@ -960,17 +960,19 @@ EXPORT_SYMBOL(hmm_range_register); */ void hmm_range_unregister(struct hmm_range *range) { + struct hmm *hmm = range->hmm; + /* Sanity check this really should not happen. */ - if (range->hmm == NULL || range->end <= range->start) + if (hmm == NULL || range->end <= range->start) return; - mutex_lock(&range->hmm->lock); + mutex_lock(&hmm->lock); list_del_rcu(&range->list); - mutex_unlock(&range->hmm->lock); + mutex_unlock(&hmm->lock); /* Drop reference taken by hmm_range_register() */ range->valid = false; - hmm_put(range->hmm); + hmm_put(hmm); range->hmm = NULL; } EXPORT_SYMBOL(hmm_range_unregister); @@ -978,7 +980,7 @@ EXPORT_SYMBOL(hmm_range_unregister); /* * hmm_range_snapshot() - snapshot CPU page table for a range * @range: range - * Returns: -EINVAL if invalid argument, -ENOMEM out of memory, -EPERM invalid + * Return: -EINVAL if invalid argument, -ENOMEM out of memory, -EPERM invalid * permission (for instance asking for write and range is read only), * -EAGAIN if you need to retry, -EFAULT invalid (ie either no valid * vma or it is illegal to access that range), number of valid pages @@ -1061,7 +1063,7 @@ EXPORT_SYMBOL(hmm_range_snapshot); * hmm_range_fault() - try to fault some address in a virtual address range * @range: range being faulted * @block: allow blocking on fault (if true it sleeps and do not drop mmap_sem) - * Returns: number of valid pages in range->pfns[] (from range start + * Return: number of valid pages in range->pfns[] (from range start * address). This may be zero. If the return value is negative, * then one of the following values may be returned: * @@ -1179,7 +1181,7 @@ EXPORT_SYMBOL(hmm_range_fault); * @device: device against to dma map page to * @daddrs: dma address of mapped pages * @block: allow blocking on fault (if true it sleeps and do not drop mmap_sem) - * Returns: number of pages mapped on success, -EAGAIN if mmap_sem have been + * Return: number of pages mapped on success, -EAGAIN if mmap_sem have been * drop and you need to try again, some other error value otherwise * * Note same usage pattern as hmm_range_fault(). @@ -1267,7 +1269,7 @@ EXPORT_SYMBOL(hmm_range_dma_map); * @device: device against which dma map was done * @daddrs: dma address of mapped pages * @dirty: dirty page if it had the write flag set - * Returns: number of page unmapped on success, -EINVAL otherwise + * Return: number of page unmapped on success, -EINVAL otherwise * * Note that caller MUST abide by mmu notifier or use HMM mirror and abide * to the sync_cpu_device_pagetables() callback so that it is safe here to @@ -1390,7 +1392,7 @@ static void hmm_devmem_free(struct page *page, void *data) * @ops: memory event device driver callback (see struct hmm_devmem_ops) * @device: device struct to bind the resource too * @size: size in bytes of the device memory to add - * Returns: pointer to new hmm_devmem struct ERR_PTR otherwise + * Return: pointer to new hmm_devmem struct ERR_PTR otherwise * * This function first finds an empty range of physical address big enough to * contain the new resource, and then hotplugs it as ZONE_DEVICE memory, which -- cgit v1.2.3 From 3e582c6e911ffe6c90b9f90324bdf85fc728d0c3 Mon Sep 17 00:00:00 2001 From: Leilk Liu Date: Wed, 5 Jun 2019 11:07:04 +0800 Subject: spi: mediatek: add SPI_LSB_FIRST support this patch add SPI_LSB_FIRST feature support. Signed-off-by: Leilk Liu Signed-off-by: Mark Brown --- drivers/spi/spi-mt65xx.c | 15 ++++++--------- include/linux/platform_data/spi-mt65xx.h | 2 -- 2 files changed, 6 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 0cce6f0ba824..7f4dc1844789 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -131,8 +131,6 @@ static const struct mtk_spi_compatible mt8183_compat = { * supplies it. */ static const struct mtk_chip_config mtk_default_chip_info = { - .rx_mlsb = 1, - .tx_mlsb = 1, .cs_pol = 0, .sample_sel = 0, }; @@ -203,14 +201,13 @@ static int mtk_spi_prepare_message(struct spi_master *master, reg_val &= ~SPI_CMD_CPOL; /* set the mlsbx and mlsbtx */ - if (chip_config->tx_mlsb) - reg_val |= SPI_CMD_TXMSBF; - else + if (spi->mode & SPI_LSB_FIRST) { reg_val &= ~SPI_CMD_TXMSBF; - if (chip_config->rx_mlsb) - reg_val |= SPI_CMD_RXMSBF; - else reg_val &= ~SPI_CMD_RXMSBF; + } else { + reg_val |= SPI_CMD_TXMSBF; + reg_val |= SPI_CMD_RXMSBF; + } /* set the tx/rx endian */ #ifdef __LITTLE_ENDIAN @@ -607,7 +604,7 @@ static int mtk_spi_probe(struct platform_device *pdev) master->auto_runtime_pm = true; master->dev.of_node = pdev->dev.of_node; - master->mode_bits = SPI_CPOL | SPI_CPHA; + master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST; master->set_cs = mtk_spi_set_cs; master->prepare_message = mtk_spi_prepare_message; diff --git a/include/linux/platform_data/spi-mt65xx.h b/include/linux/platform_data/spi-mt65xx.h index ba4e4bb70262..8d5df58a13ef 100644 --- a/include/linux/platform_data/spi-mt65xx.h +++ b/include/linux/platform_data/spi-mt65xx.h @@ -14,8 +14,6 @@ /* Board specific platform_data */ struct mtk_chip_config { - u32 tx_mlsb; - u32 rx_mlsb; u32 cs_pol; u32 sample_sel; }; -- cgit v1.2.3 From 6445500b43129baac36c56d629cf1dd9e1104167 Mon Sep 17 00:00:00 2001 From: Vitor Soares Date: Thu, 6 Jun 2019 17:12:02 +0200 Subject: regmap: add i3c bus support Add basic support for i3c bus. This is a simple implementation that only give support for SDR Read and Write commands. Signed-off-by: Vitor Soares Signed-off-by: Mark Brown --- drivers/base/regmap/Kconfig | 6 +++- drivers/base/regmap/Makefile | 1 + drivers/base/regmap/regmap-i3c.c | 60 ++++++++++++++++++++++++++++++++++++++++ include/linux/regmap.h | 20 ++++++++++++++ 4 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 drivers/base/regmap/regmap-i3c.c (limited to 'include/linux') diff --git a/drivers/base/regmap/Kconfig b/drivers/base/regmap/Kconfig index 6ad5ef48b61e..c8bbf5322720 100644 --- a/drivers/base/regmap/Kconfig +++ b/drivers/base/regmap/Kconfig @@ -4,7 +4,7 @@ # subsystems should select the appropriate symbols. config REGMAP - default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_W1 || REGMAP_AC97 || REGMAP_MMIO || REGMAP_IRQ) + default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_W1 || REGMAP_AC97 || REGMAP_MMIO || REGMAP_IRQ || REGMAP_I3C) select IRQ_DOMAIN if REGMAP_IRQ bool @@ -49,3 +49,7 @@ config REGMAP_SOUNDWIRE config REGMAP_SCCB tristate depends on I2C + +config REGMAP_I3C + tristate + depends on I3C diff --git a/drivers/base/regmap/Makefile b/drivers/base/regmap/Makefile index f5b4e8851d00..ff6c7d8ec1cd 100644 --- a/drivers/base/regmap/Makefile +++ b/drivers/base/regmap/Makefile @@ -16,3 +16,4 @@ obj-$(CONFIG_REGMAP_IRQ) += regmap-irq.o obj-$(CONFIG_REGMAP_W1) += regmap-w1.o obj-$(CONFIG_REGMAP_SOUNDWIRE) += regmap-sdw.o obj-$(CONFIG_REGMAP_SCCB) += regmap-sccb.o +obj-$(CONFIG_REGMAP_I3C) += regmap-i3c.o diff --git a/drivers/base/regmap/regmap-i3c.c b/drivers/base/regmap/regmap-i3c.c new file mode 100644 index 000000000000..1578fb506683 --- /dev/null +++ b/drivers/base/regmap/regmap-i3c.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Synopsys, Inc. and/or its affiliates. + +#include +#include +#include +#include + +static int regmap_i3c_write(void *context, const void *data, size_t count) +{ + struct device *dev = context; + struct i3c_device *i3c = dev_to_i3cdev(dev); + struct i3c_priv_xfer xfers[] = { + { + .rnw = false, + .len = count, + .data.out = data, + }, + }; + + return i3c_device_do_priv_xfers(i3c, xfers, 1); +} + +static int regmap_i3c_read(void *context, + const void *reg, size_t reg_size, + void *val, size_t val_size) +{ + struct device *dev = context; + struct i3c_device *i3c = dev_to_i3cdev(dev); + struct i3c_priv_xfer xfers[2]; + + xfers[0].rnw = false; + xfers[0].len = reg_size; + xfers[0].data.out = reg; + + xfers[1].rnw = true; + xfers[1].len = val_size; + xfers[1].data.in = val; + + return i3c_device_do_priv_xfers(i3c, xfers, 2); +} + +static struct regmap_bus regmap_i3c = { + .write = regmap_i3c_write, + .read = regmap_i3c_read, +}; + +struct regmap *__devm_regmap_init_i3c(struct i3c_device *i3c, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name) +{ + return __devm_regmap_init(&i3c->dev, ®map_i3c, &i3c->dev, config, + lock_key, lock_name); +} +EXPORT_SYMBOL_GPL(__devm_regmap_init_i3c); + +MODULE_AUTHOR("Vitor Soares "); +MODULE_DESCRIPTION("Regmap I3C Module"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/regmap.h b/include/linux/regmap.h index daeec7dbd65c..f65984d98b07 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -25,6 +25,7 @@ struct module; struct clk; struct device; struct i2c_client; +struct i3c_device; struct irq_domain; struct slim_device; struct spi_device; @@ -624,6 +625,10 @@ struct regmap *__devm_regmap_init_slimbus(struct slim_device *slimbus, const struct regmap_config *config, struct lock_class_key *lock_key, const char *lock_name); +struct regmap *__devm_regmap_init_i3c(struct i3c_device *i3c, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); /* * Wrapper for regmap_init macros to include a unique lockdep key and name * for each call. No-op if CONFIG_LOCKDEP is not set. @@ -982,6 +987,21 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); #define devm_regmap_init_slimbus(slimbus, config) \ __regmap_lockdep_wrapper(__devm_regmap_init_slimbus, #config, \ slimbus, config) + +/** + * devm_regmap_init_i3c() - Initialise managed register map + * + * @i3c: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +#define devm_regmap_init_i3c(i3c, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_i3c, #config, \ + i3c, config) + int regmap_mmio_attach_clk(struct regmap *map, struct clk *clk); void regmap_mmio_detach_clk(struct regmap *map); void regmap_exit(struct regmap *map); -- cgit v1.2.3 From 6d7c3cde93c1d9ac0b37f78ec3f2ff052159a242 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 22 May 2019 16:52:52 -0300 Subject: mm/hmm: fix use after free with struct hmm in the mmu notifiers mmu_notifier_unregister_no_release() is not a fence and the mmu_notifier system will continue to reference hmm->mn until the srcu grace period expires. Resulting in use after free races like this: CPU0 CPU1 __mmu_notifier_invalidate_range_start() srcu_read_lock hlist_for_each () // mn == hmm->mn hmm_mirror_unregister() hmm_put() hmm_free() mmu_notifier_unregister_no_release() hlist_del_init_rcu(hmm-mn->list) mn->ops->invalidate_range_start(mn, range); mm_get_hmm() mm->hmm = NULL; kfree(hmm) mutex_lock(&hmm->lock); Use SRCU to kfree the hmm memory so that the notifiers can rely on hmm existing. Get the now-safe hmm struct through container_of and directly check kref_get_unless_zero to lock it against free. Signed-off-by: Jason Gunthorpe Reviewed-by: Ira Weiny Reviewed-by: John Hubbard Reviewed-by: Ralph Campbell Reviewed-by: Christoph Hellwig Tested-by: Philip Yang --- include/linux/hmm.h | 1 + mm/hmm.c | 23 +++++++++++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 7007123842ba..cb01cf1fa3c0 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -93,6 +93,7 @@ struct hmm { struct mmu_notifier mmu_notifier; struct rw_semaphore mirrors_sem; wait_queue_head_t wq; + struct rcu_head rcu; long notifiers; bool dead; }; diff --git a/mm/hmm.c b/mm/hmm.c index 826816ab2377..f6956d78e3cb 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -104,6 +104,11 @@ error: return NULL; } +static void hmm_free_rcu(struct rcu_head *rcu) +{ + kfree(container_of(rcu, struct hmm, rcu)); +} + static void hmm_free(struct kref *kref) { struct hmm *hmm = container_of(kref, struct hmm, kref); @@ -116,7 +121,7 @@ static void hmm_free(struct kref *kref) mm->hmm = NULL; spin_unlock(&mm->page_table_lock); - kfree(hmm); + mmu_notifier_call_srcu(&hmm->rcu, hmm_free_rcu); } static inline void hmm_put(struct hmm *hmm) @@ -144,10 +149,14 @@ void hmm_mm_destroy(struct mm_struct *mm) static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm) { - struct hmm *hmm = mm_get_hmm(mm); + struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier); struct hmm_mirror *mirror; struct hmm_range *range; + /* Bail out if hmm is in the process of being freed */ + if (!kref_get_unless_zero(&hmm->kref)) + return; + /* Report this HMM as dying. */ hmm->dead = true; @@ -185,13 +194,14 @@ static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm) static int hmm_invalidate_range_start(struct mmu_notifier *mn, const struct mmu_notifier_range *nrange) { - struct hmm *hmm = mm_get_hmm(nrange->mm); + struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier); struct hmm_mirror *mirror; struct hmm_update update; struct hmm_range *range; int ret = 0; - VM_BUG_ON(!hmm); + if (!kref_get_unless_zero(&hmm->kref)) + return 0; update.start = nrange->start; update.end = nrange->end; @@ -236,9 +246,10 @@ out: static void hmm_invalidate_range_end(struct mmu_notifier *mn, const struct mmu_notifier_range *nrange) { - struct hmm *hmm = mm_get_hmm(nrange->mm); + struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier); - VM_BUG_ON(!hmm); + if (!kref_get_unless_zero(&hmm->kref)) + return; mutex_lock(&hmm->lock); hmm->notifiers--; -- cgit v1.2.3 From 5923ea6c2ce626f0aa8a547d5b7e5fce705dd3dc Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 26 Apr 2019 14:40:18 +0200 Subject: gpio: pass lookup and descriptor flags to request_own When a gpio_chip wants to request a descriptor from itself using gpiochip_request_own_desc() it needs to be able to specify fully how to use the descriptor, notably line inversion semantics. The workaround in the gpiolib.c can be removed and cases (such as SPI CS) where we need at times to request a GPIO with line inversion semantics directly on a chip for workarounds, can be fully supported with this call. Fix up some users of the API that weren't really using the last flag to set up the line as input or output properly but instead just calling direction setting explicitly after requesting the line. Cc: Martin Sperl Signed-off-by: Linus Walleij --- arch/arm/mach-omap1/ams-delta-fiq.c | 4 +++- arch/arm/mach-omap1/board-ams-delta.c | 5 +++-- drivers/gpio/gpio-mvebu.c | 11 ++++------- drivers/gpio/gpiolib-acpi.c | 6 +++--- drivers/gpio/gpiolib.c | 31 ++++++++++--------------------- drivers/hid/hid-cp2112.c | 7 +++++-- drivers/memory/omap-gpmc.c | 4 +++- include/linux/gpio/driver.h | 4 +++- 8 files changed, 34 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c index 51212133ce06..46ca2d9d38ef 100644 --- a/arch/arm/mach-omap1/ams-delta-fiq.c +++ b/arch/arm/mach-omap1/ams-delta-fiq.c @@ -14,6 +14,7 @@ * the Free Software Foundation. */ #include +#include #include #include #include @@ -102,7 +103,8 @@ void __init ams_delta_init_fiq(struct gpio_chip *chip, } for (i = 0; i < ARRAY_SIZE(irq_data); i++) { - gpiod = gpiochip_request_own_desc(chip, i, pin_name[i], 0); + gpiod = gpiochip_request_own_desc(chip, i, pin_name[i], + GPIO_ACTIVE_HIGH, GPIOD_IN); if (IS_ERR(gpiod)) { pr_err("%s: failed to get GPIO pin %d (%ld)\n", __func__, i, PTR_ERR(gpiod)); diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index b6e814166ee0..e49542540fc6 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -13,6 +13,7 @@ */ #include #include +#include #include #include #include @@ -609,12 +610,12 @@ static void __init modem_assign_irq(struct gpio_chip *chip) struct gpio_desc *gpiod; gpiod = gpiochip_request_own_desc(chip, AMS_DELTA_GPIO_PIN_MODEM_IRQ, - "modem_irq", 0); + "modem_irq", GPIO_ACTIVE_HIGH, + GPIOD_IN); if (IS_ERR(gpiod)) { pr_err("%s: modem IRQ GPIO request failed (%ld)\n", __func__, PTR_ERR(gpiod)); } else { - gpiod_direction_input(gpiod); ams_delta_modem_ports[0].irq = gpiod_to_irq(gpiod); } } diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 059094ac44cb..869d47f89599 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -618,18 +619,14 @@ static int mvebu_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) ret = -EBUSY; } else { desc = gpiochip_request_own_desc(&mvchip->chip, - pwm->hwpwm, "mvebu-pwm", 0); + pwm->hwpwm, "mvebu-pwm", + GPIO_ACTIVE_HIGH, + GPIOD_OUT_LOW); if (IS_ERR(desc)) { ret = PTR_ERR(desc); goto out; } - ret = gpiod_direction_output(desc, 0); - if (ret) { - gpiochip_free_own_desc(desc); - goto out; - } - mvpwm->gpiod = desc; } out: diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index c9fc9e232aaf..39f2f9035c11 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -217,14 +217,13 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, if (!handler) return AE_OK; - desc = gpiochip_request_own_desc(chip, pin, "ACPI:Event", 0); + desc = gpiochip_request_own_desc(chip, pin, "ACPI:Event", + GPIO_ACTIVE_HIGH, GPIOD_IN); if (IS_ERR(desc)) { dev_err(chip->parent, "Failed to request GPIO\n"); return AE_ERROR; } - gpiod_direction_input(desc); - ret = gpiochip_lock_as_irq(chip, pin); if (ret) { dev_err(chip->parent, "Failed to lock GPIO as interrupt\n"); @@ -951,6 +950,7 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address, const char *label = "ACPI:OpRegion"; desc = gpiochip_request_own_desc(chip, pin, label, + GPIO_ACTIVE_HIGH, flags); if (IS_ERR(desc)) { status = AE_ERROR; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index e013d417a936..4561cb39bdb4 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2503,7 +2503,11 @@ EXPORT_SYMBOL_GPL(gpiochip_is_requested); * @chip: GPIO chip * @hwnum: hardware number of the GPIO for which to request the descriptor * @label: label for the GPIO - * @flags: flags for this GPIO or 0 if default + * @lflags: lookup flags for this GPIO or 0 if default, this can be used to + * specify things like line inversion semantics with the machine flags + * such as GPIO_OUT_LOW + * @dflags: descriptor request flags for this GPIO or 0 if default, this + * can be used to specify consumer semantics such as open drain * * Function allows GPIO chip drivers to request and use their own GPIO * descriptors via gpiolib API. Difference to gpiod_request() is that this @@ -2517,9 +2521,9 @@ EXPORT_SYMBOL_GPL(gpiochip_is_requested); */ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip, u16 hwnum, const char *label, - enum gpiod_flags flags) + enum gpio_lookup_flags lflags, + enum gpiod_flags dflags) { - unsigned long lflags = GPIO_LOOKUP_FLAGS_DEFAULT; struct gpio_desc *desc = gpiochip_get_desc(chip, hwnum); int err; @@ -2532,7 +2536,7 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip, u16 hwnum, if (err < 0) return ERR_PTR(err); - err = gpiod_configure_flags(desc, label, lflags, flags); + err = gpiod_configure_flags(desc, label, lflags, dflags); if (err) { chip_err(chip, "setup of own GPIO %s failed\n", label); gpiod_free_commit(desc); @@ -4420,15 +4424,8 @@ int gpiod_hog(struct gpio_desc *desc, const char *name, chip = gpiod_to_chip(desc); hwnum = gpio_chip_hwgpio(desc); - /* - * FIXME: not very elegant that we call gpiod_configure_flags() - * twice here (once inside gpiochip_request_own_desc() and - * again here), but the gpiochip_request_own_desc() is external - * and cannot really pass the lflags so this is the lesser evil - * at the moment. Pass zero as dflags on this first call so we - * don't screw anything up. - */ - local_desc = gpiochip_request_own_desc(chip, hwnum, name, 0); + local_desc = gpiochip_request_own_desc(chip, hwnum, name, + lflags, dflags); if (IS_ERR(local_desc)) { status = PTR_ERR(local_desc); pr_err("requesting hog GPIO %s (chip %s, offset %d) failed, %d\n", @@ -4436,14 +4433,6 @@ int gpiod_hog(struct gpio_desc *desc, const char *name, return status; } - status = gpiod_configure_flags(desc, name, lflags, dflags); - if (status < 0) { - pr_err("setup of hog GPIO %s (chip %s, offset %d) failed, %d\n", - name, chip->label, hwnum, status); - gpiochip_free_own_desc(desc); - return status; - } - /* Mark GPIO as hogged so it can be identified and removed later */ set_bit(FLAG_IS_HOGGED, &desc->flags); diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 47f65857408d..f6fb97a14de6 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -24,7 +24,8 @@ * https://www.silabs.com/documents/public/application-notes/an495-cp2112-interface-specification.pdf */ -#include +#include +#include #include #include #include @@ -1203,7 +1204,9 @@ static int __maybe_unused cp2112_allocate_irq(struct cp2112_device *dev, return -EINVAL; dev->desc[pin] = gpiochip_request_own_desc(&dev->gc, pin, - "HID/I2C:Event", 0); + "HID/I2C:Event", + GPIO_ACTIVE_HIGH, + GPIOD_IN); if (IS_ERR(dev->desc[pin])) { dev_err(dev->gc.parent, "Failed to request GPIO\n"); return PTR_ERR(dev->desc[pin]); diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index f6297599433f..f4f98957dc86 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -22,6 +22,7 @@ #include #include #include /* GPIO descriptor enum */ +#include #include #include #include @@ -2172,7 +2173,8 @@ static int gpmc_probe_generic_child(struct platform_device *pdev, waitpin_desc = gpiochip_request_own_desc(&gpmc->gpio_chip, wait_pin, "WAITPIN", - 0); + GPIO_ACTIVE_HIGH, + GPIOD_IN); if (IS_ERR(waitpin_desc)) { dev_err(&pdev->dev, "invalid wait-pin: %d\n", wait_pin); ret = PTR_ERR(waitpin_desc); diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index a1d273c96016..937c40fb61f7 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -18,6 +18,7 @@ struct seq_file; struct gpio_device; struct module; enum gpiod_flags; +enum gpio_lookup_flags; #ifdef CONFIG_GPIOLIB @@ -614,7 +615,8 @@ gpiochip_remove_pin_ranges(struct gpio_chip *chip) struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip, u16 hwnum, const char *label, - enum gpiod_flags flags); + enum gpio_lookup_flags lflags, + enum gpiod_flags dflags); void gpiochip_free_own_desc(struct gpio_desc *desc); void devprop_gpiochip_set_names(struct gpio_chip *chip, -- cgit v1.2.3 From 2809b392a62ae307da058a52d451b2fc3ce4de7e Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 5 Jun 2019 22:32:21 +0200 Subject: compiler.h: add CC_USING_PATCHABLE_FUNCTION_ENTRY This can be used for architectures implementing dynamic ftrace via -fpatchable-function-entry. Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- include/linux/compiler_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 19e58b9138a0..095d55c3834d 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -112,6 +112,8 @@ struct ftrace_likely_data { #if defined(CC_USING_HOTPATCH) #define notrace __attribute__((hotpatch(0, 0))) +#elif defined(CC_USING_PATCHABLE_FUNCTION_ENTRY) +#define notrace __attribute__((patchable_function_entry(0, 0))) #else #define notrace __attribute__((__no_instrument_function__)) #endif -- cgit v1.2.3 From cb1aaebea8d79860181559d7b5d482aea63db113 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Jun 2019 15:54:32 -0300 Subject: docs: fix broken documentation links Mostly due to x86 and acpi conversion, several documentation links are still pointing to the old file. Fix them. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Wolfram Sang Reviewed-by: Sven Van Asbroeck Reviewed-by: Bhupesh Sharma Acked-by: Mark Brown Signed-off-by: Jonathan Corbet --- Documentation/acpi/dsd/leds.txt | 2 +- Documentation/admin-guide/kernel-parameters.rst | 6 +++--- Documentation/admin-guide/kernel-parameters.txt | 16 ++++++++-------- Documentation/admin-guide/ras.rst | 2 +- Documentation/devicetree/bindings/net/fsl-enetc.txt | 7 +++---- .../devicetree/bindings/pci/amlogic,meson-pcie.txt | 2 +- .../bindings/regulator/qcom,rpmh-regulator.txt | 2 +- Documentation/devicetree/booting-without-of.txt | 2 +- Documentation/driver-api/gpio/board.rst | 2 +- Documentation/driver-api/gpio/consumer.rst | 2 +- Documentation/firmware-guide/acpi/enumeration.rst | 2 +- Documentation/firmware-guide/acpi/method-tracing.rst | 2 +- Documentation/i2c/instantiating-devices | 2 +- Documentation/sysctl/kernel.txt | 4 ++-- Documentation/translations/zh_CN/process/4.Coding.rst | 2 +- Documentation/x86/x86_64/5level-paging.rst | 2 +- Documentation/x86/x86_64/boot-options.rst | 4 ++-- Documentation/x86/x86_64/fake-numa-for-cpusets.rst | 2 +- MAINTAINERS | 4 ++-- arch/arm/Kconfig | 2 +- arch/arm64/kernel/kexec_image.c | 2 +- arch/x86/Kconfig | 14 +++++++------- arch/x86/Kconfig.debug | 2 +- arch/x86/boot/header.S | 2 +- arch/x86/entry/entry_64.S | 2 +- arch/x86/include/asm/bootparam_utils.h | 2 +- arch/x86/include/asm/page_64_types.h | 2 +- arch/x86/include/asm/pgtable_64_types.h | 2 +- arch/x86/kernel/cpu/microcode/amd.c | 2 +- arch/x86/kernel/kexec-bzimage64.c | 2 +- arch/x86/kernel/pci-dma.c | 2 +- arch/x86/mm/tlb.c | 2 +- arch/x86/platform/pvh/enlighten.c | 2 +- drivers/acpi/Kconfig | 10 +++++----- drivers/net/ethernet/faraday/ftgmac100.c | 2 +- drivers/staging/fieldbus/Documentation/fieldbus_dev.txt | 4 ++-- drivers/vhost/vhost.c | 2 +- include/acpi/acpi_drivers.h | 2 +- include/linux/fs_context.h | 2 +- include/linux/lsm_hooks.h | 2 +- mm/Kconfig | 2 +- security/Kconfig | 2 +- tools/include/linux/err.h | 2 +- tools/objtool/Documentation/stack-validation.txt | 4 ++-- 44 files changed, 70 insertions(+), 71 deletions(-) (limited to 'include/linux') diff --git a/Documentation/acpi/dsd/leds.txt b/Documentation/acpi/dsd/leds.txt index 81a63af42ed2..cc58b1a574c5 100644 --- a/Documentation/acpi/dsd/leds.txt +++ b/Documentation/acpi/dsd/leds.txt @@ -96,4 +96,4 @@ where , referenced 2019-02-21. -[7] Documentation/acpi/dsd/data-node-reference.txt +[7] Documentation/firmware-guide/acpi/dsd/data-node-references.rst diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst index 0124980dca2d..8d3273e32eb1 100644 --- a/Documentation/admin-guide/kernel-parameters.rst +++ b/Documentation/admin-guide/kernel-parameters.rst @@ -167,7 +167,7 @@ parameter is applicable:: X86-32 X86-32, aka i386 architecture is enabled. X86-64 X86-64 architecture is enabled. More X86-64 boot options can be found in - Documentation/x86/x86_64/boot-options.txt . + Documentation/x86/x86_64/boot-options.rst. X86 Either 32-bit or 64-bit x86 (same as X86-32+X86-64) X86_UV SGI UV support is enabled. XEN Xen support is enabled @@ -181,10 +181,10 @@ In addition, the following text indicates that the option:: Parameters denoted with BOOT are actually interpreted by the boot loader, and have no meaning to the kernel directly. Do not modify the syntax of boot loader parameters without extreme -need or coordination with . +need or coordination with . There are also arch-specific kernel-parameters not documented here. -See for example . +See for example . Note that ALL kernel parameters listed below are CASE SENSITIVE, and that a trailing = on the name of any parameter states that that parameter will diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 79d043b8850d..1abd7e145357 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -53,7 +53,7 @@ ACPI_DEBUG_PRINT statements, e.g., ACPI_DEBUG_PRINT((ACPI_DB_INFO, ... The debug_level mask defaults to "info". See - Documentation/acpi/debug.txt for more information about + Documentation/firmware-guide/acpi/debug.rst for more information about debug layers and levels. Enable processor driver info messages: @@ -963,7 +963,7 @@ for details. nompx [X86] Disables Intel Memory Protection Extensions. - See Documentation/x86/intel_mpx.txt for more + See Documentation/x86/intel_mpx.rst for more information about the feature. nopku [X86] Disable Memory Protection Keys CPU feature found @@ -1189,7 +1189,7 @@ that is to be dynamically loaded by Linux. If there are multiple variables with the same name but with different vendor GUIDs, all of them will be loaded. See - Documentation/acpi/ssdt-overlays.txt for details. + Documentation/admin-guide/acpi/ssdt-overlays.rst for details. eisa_irq_edge= [PARISC,HW] @@ -2383,7 +2383,7 @@ mce [X86-32] Machine Check Exception - mce=option [X86-64] See Documentation/x86/x86_64/boot-options.txt + mce=option [X86-64] See Documentation/x86/x86_64/boot-options.rst md= [HW] RAID subsystems devices and level See Documentation/admin-guide/md.rst. @@ -2439,7 +2439,7 @@ set according to the CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE kernel config option. - See Documentation/memory-hotplug.txt. + See Documentation/admin-guide/mm/memory-hotplug.rst. memmap=exactmap [KNL,X86] Enable setting of an exact E820 memory map, as specified by the user. @@ -2528,7 +2528,7 @@ mem_encrypt=on: Activate SME mem_encrypt=off: Do not activate SME - Refer to Documentation/x86/amd-memory-encryption.txt + Refer to Documentation/virtual/kvm/amd-memory-encryption.rst for details on when memory encryption can be activated. mem_sleep_default= [SUSPEND] Default system suspend mode: @@ -3529,7 +3529,7 @@ See Documentation/blockdev/paride.txt. pirq= [SMP,APIC] Manual mp-table setup - See Documentation/x86/i386/IO-APIC.txt. + See Documentation/x86/i386/IO-APIC.rst. plip= [PPT,NET] Parallel port network link Format: { parport | timid | 0 } @@ -5055,7 +5055,7 @@ Can be used multiple times for multiple devices. vga= [BOOT,X86-32] Select a particular video mode - See Documentation/x86/boot.txt and + See Documentation/x86/boot.rst and Documentation/svga.txt. Use vga=ask for menu. This is actually a boot loader parameter; the value is diff --git a/Documentation/admin-guide/ras.rst b/Documentation/admin-guide/ras.rst index c7495e42e6f4..2b20f5f7380d 100644 --- a/Documentation/admin-guide/ras.rst +++ b/Documentation/admin-guide/ras.rst @@ -199,7 +199,7 @@ Architecture (MCA)\ [#f3]_. mode). .. [#f3] For more details about the Machine Check Architecture (MCA), - please read Documentation/x86/x86_64/machinecheck at the Kernel tree. + please read Documentation/x86/x86_64/machinecheck.rst at the Kernel tree. EDAC - Error Detection And Correction ************************************* diff --git a/Documentation/devicetree/bindings/net/fsl-enetc.txt b/Documentation/devicetree/bindings/net/fsl-enetc.txt index c812e25ae90f..25fc687419db 100644 --- a/Documentation/devicetree/bindings/net/fsl-enetc.txt +++ b/Documentation/devicetree/bindings/net/fsl-enetc.txt @@ -16,8 +16,8 @@ Required properties: In this case, the ENETC node should include a "mdio" sub-node that in turn should contain the "ethernet-phy" node describing the external phy. Below properties are required, their bindings -already defined in ethernet.txt or phy.txt, under -Documentation/devicetree/bindings/net/*. +already defined in Documentation/devicetree/bindings/net/ethernet.txt or +Documentation/devicetree/bindings/net/phy.txt. Required: @@ -51,8 +51,7 @@ Example: connection: In this case, the ENETC port node defines a fixed link connection, -as specified by "fixed-link.txt", under -Documentation/devicetree/bindings/net/*. +as specified by Documentation/devicetree/bindings/net/fixed-link.txt. Required: diff --git a/Documentation/devicetree/bindings/pci/amlogic,meson-pcie.txt b/Documentation/devicetree/bindings/pci/amlogic,meson-pcie.txt index 12b18f82d441..efa2c8b9b85a 100644 --- a/Documentation/devicetree/bindings/pci/amlogic,meson-pcie.txt +++ b/Documentation/devicetree/bindings/pci/amlogic,meson-pcie.txt @@ -3,7 +3,7 @@ Amlogic Meson AXG DWC PCIE SoC controller Amlogic Meson PCIe host controller is based on the Synopsys DesignWare PCI core. It shares common functions with the PCIe DesignWare core driver and inherits common properties defined in -Documentation/devicetree/bindings/pci/designware-pci.txt. +Documentation/devicetree/bindings/pci/designware-pcie.txt. Additional properties are described here: diff --git a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt index 7ef2dbe48e8a..14d2eee96b3d 100644 --- a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt +++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt @@ -97,7 +97,7 @@ Second Level Nodes - Regulators sent for this regulator including those which are for a strictly lower power state. -Other properties defined in Documentation/devicetree/bindings/regulator.txt +Other properties defined in Documentation/devicetree/bindings/regulator/regulator.txt may also be used. regulator-initial-mode and regulator-allowed-modes may be specified for VRM regulators using mode values from include/dt-bindings/regulator/qcom,rpmh-regulator.h. regulator-allow-bypass diff --git a/Documentation/devicetree/booting-without-of.txt b/Documentation/devicetree/booting-without-of.txt index e86bd2f64117..60f8640f2b2f 100644 --- a/Documentation/devicetree/booting-without-of.txt +++ b/Documentation/devicetree/booting-without-of.txt @@ -277,7 +277,7 @@ it with special cases. the decompressor (the real mode entry point goes to the same 32bit entry point once it switched into protected mode). That entry point supports one calling convention which is documented in - Documentation/x86/boot.txt + Documentation/x86/boot.rst The physical pointer to the device-tree block (defined in chapter II) is passed via setup_data which requires at least boot protocol 2.09. The type filed is defined as diff --git a/Documentation/driver-api/gpio/board.rst b/Documentation/driver-api/gpio/board.rst index b37f3f7b8926..ce91518bf9f4 100644 --- a/Documentation/driver-api/gpio/board.rst +++ b/Documentation/driver-api/gpio/board.rst @@ -101,7 +101,7 @@ with the help of _DSD (Device Specific Data), introduced in ACPI 5.1:: } For more information about the ACPI GPIO bindings see -Documentation/acpi/gpio-properties.txt. +Documentation/firmware-guide/acpi/gpio-properties.rst. Platform Data ------------- diff --git a/Documentation/driver-api/gpio/consumer.rst b/Documentation/driver-api/gpio/consumer.rst index 5e4d8aa68913..fdecb6d711db 100644 --- a/Documentation/driver-api/gpio/consumer.rst +++ b/Documentation/driver-api/gpio/consumer.rst @@ -437,7 +437,7 @@ case, it will be handled by the GPIO subsystem automatically. However, if the _DSD is not present, the mappings between GpioIo()/GpioInt() resources and GPIO connection IDs need to be provided by device drivers. -For details refer to Documentation/acpi/gpio-properties.txt +For details refer to Documentation/firmware-guide/acpi/gpio-properties.rst Interacting With the Legacy GPIO Subsystem diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst index 850be9696931..1252617b520f 100644 --- a/Documentation/firmware-guide/acpi/enumeration.rst +++ b/Documentation/firmware-guide/acpi/enumeration.rst @@ -339,7 +339,7 @@ a code like this:: There are also devm_* versions of these functions which release the descriptors once the device is released. -See Documentation/acpi/gpio-properties.txt for more information about the +See Documentation/firmware-guide/acpi/gpio-properties.rst for more information about the _DSD binding related to GPIOs. MFD devices diff --git a/Documentation/firmware-guide/acpi/method-tracing.rst b/Documentation/firmware-guide/acpi/method-tracing.rst index d0b077b73f5f..0aa7e2c5d32a 100644 --- a/Documentation/firmware-guide/acpi/method-tracing.rst +++ b/Documentation/firmware-guide/acpi/method-tracing.rst @@ -68,7 +68,7 @@ c. Filter out the debug layer/level matched logs when the specified Where: 0xXXXXXXXX/0xYYYYYYYY - Refer to Documentation/acpi/debug.txt for possible debug layer/level + Refer to Documentation/firmware-guide/acpi/debug.rst for possible debug layer/level masking values. \PPPP.AAAA.TTTT.HHHH Full path of a control method that can be found in the ACPI namespace. diff --git a/Documentation/i2c/instantiating-devices b/Documentation/i2c/instantiating-devices index 0d85ac1935b7..5a3e2f331e8c 100644 --- a/Documentation/i2c/instantiating-devices +++ b/Documentation/i2c/instantiating-devices @@ -85,7 +85,7 @@ Method 1c: Declare the I2C devices via ACPI ------------------------------------------- ACPI can also describe I2C devices. There is special documentation for this -which is currently located at Documentation/acpi/enumeration.txt. +which is currently located at Documentation/firmware-guide/acpi/enumeration.rst. Method 2: Instantiate the devices explicitly diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index f0c86fbb3b48..92f7f34b021a 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -155,7 +155,7 @@ is 0x15 and the full version number is 0x234, this file will contain the value 340 = 0x154. See the type_of_loader and ext_loader_type fields in -Documentation/x86/boot.txt for additional information. +Documentation/x86/boot.rst for additional information. ============================================================== @@ -167,7 +167,7 @@ The complete bootloader version number. In the example above, this file will contain the value 564 = 0x234. See the type_of_loader and ext_loader_ver fields in -Documentation/x86/boot.txt for additional information. +Documentation/x86/boot.rst for additional information. ============================================================== diff --git a/Documentation/translations/zh_CN/process/4.Coding.rst b/Documentation/translations/zh_CN/process/4.Coding.rst index 5301e9d55255..8bb777941394 100644 --- a/Documentation/translations/zh_CN/process/4.Coding.rst +++ b/Documentation/translations/zh_CN/process/4.Coding.rst @@ -241,7 +241,7 @@ scripts/coccinelle目录下已经打包了相当多的内核“语义补丁” 任何添加新用户空间界面的代码(包括新的sysfs或/proc文件)都应该包含该界面的 文档,该文档使用户空间开发人员能够知道他们在使用什么。请参阅 -Documentation/abi/readme,了解如何格式化此文档以及需要提供哪些信息。 +Documentation/ABI/README,了解如何格式化此文档以及需要提供哪些信息。 文件 :ref:`Documentation/admin-guide/kernel-parameters.rst ` 描述了内核的所有引导时间参数。任何添加新参数的补丁都应该向该文件添加适当的 diff --git a/Documentation/x86/x86_64/5level-paging.rst b/Documentation/x86/x86_64/5level-paging.rst index ab88a4514163..44856417e6a5 100644 --- a/Documentation/x86/x86_64/5level-paging.rst +++ b/Documentation/x86/x86_64/5level-paging.rst @@ -20,7 +20,7 @@ physical address space. This "ought to be enough for anybody" ©. QEMU 2.9 and later support 5-level paging. Virtual memory layout for 5-level paging is described in -Documentation/x86/x86_64/mm.txt +Documentation/x86/x86_64/mm.rst Enabling 5-level paging diff --git a/Documentation/x86/x86_64/boot-options.rst b/Documentation/x86/x86_64/boot-options.rst index 2f69836b8445..6a4285a3c7a4 100644 --- a/Documentation/x86/x86_64/boot-options.rst +++ b/Documentation/x86/x86_64/boot-options.rst @@ -9,7 +9,7 @@ only the AMD64 specific ones are listed here. Machine check ============= -Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables. +Please see Documentation/x86/x86_64/machinecheck.rst for sysfs runtime tunables. mce=off Disable machine check @@ -89,7 +89,7 @@ APICs Don't use the local APIC (alias for i386 compatibility) pirq=... - See Documentation/x86/i386/IO-APIC.txt + See Documentation/x86/i386/IO-APIC.rst noapictimer Don't set up the APIC timer diff --git a/Documentation/x86/x86_64/fake-numa-for-cpusets.rst b/Documentation/x86/x86_64/fake-numa-for-cpusets.rst index 74fbb78b3c67..04df57b9aa3f 100644 --- a/Documentation/x86/x86_64/fake-numa-for-cpusets.rst +++ b/Documentation/x86/x86_64/fake-numa-for-cpusets.rst @@ -18,7 +18,7 @@ For more information on the features of cpusets, see Documentation/cgroup-v1/cpusets.txt. There are a number of different configurations you can use for your needs. For more information on the numa=fake command line option and its various ways of -configuring fake nodes, see Documentation/x86/x86_64/boot-options.txt. +configuring fake nodes, see Documentation/x86/x86_64/boot-options.rst. For the purposes of this introduction, we'll assume a very primitive NUMA emulation setup of "numa=fake=4*512,". This will split our system memory into diff --git a/MAINTAINERS b/MAINTAINERS index 5cfbea4ce575..26e0369c1641 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3874,7 +3874,7 @@ F: Documentation/devicetree/bindings/hwmon/cirrus,lochnagar.txt F: Documentation/devicetree/bindings/pinctrl/cirrus,lochnagar.txt F: Documentation/devicetree/bindings/regulator/cirrus,lochnagar.txt F: Documentation/devicetree/bindings/sound/cirrus,lochnagar.txt -F: Documentation/hwmon/lochnagar +F: Documentation/hwmon/lochnagar.rst CISCO FCOE HBA DRIVER M: Satish Kharat @@ -11272,7 +11272,7 @@ NXP FXAS21002C DRIVER M: Rui Miguel Silva L: linux-iio@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/iio/gyroscope/fxas21002c.txt +F: Documentation/devicetree/bindings/iio/gyroscope/nxp,fxas21002c.txt F: drivers/iio/gyro/fxas21002c_core.c F: drivers/iio/gyro/fxas21002c.h F: drivers/iio/gyro/fxas21002c_i2c.c diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 8869742a85df..0f220264cc23 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1263,7 +1263,7 @@ config SMP uniprocessor machines. On a uniprocessor machine, the kernel will run faster if you say N here. - See also , + See also , and the SMP-HOWTO available at . diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index 07bf740bea91..31cc2f423aa8 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -53,7 +53,7 @@ static void *image_load(struct kimage *image, /* * We require a kernel with an unambiguous Image header. Per - * Documentation/booting.txt, this is the case when image_size + * Documentation/arm64/booting.txt, this is the case when image_size * is non-zero (practically speaking, since v3.17). */ h = (struct arm64_image_header *)kernel; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d87d53fcd261..9f1f7b47621c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -395,7 +395,7 @@ config SMP Y to "Enhanced Real Time Clock Support", below. The "Advanced Power Management" code will be disabled if you say Y here. - See also , + See also , and the SMP-HOWTO available at . @@ -1290,7 +1290,7 @@ config MICROCODE the Linux kernel. The preferred method to load microcode from a detached initrd is described - in Documentation/x86/microcode.txt. For that you need to enable + in Documentation/x86/microcode.rst. For that you need to enable CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the initrd for microcode blobs. @@ -1329,7 +1329,7 @@ config MICROCODE_OLD_INTERFACE It is inadequate because it runs too late to be able to properly load microcode on a machine and it needs special tools. Instead, you should've switched to the early loading method with the initrd or - builtin microcode by now: Documentation/x86/microcode.txt + builtin microcode by now: Documentation/x86/microcode.rst config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" @@ -1478,7 +1478,7 @@ config X86_5LEVEL A kernel with the option enabled can be booted on machines that support 4- or 5-level paging. - See Documentation/x86/x86_64/5level-paging.txt for more + See Documentation/x86/x86_64/5level-paging.rst for more information. Say N if unsure. @@ -1626,7 +1626,7 @@ config ARCH_MEMORY_PROBE depends on X86_64 && MEMORY_HOTPLUG help This option enables a sysfs memory/probe interface for testing. - See Documentation/memory-hotplug.txt for more information. + See Documentation/admin-guide/mm/memory-hotplug.rst for more information. If you are unsure how to answer this question, answer N. config ARCH_PROC_KCORE_TEXT @@ -1783,7 +1783,7 @@ config MTRR You can safely say Y even if your machine doesn't have MTRRs, you'll just add about 9 KB to your kernel. - See for more information. + See for more information. config MTRR_SANITIZER def_bool y @@ -1895,7 +1895,7 @@ config X86_INTEL_MPX process and adds some branches to paths used during exec() and munmap(). - For details, see Documentation/x86/intel_mpx.txt + For details, see Documentation/x86/intel_mpx.rst If unsure, say N. diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index f730680dc818..59f598543203 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -156,7 +156,7 @@ config IOMMU_DEBUG code. When you use it make sure you have a big enough IOMMU/AGP aperture. Most of the options enabled by this can be set more finegrained using the iommu= command line - options. See Documentation/x86/x86_64/boot-options.txt for more + options. See Documentation/x86/x86_64/boot-options.rst for more details. config IOMMU_LEAK diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 850b8762e889..90d791ca1a95 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -313,7 +313,7 @@ start_sys_seg: .word SYSSEG # obsolete and meaningless, but just type_of_loader: .byte 0 # 0 means ancient bootloader, newer # bootloaders know to change this. - # See Documentation/x86/boot.txt for + # See Documentation/x86/boot.rst for # assigned ids # flags, unused bits must be zero (RFU) bit within loadflags diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 11aa3b2afa4d..33f9fc38d014 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -8,7 +8,7 @@ * * entry.S contains the system-call and fault low-level handling routines. * - * Some of this is documented in Documentation/x86/entry_64.txt + * Some of this is documented in Documentation/x86/entry_64.rst * * A note on terminology: * - iret frame: Architecture defined interrupt frame from SS to RIP diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index f6f6ef436599..101eb944f13c 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -24,7 +24,7 @@ static void sanitize_boot_params(struct boot_params *boot_params) * IMPORTANT NOTE TO BOOTLOADER AUTHORS: do not simply clear * this field. The purpose of this field is to guarantee * compliance with the x86 boot spec located in - * Documentation/x86/boot.txt . That spec says that the + * Documentation/x86/boot.rst . That spec says that the * *whole* structure should be cleared, after which only the * portion defined by struct setup_header (boot_params->hdr) * should be copied in. diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 793c14c372cb..288b065955b7 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -48,7 +48,7 @@ #define __START_KERNEL_map _AC(0xffffffff80000000, UL) -/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ +/* See Documentation/x86/x86_64/mm.rst for a description of the memory map. */ #define __PHYSICAL_MASK_SHIFT 52 diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 88bca456da99..52e5f5f2240d 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -103,7 +103,7 @@ extern unsigned int ptrs_per_p4d; #define PGDIR_MASK (~(PGDIR_SIZE - 1)) /* - * See Documentation/x86/x86_64/mm.txt for a description of the memory map. + * See Documentation/x86/x86_64/mm.rst for a description of the memory map. * * Be very careful vs. KASLR when changing anything here. The KASLR address * range must not overlap with anything except the KASAN shadow area, which diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index e1f3ba19ba54..06d4e67f31ab 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -61,7 +61,7 @@ static u8 amd_ucode_patch[PATCH_MAX_SIZE]; /* * Microcode patch container file is prepended to the initrd in cpio - * format. See Documentation/x86/microcode.txt + * format. See Documentation/x86/microcode.rst */ static const char ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin"; diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 22f60dd26460..b07e7069b09e 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -416,7 +416,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel, efi_map_offset = params_cmdline_sz; efi_setup_data_offset = efi_map_offset + ALIGN(efi_map_sz, 16); - /* Copy setup header onto bootparams. Documentation/x86/boot.txt */ + /* Copy setup header onto bootparams. Documentation/x86/boot.rst */ setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset; /* Is there a limit on setup header size? */ diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index dcd272dbd0a9..f62b498b18fb 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -70,7 +70,7 @@ void __init pci_iommu_alloc(void) } /* - * See for the iommu kernel + * See for the iommu kernel * parameter documentation. */ static __init int iommu_setup(char *p) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 7f61431c75fb..400c1ba033aa 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -711,7 +711,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, } /* - * See Documentation/x86/tlb.txt for details. We choose 33 + * See Documentation/x86/tlb.rst for details. We choose 33 * because it is large enough to cover the vast majority (at * least 95%) of allocations, and is small enough that we are * confident it will not cause too much overhead. Each single diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c index 1861a2ba0f2b..c0a502f7e3a7 100644 --- a/arch/x86/platform/pvh/enlighten.c +++ b/arch/x86/platform/pvh/enlighten.c @@ -86,7 +86,7 @@ static void __init init_pvh_bootparams(bool xen_guest) } /* - * See Documentation/x86/boot.txt. + * See Documentation/x86/boot.rst. * * Version 2.12 supports Xen entry point but we will use default x86/PC * environment (i.e. hardware_subarch 0). diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 283ee94224c6..2438f37f2ca1 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -333,7 +333,7 @@ config ACPI_CUSTOM_DSDT_FILE depends on !STANDALONE help This option supports a custom DSDT by linking it into the kernel. - See Documentation/acpi/dsdt-override.txt + See Documentation/admin-guide/acpi/dsdt-override.rst Enter the full path name to the file which includes the AmlCode or dsdt_aml_code declaration. @@ -355,7 +355,7 @@ config ACPI_TABLE_UPGRADE This option provides functionality to upgrade arbitrary ACPI tables via initrd. No functional change if no ACPI tables are passed via initrd, therefore it's safe to say Y. - See Documentation/acpi/initrd_table_override.txt for details + See Documentation/admin-guide/acpi/initrd_table_override.rst for details config ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD bool "Override ACPI tables from built-in initrd" @@ -365,7 +365,7 @@ config ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD This option provides functionality to override arbitrary ACPI tables from built-in uncompressed initrd. - See Documentation/acpi/initrd_table_override.txt for details + See Documentation/admin-guide/acpi/initrd_table_override.rst for details config ACPI_DEBUG bool "Debug Statements" @@ -374,7 +374,7 @@ config ACPI_DEBUG output and increases the kernel size by around 50K. Use the acpi.debug_layer and acpi.debug_level kernel command-line - parameters documented in Documentation/acpi/debug.txt and + parameters documented in Documentation/firmware-guide/acpi/debug.rst and Documentation/admin-guide/kernel-parameters.rst to control the type and amount of debug output. @@ -445,7 +445,7 @@ config ACPI_CUSTOM_METHOD help This debug facility allows ACPI AML methods to be inserted and/or replaced without rebooting the system. For details refer to: - Documentation/acpi/method-customizing.txt. + Documentation/firmware-guide/acpi/method-customizing.rst. NOTE: This option is security sensitive, because it allows arbitrary kernel memory to be written to by root (uid=0) users, allowing them diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index b17b79e612a3..ac6280ad43a1 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1075,7 +1075,7 @@ static int ftgmac100_mii_probe(struct ftgmac100 *priv, phy_interface_t intf) } /* Indicate that we support PAUSE frames (see comment in - * Documentation/networking/phy.txt) + * Documentation/networking/phy.rst) */ phy_support_asym_pause(phydev); diff --git a/drivers/staging/fieldbus/Documentation/fieldbus_dev.txt b/drivers/staging/fieldbus/Documentation/fieldbus_dev.txt index 56af3f650fa3..89fb8e14676f 100644 --- a/drivers/staging/fieldbus/Documentation/fieldbus_dev.txt +++ b/drivers/staging/fieldbus/Documentation/fieldbus_dev.txt @@ -54,8 +54,8 @@ a limited few common behaviours and properties. This allows us to define a simple interface consisting of a character device and a set of sysfs files: See: -Documentation/ABI/testing/sysfs-class-fieldbus-dev -Documentation/ABI/testing/fieldbus-dev-cdev +drivers/staging/fieldbus/Documentation/ABI/sysfs-class-fieldbus-dev +drivers/staging/fieldbus/Documentation/ABI/fieldbus-dev-cdev Note that this simple interface does not provide a way to modify adapter configuration settings. It is therefore useful only for adapters that get their diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 1e3ed41ae1f3..69938dbae2d0 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1694,7 +1694,7 @@ EXPORT_SYMBOL_GPL(vhost_dev_ioctl); /* TODO: This is really inefficient. We need something like get_user() * (instruction directly accesses the data, with an exception table entry - * returning -EFAULT). See Documentation/x86/exception-tables.txt. + * returning -EFAULT). See Documentation/x86/exception-tables.rst. */ static int set_bit_to_user(int nr, void __user *addr) { diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index de1804aeaf69..98e3db7a89cd 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -25,7 +25,7 @@ #define ACPI_MAX_STRING 80 /* - * Please update drivers/acpi/debug.c and Documentation/acpi/debug.txt + * Please update drivers/acpi/debug.c and Documentation/firmware-guide/acpi/debug.rst * if you add to this list. */ #define ACPI_BUS_COMPONENT 0x00010000 diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 1f966670c8dc..623eb58560b9 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -85,7 +85,7 @@ struct fs_parameter { * Superblock creation fills in ->root whereas reconfiguration begins with this * already set. * - * See Documentation/filesystems/mounting.txt + * See Documentation/filesystems/mount_api.txt */ struct fs_context { const struct fs_context_operations *ops; diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 47f58cfb6a19..df1318d85f7d 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -77,7 +77,7 @@ * state. This is called immediately after commit_creds(). * * Security hooks for mount using fs_context. - * [See also Documentation/filesystems/mounting.txt] + * [See also Documentation/filesystems/mount_api.txt] * * @fs_context_dup: * Allocate and attach a security structure to sc->security. This pointer diff --git a/mm/Kconfig b/mm/Kconfig index ee8d1f311858..6e5fb81bde4b 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -165,7 +165,7 @@ config MEMORY_HOTPLUG_DEFAULT_ONLINE onlining policy (/sys/devices/system/memory/auto_online_blocks) which determines what happens to newly added memory regions. Policy setting can always be changed at runtime. - See Documentation/memory-hotplug.txt for more information. + See Documentation/admin-guide/mm/memory-hotplug.rst for more information. Say Y here if you want all hot-plugged memory blocks to appear in 'online' state by default. diff --git a/security/Kconfig b/security/Kconfig index aeac3676dd4d..6d75ed71970c 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -62,7 +62,7 @@ config PAGE_TABLE_ISOLATION ensuring that the majority of kernel addresses are not mapped into userspace. - See Documentation/x86/pti.txt for more details. + See Documentation/x86/pti.rst for more details. config SECURITY_INFINIBAND bool "Infiniband Security Hooks" diff --git a/tools/include/linux/err.h b/tools/include/linux/err.h index 2f5a12b88a86..25f2bb3a991d 100644 --- a/tools/include/linux/err.h +++ b/tools/include/linux/err.h @@ -20,7 +20,7 @@ * Userspace note: * The same principle works for userspace, because 'error' pointers * fall down to the unused hole far from user space, as described - * in Documentation/x86/x86_64/mm.txt for x86_64 arch: + * in Documentation/x86/x86_64/mm.rst for x86_64 arch: * * 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm hole caused by [48:63] sign extension * ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt index 4dd11a554b9b..de094670050b 100644 --- a/tools/objtool/Documentation/stack-validation.txt +++ b/tools/objtool/Documentation/stack-validation.txt @@ -21,7 +21,7 @@ instructions). Similarly, it knows how to follow switch statements, for which gcc sometimes uses jump tables. (Objtool also has an 'orc generate' subcommand which generates debuginfo -for the ORC unwinder. See Documentation/x86/orc-unwinder.txt in the +for the ORC unwinder. See Documentation/x86/orc-unwinder.rst in the kernel tree for more details.) @@ -101,7 +101,7 @@ b) ORC (Oops Rewind Capability) unwind table generation band. So it doesn't affect runtime performance and it can be reliable even when interrupts or exceptions are involved. - For more details, see Documentation/x86/orc-unwinder.txt. + For more details, see Documentation/x86/orc-unwinder.rst. c) Higher live patching compatibility rate -- cgit v1.2.3 From 5c437fa29561f5809ef114ba3a5e80556cc43fb3 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Jun 2019 15:54:35 -0300 Subject: docs: fs: fix broken links to vfs.txt with was renamed to vfs.rst A recent documentation conversion renamed this file but forgot to update the links. Fixes: af96c1e304f7 ("docs: filesystems: vfs: Convert vfs.txt to RST") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet --- Documentation/filesystems/porting | 10 +++++----- include/linux/dcache.h | 4 ++-- include/linux/fs.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 3bd1148d8bb6..2813a19389fe 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -330,14 +330,14 @@ unreferenced dentries, and is now only called when the dentry refcount goes to [mandatory] .d_compare() calling convention and locking rules are significantly -changed. Read updated documentation in Documentation/filesystems/vfs.txt (and +changed. Read updated documentation in Documentation/filesystems/vfs.rst (and look at examples of other filesystems) for guidance. --- [mandatory] .d_hash() calling convention and locking rules are significantly -changed. Read updated documentation in Documentation/filesystems/vfs.txt (and +changed. Read updated documentation in Documentation/filesystems/vfs.rst (and look at examples of other filesystems) for guidance. --- @@ -377,12 +377,12 @@ where possible. the filesystem provides it), which requires dropping out of rcu-walk mode. This may now be called in rcu-walk mode (nd->flags & LOOKUP_RCU). -ECHILD should be returned if the filesystem cannot handle rcu-walk. See -Documentation/filesystems/vfs.txt for more details. +Documentation/filesystems/vfs.rst for more details. permission is an inode permission check that is called on many or all directory inodes on the way down a path walk (to check for exec permission). It must now be rcu-walk aware (mask & MAY_NOT_BLOCK). See -Documentation/filesystems/vfs.txt for more details. +Documentation/filesystems/vfs.rst for more details. -- [mandatory] @@ -625,7 +625,7 @@ in your dentry operations instead. -- [mandatory] ->clone_file_range() and ->dedupe_file_range have been replaced with - ->remap_file_range(). See Documentation/filesystems/vfs.txt for more + ->remap_file_range(). See Documentation/filesystems/vfs.rst for more information. -- [recommended] diff --git a/include/linux/dcache.h b/include/linux/dcache.h index f14e587c5d5d..5e0eadf7de55 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -153,7 +153,7 @@ struct dentry_operations { * Locking rules for dentry_operations callbacks are to be found in * Documentation/filesystems/Locking. Keep it updated! * - * FUrther descriptions are found in Documentation/filesystems/vfs.txt. + * FUrther descriptions are found in Documentation/filesystems/vfs.rst. * Keep it updated too! */ @@ -568,7 +568,7 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper) * If dentry is on a union/overlay, then return the underlying, real dentry. * Otherwise return the dentry itself. * - * See also: Documentation/filesystems/vfs.txt + * See also: Documentation/filesystems/vfs.rst */ static inline struct dentry *d_real(struct dentry *dentry, const struct inode *inode) diff --git a/include/linux/fs.h b/include/linux/fs.h index f7fdfe93e25d..c564cf3f48d9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1769,7 +1769,7 @@ struct block_device_operations; /* * These flags control the behavior of the remap_file_range function pointer. * If it is called with len == 0 that means "remap to end of source file". - * See Documentation/filesystems/vfs.txt for more details about this call. + * See Documentation/filesystems/vfs.rst for more details about this call. * * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate) * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request -- cgit v1.2.3 From d461933638ae9fa49ad22f60a40de5b3ed414912 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 8 Jun 2019 15:04:29 +0300 Subject: net: dsa: tag_8021q: Create helper function for removing VLAN header This removes the existing implementation from tag_sja1105, which was partially incorrect (it was not changing the MAC header offset, thereby leaving it to point 4 bytes earlier than it should have). This overwrites the VLAN tag by moving the Ethernet source and destination MACs 4 bytes to the right. Then skb->data (assumed to be pointing immediately after the EtherType) is temporarily pushed to the beginning of the new Ethernet header, the new Ethernet header offset and length are recorded, then skb->data is moved back to where it was. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/8021q.h | 16 ++++++------- net/dsa/tag_8021q.c | 57 ++++++++++++++++++++++++++++++----------------- net/dsa/tag_sja1105.c | 19 ++++++++-------- 3 files changed, 53 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 3911e0586478..0aa803c451a3 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -20,9 +20,6 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index, struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, u16 tpid, u16 tci); -struct sk_buff *dsa_8021q_rcv(struct sk_buff *skb, struct net_device *netdev, - struct packet_type *pt, u16 *tpid, u16 *tci); - u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port); u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port); @@ -31,6 +28,8 @@ int dsa_8021q_rx_switch_id(u16 vid); int dsa_8021q_rx_source_port(u16 vid); +struct sk_buff *dsa_8021q_remove_header(struct sk_buff *skb); + #else int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index, @@ -45,12 +44,6 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, return NULL; } -struct sk_buff *dsa_8021q_rcv(struct sk_buff *skb, struct net_device *netdev, - struct packet_type *pt, u16 *tpid, u16 *tci) -{ - return NULL; -} - u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port) { return 0; @@ -71,6 +64,11 @@ int dsa_8021q_rx_source_port(u16 vid) return 0; } +struct sk_buff *dsa_8021q_remove_header(struct sk_buff *skb) +{ + return NULL; +} + #endif /* IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q) */ #endif /* _NET_DSA_8021Q_H */ diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 65a35e976d7b..6ebbd799c4eb 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -235,31 +235,48 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, } EXPORT_SYMBOL_GPL(dsa_8021q_xmit); -struct sk_buff *dsa_8021q_rcv(struct sk_buff *skb, struct net_device *netdev, - struct packet_type *pt, u16 *tpid, u16 *tci) +/* In the DSA packet_type handler, skb->data points in the middle of the VLAN + * tag, after tpid and before tci. This is because so far, ETH_HLEN + * (DMAC, SMAC, EtherType) bytes were pulled. + * There are 2 bytes of VLAN tag left in skb->data, and upper + * layers expect the 'real' EtherType to be consumed as well. + * Coincidentally, a VLAN header is also of the same size as + * the number of bytes that need to be pulled. + * + * skb_mac_header skb->data + * | | + * v v + * | | | | | | | | | | | | | | | | | | | + * +-----------------------+-----------------------+-------+-------+-------+ + * | Destination MAC | Source MAC | TPID | TCI | EType | + * +-----------------------+-----------------------+-------+-------+-------+ + * ^ | | + * |<--VLAN_HLEN-->to <---VLAN_HLEN---> + * from | + * >>>>>>> v + * >>>>>>> | | | | | | | | | | | | | | | + * >>>>>>> +-----------------------+-----------------------+-------+ + * >>>>>>> | Destination MAC | Source MAC | EType | + * +-----------------------+-----------------------+-------+ + * ^ ^ + * (now part of | | + * skb->head) skb_mac_header skb->data + */ +struct sk_buff *dsa_8021q_remove_header(struct sk_buff *skb) { - struct vlan_ethhdr *tag; - - if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) - return NULL; + u8 *from = skb_mac_header(skb); + u8 *dest = from + VLAN_HLEN; - tag = vlan_eth_hdr(skb); - *tpid = ntohs(tag->h_vlan_proto); - *tci = ntohs(tag->h_vlan_TCI); - - /* skb->data points in the middle of the VLAN tag, - * after tpid and before tci. This is because so far, - * ETH_HLEN (DMAC, SMAC, EtherType) bytes were pulled. - * There are 2 bytes of VLAN tag left in skb->data, and upper - * layers expect the 'real' EtherType to be consumed as well. - * Coincidentally, a VLAN header is also of the same size as - * the number of bytes that need to be pulled. - */ - skb_pull_rcsum(skb, VLAN_HLEN); + memmove(dest, from, ETH_HLEN - VLAN_HLEN); + skb_pull(skb, VLAN_HLEN); + skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + skb_pull_rcsum(skb, ETH_HLEN); return skb; } -EXPORT_SYMBOL_GPL(dsa_8021q_rcv); +EXPORT_SYMBOL_GPL(dsa_8021q_remove_header); static const struct dsa_device_ops dsa_8021q_netdev_ops = { .name = "8021q", diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index d43737e6c3fb..77eeea004e92 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -66,17 +66,14 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, struct net_device *netdev, struct packet_type *pt) { - struct ethhdr *hdr = eth_hdr(skb); - u64 source_port, switch_id; - struct sk_buff *nskb; + int source_port, switch_id; + struct vlan_ethhdr *hdr; u16 tpid, vid, tci; bool is_tagged; - nskb = dsa_8021q_rcv(skb, netdev, pt, &tpid, &tci); - is_tagged = (nskb && tpid == ETH_P_SJA1105); - - skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; - vid = tci & VLAN_VID_MASK; + hdr = vlan_eth_hdr(skb); + tpid = ntohs(hdr->h_vlan_proto); + is_tagged = (tpid == ETH_P_SJA1105); skb->offload_fwd_mark = 1; @@ -92,8 +89,11 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, hdr->h_dest[4] = 0; } else { /* Normal traffic path. */ + tci = ntohs(hdr->h_vlan_TCI); + vid = tci & VLAN_VID_MASK; source_port = dsa_8021q_rx_source_port(vid); switch_id = dsa_8021q_rx_switch_id(vid); + skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; } skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); @@ -106,8 +106,7 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, * it there, see dsa_switch_rcv: skb_push(skb, ETH_HLEN). */ if (is_tagged) - memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - VLAN_HLEN, - ETH_HLEN - VLAN_HLEN); + skb = dsa_8021q_remove_header(skb); return skb; } -- cgit v1.2.3 From 47ed985e97f513b7746270e8c5d1f3a3f959b2da Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 8 Jun 2019 15:04:35 +0300 Subject: net: dsa: sja1105: Add logic for TX timestamping On TX, timestamping is performed synchronously from the port_deferred_xmit worker thread. In management routes, the switch is requested to take egress timestamps (again partial), which are reconstructed and appended to a clone of the skb that was just sent. The cloning is done by DSA and we retrieve the pointer from the structure that DSA keeps in skb->cb. Then these clones are enqueued to the socket's error queue for application-level processing. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105.h | 10 ++++ drivers/net/dsa/sja1105/sja1105_main.c | 55 ++++++++++++++++- drivers/net/dsa/sja1105/sja1105_ptp.c | 106 +++++++++++++++++++++++++++++++++ drivers/net/dsa/sja1105/sja1105_ptp.h | 17 ++++++ drivers/net/dsa/sja1105/sja1105_spi.c | 14 +++++ include/linux/dsa/sja1105.h | 1 + 6 files changed, 201 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index 3c6296203c21..5a4f83a3417b 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -33,6 +33,7 @@ struct sja1105_regs { u64 ptpclk; u64 ptpclkrate; u64 ptptsclk; + u64 ptpegr_ts[SJA1105_NUM_PORTS]; u64 pad_mii_tx[SJA1105_NUM_PORTS]; u64 cgu_idiv[SJA1105_NUM_PORTS]; u64 rgmii_pad_mii_tx[SJA1105_NUM_PORTS]; @@ -56,6 +57,15 @@ struct sja1105_info { * switch core and device_id) */ u64 part_no; + /* E/T and P/Q/R/S have partial timestamps of different sizes. + * They must be reconstructed on both families anyway to get the full + * 64-bit values back. + */ + int ptp_ts_bits; + /* Also SPI commands are of different sizes to retrieve + * the egress timestamps. + */ + int ptpegr_ts_bytes; const struct sja1105_dynamic_table_ops *dyn_ops; const struct sja1105_table_ops *static_ops; const struct sja1105_regs *regs; diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index f897fdb12930..121ceccd8107 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1565,7 +1565,7 @@ static int sja1105_setup(struct dsa_switch *ds) } static int sja1105_mgmt_xmit(struct dsa_switch *ds, int port, int slot, - struct sk_buff *skb) + struct sk_buff *skb, bool takets) { struct sja1105_mgmt_entry mgmt_route = {0}; struct sja1105_private *priv = ds->priv; @@ -1578,6 +1578,8 @@ static int sja1105_mgmt_xmit(struct dsa_switch *ds, int port, int slot, mgmt_route.macaddr = ether_addr_to_u64(hdr->h_dest); mgmt_route.destports = BIT(port); mgmt_route.enfport = 1; + mgmt_route.tsreg = 0; + mgmt_route.takets = takets; rc = sja1105_dynamic_config_write(priv, BLK_IDX_MGMT_ROUTE, slot, &mgmt_route, true); @@ -1629,7 +1631,11 @@ static netdev_tx_t sja1105_port_deferred_xmit(struct dsa_switch *ds, int port, { struct sja1105_private *priv = ds->priv; struct sja1105_port *sp = &priv->ports[port]; + struct skb_shared_hwtstamps shwt = {0}; int slot = sp->mgmt_slot; + struct sk_buff *clone; + u64 now, ts; + int rc; /* The tragic fact about the switch having 4x2 slots for installing * management routes is that all of them except one are actually @@ -1647,8 +1653,36 @@ static netdev_tx_t sja1105_port_deferred_xmit(struct dsa_switch *ds, int port, */ mutex_lock(&priv->mgmt_lock); - sja1105_mgmt_xmit(ds, port, slot, skb); + /* The clone, if there, was made by dsa_skb_tx_timestamp */ + clone = DSA_SKB_CB(skb)->clone; + + sja1105_mgmt_xmit(ds, port, slot, skb, !!clone); + + if (!clone) + goto out; + + skb_shinfo(clone)->tx_flags |= SKBTX_IN_PROGRESS; + + mutex_lock(&priv->ptp_lock); + + now = priv->tstamp_cc.read(&priv->tstamp_cc); + + rc = sja1105_ptpegr_ts_poll(priv, slot, &ts); + if (rc < 0) { + dev_err(ds->dev, "xmit: timed out polling for tstamp\n"); + kfree_skb(clone); + goto out_unlock_ptp; + } + + ts = sja1105_tstamp_reconstruct(priv, now, ts); + ts = timecounter_cyc2time(&priv->tstamp_tc, ts); + shwt.hwtstamp = ns_to_ktime(ts); + skb_complete_tx_timestamp(clone, &shwt); + +out_unlock_ptp: + mutex_unlock(&priv->ptp_lock); +out: mutex_unlock(&priv->mgmt_lock); return NETDEV_TX_OK; } @@ -1677,6 +1711,22 @@ static int sja1105_set_ageing_time(struct dsa_switch *ds, return sja1105_static_config_reload(priv); } +/* Called from dsa_skb_tx_timestamp. This callback is just to make DSA clone + * the skb and have it available in DSA_SKB_CB in the .port_deferred_xmit + * callback, where we will timestamp it synchronously. + */ +bool sja1105_port_txtstamp(struct dsa_switch *ds, int port, + struct sk_buff *skb, unsigned int type) +{ + struct sja1105_private *priv = ds->priv; + struct sja1105_port *sp = &priv->ports[port]; + + if (!sp->hwts_tx_en) + return false; + + return true; +} + static const struct dsa_switch_ops sja1105_switch_ops = { .get_tag_protocol = sja1105_get_tag_protocol, .setup = sja1105_setup, @@ -1701,6 +1751,7 @@ static const struct dsa_switch_ops sja1105_switch_ops = { .port_mdb_add = sja1105_mdb_add, .port_mdb_del = sja1105_mdb_del, .port_deferred_xmit = sja1105_port_deferred_xmit, + .port_txtstamp = sja1105_port_txtstamp, }; static int sja1105_check_device_id(struct sja1105_private *priv) diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c index 47313a6ec932..01ecc8fb1b30 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.c +++ b/drivers/net/dsa/sja1105/sja1105_ptp.c @@ -113,6 +113,112 @@ int sja1105pqrs_ptp_cmd(const void *ctx, const void *data) } EXPORT_SYMBOL_GPL(sja1105pqrs_ptp_cmd); +/* The switch returns partial timestamps (24 bits for SJA1105 E/T, which wrap + * around in 0.135 seconds, and 32 bits for P/Q/R/S, wrapping around in 34.35 + * seconds). + * + * This receives the RX or TX MAC timestamps, provided by hardware as + * the lower bits of the cycle counter, sampled at the time the timestamp was + * collected. + * + * To reconstruct into a full 64-bit-wide timestamp, the cycle counter is + * read and the high-order bits are filled in. + * + * Must be called within one wraparound period of the partial timestamp since + * it was generated by the MAC. + */ +u64 sja1105_tstamp_reconstruct(struct sja1105_private *priv, u64 now, + u64 ts_partial) +{ + u64 partial_tstamp_mask = CYCLECOUNTER_MASK(priv->info->ptp_ts_bits); + u64 ts_reconstructed; + + ts_reconstructed = (now & ~partial_tstamp_mask) | ts_partial; + + /* Check lower bits of current cycle counter against the timestamp. + * If the current cycle counter is lower than the partial timestamp, + * then wraparound surely occurred and must be accounted for. + */ + if ((now & partial_tstamp_mask) <= ts_partial) + ts_reconstructed -= (partial_tstamp_mask + 1); + + return ts_reconstructed; +} +EXPORT_SYMBOL_GPL(sja1105_tstamp_reconstruct); + +/* Reads the SPI interface for an egress timestamp generated by the switch + * for frames sent using management routes. + * + * SJA1105 E/T layout of the 4-byte SPI payload: + * + * 31 23 15 7 0 + * | | | | | + * +-----+-----+-----+ ^ + * ^ | + * | | + * 24-bit timestamp Update bit + * + * + * SJA1105 P/Q/R/S layout of the 8-byte SPI payload: + * + * 31 23 15 7 0 63 55 47 39 32 + * | | | | | | | | | | + * ^ +-----+-----+-----+-----+ + * | ^ + * | | + * Update bit 32-bit timestamp + * + * Notice that the update bit is in the same place. + * To have common code for E/T and P/Q/R/S for reading the timestamp, + * we need to juggle with the offset and the bit indices. + */ +int sja1105_ptpegr_ts_poll(struct sja1105_private *priv, int port, u64 *ts) +{ + const struct sja1105_regs *regs = priv->info->regs; + int tstamp_bit_start, tstamp_bit_end; + int timeout = 10; + u8 packed_buf[8]; + u64 update; + int rc; + + do { + rc = sja1105_spi_send_packed_buf(priv, SPI_READ, + regs->ptpegr_ts[port], + packed_buf, + priv->info->ptpegr_ts_bytes); + if (rc < 0) + return rc; + + sja1105_unpack(packed_buf, &update, 0, 0, + priv->info->ptpegr_ts_bytes); + if (update) + break; + + usleep_range(10, 50); + } while (--timeout); + + if (!timeout) + return -ETIMEDOUT; + + /* Point the end bit to the second 32-bit word on P/Q/R/S, + * no-op on E/T. + */ + tstamp_bit_end = (priv->info->ptpegr_ts_bytes - 4) * 8; + /* Shift the 24-bit timestamp on E/T to be collected from 31:8. + * No-op on P/Q/R/S. + */ + tstamp_bit_end += 32 - priv->info->ptp_ts_bits; + tstamp_bit_start = tstamp_bit_end + priv->info->ptp_ts_bits - 1; + + *ts = 0; + + sja1105_unpack(packed_buf, ts, tstamp_bit_start, tstamp_bit_end, + priv->info->ptpegr_ts_bytes); + + return 0; +} +EXPORT_SYMBOL_GPL(sja1105_ptpegr_ts_poll); + int sja1105_ptp_reset(struct sja1105_private *priv) { struct dsa_switch *ds = priv->ds; diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h index 137ffbb0a233..af456b0a4d27 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.h +++ b/drivers/net/dsa/sja1105/sja1105_ptp.h @@ -10,6 +10,8 @@ int sja1105_ptp_clock_register(struct sja1105_private *priv); void sja1105_ptp_clock_unregister(struct sja1105_private *priv); +int sja1105_ptpegr_ts_poll(struct sja1105_private *priv, int port, u64 *ts); + int sja1105et_ptp_cmd(const void *ctx, const void *data); int sja1105pqrs_ptp_cmd(const void *ctx, const void *data); @@ -17,6 +19,9 @@ int sja1105pqrs_ptp_cmd(const void *ctx, const void *data); int sja1105_get_ts_info(struct dsa_switch *ds, int port, struct ethtool_ts_info *ts); +u64 sja1105_tstamp_reconstruct(struct sja1105_private *priv, u64 now, + u64 ts_partial); + int sja1105_ptp_reset(struct sja1105_private *priv); #else @@ -31,6 +36,18 @@ static inline void sja1105_ptp_clock_unregister(struct sja1105_private *priv) return; } +static inline int +sja1105_ptpegr_ts_poll(struct sja1105_private *priv, int port, u64 *ts) +{ + return 0; +} + +static inline u64 sja1105_tstamp_reconstruct(struct sja1105_private *priv, + u64 now, u64 ts_partial) +{ + return 0; +} + static inline int sja1105_ptp_reset(struct sja1105_private *priv) { return 0; diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c index a0d08e6c22ff..d729a0f0b28e 100644 --- a/drivers/net/dsa/sja1105/sja1105_spi.c +++ b/drivers/net/dsa/sja1105/sja1105_spi.c @@ -514,6 +514,7 @@ static struct sja1105_regs sja1105et_regs = { .rgmii_tx_clk = {0x100016, 0x10001D, 0x100024, 0x10002B, 0x100032}, .rmii_ref_clk = {0x100015, 0x10001C, 0x100023, 0x10002A, 0x100031}, .rmii_ext_tx_clk = {0x100018, 0x10001F, 0x100026, 0x10002D, 0x100034}, + .ptpegr_ts = {0xC0, 0xC2, 0xC4, 0xC6, 0xC8}, .ptp_control = 0x17, .ptpclk = 0x18, /* Spans 0x18 to 0x19 */ .ptpclkrate = 0x1A, @@ -544,6 +545,7 @@ static struct sja1105_regs sja1105pqrs_regs = { .rmii_ref_clk = {0x100015, 0x10001B, 0x100021, 0x100027, 0x10002D}, .rmii_ext_tx_clk = {0x100017, 0x10001D, 0x100023, 0x100029, 0x10002F}, .qlevel = {0x604, 0x614, 0x624, 0x634, 0x644}, + .ptpegr_ts = {0xC0, 0xC4, 0xC8, 0xCC, 0xD0}, .ptp_control = 0x18, .ptpclk = 0x19, .ptpclkrate = 0x1B, @@ -555,6 +557,8 @@ struct sja1105_info sja1105e_info = { .part_no = SJA1105ET_PART_NO, .static_ops = sja1105e_table_ops, .dyn_ops = sja1105et_dyn_ops, + .ptp_ts_bits = 24, + .ptpegr_ts_bytes = 4, .reset_cmd = sja1105et_reset_cmd, .fdb_add_cmd = sja1105et_fdb_add, .fdb_del_cmd = sja1105et_fdb_del, @@ -567,6 +571,8 @@ struct sja1105_info sja1105t_info = { .part_no = SJA1105ET_PART_NO, .static_ops = sja1105t_table_ops, .dyn_ops = sja1105et_dyn_ops, + .ptp_ts_bits = 24, + .ptpegr_ts_bytes = 4, .reset_cmd = sja1105et_reset_cmd, .fdb_add_cmd = sja1105et_fdb_add, .fdb_del_cmd = sja1105et_fdb_del, @@ -579,6 +585,8 @@ struct sja1105_info sja1105p_info = { .part_no = SJA1105P_PART_NO, .static_ops = sja1105p_table_ops, .dyn_ops = sja1105pqrs_dyn_ops, + .ptp_ts_bits = 32, + .ptpegr_ts_bytes = 8, .reset_cmd = sja1105pqrs_reset_cmd, .fdb_add_cmd = sja1105pqrs_fdb_add, .fdb_del_cmd = sja1105pqrs_fdb_del, @@ -591,6 +599,8 @@ struct sja1105_info sja1105q_info = { .part_no = SJA1105Q_PART_NO, .static_ops = sja1105q_table_ops, .dyn_ops = sja1105pqrs_dyn_ops, + .ptp_ts_bits = 32, + .ptpegr_ts_bytes = 8, .reset_cmd = sja1105pqrs_reset_cmd, .fdb_add_cmd = sja1105pqrs_fdb_add, .fdb_del_cmd = sja1105pqrs_fdb_del, @@ -603,6 +613,8 @@ struct sja1105_info sja1105r_info = { .part_no = SJA1105R_PART_NO, .static_ops = sja1105r_table_ops, .dyn_ops = sja1105pqrs_dyn_ops, + .ptp_ts_bits = 32, + .ptpegr_ts_bytes = 8, .reset_cmd = sja1105pqrs_reset_cmd, .fdb_add_cmd = sja1105pqrs_fdb_add, .fdb_del_cmd = sja1105pqrs_fdb_del, @@ -616,6 +628,8 @@ struct sja1105_info sja1105s_info = { .static_ops = sja1105s_table_ops, .dyn_ops = sja1105pqrs_dyn_ops, .regs = &sja1105pqrs_regs, + .ptp_ts_bits = 32, + .ptpegr_ts_bytes = 8, .reset_cmd = sja1105pqrs_reset_cmd, .fdb_add_cmd = sja1105pqrs_fdb_add, .fdb_del_cmd = sja1105pqrs_fdb_del, diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index e46e18c47d41..5a956f335022 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -22,6 +22,7 @@ struct sja1105_port { struct dsa_port *dp; + bool hwts_tx_en; int mgmt_slot; }; -- cgit v1.2.3 From d3f9b90bf19fad05889e4bead7dc1b336da56118 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 8 Jun 2019 15:04:36 +0300 Subject: net: dsa: sja1105: Build a minimal understanding of meta frames Meta frames are sent on the CPU port by the switch if RX timestamping is enabled. They contain a partial timestamp of the previous frame. They are Ethernet frames with the Ethernet header constructed out of: - SJA1105_META_DMAC - SJA1105_META_SMAC - ETH_P_SJA1105_META The Ethernet payload will be decoded in a follow-up patch. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/dsa/sja1105.h | 11 +++++++++++ net/dsa/tag_sja1105.c | 15 +++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index 5a956f335022..cc4a909d1007 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -12,6 +12,7 @@ #include #define ETH_P_SJA1105 ETH_P_DSA_8021Q +#define ETH_P_SJA1105_META 0x0008 /* IEEE 802.3 Annex 57A: Slow Protocols PDUs (01:80:C2:xx:xx:xx) */ #define SJA1105_LINKLOCAL_FILTER_A 0x0180C2000000ull @@ -20,6 +21,16 @@ #define SJA1105_LINKLOCAL_FILTER_B 0x011B19000000ull #define SJA1105_LINKLOCAL_FILTER_B_MASK 0xFFFFFF000000ull +/* Source and Destination MAC of follow-up meta frames. + * Whereas the choice of SMAC only affects the unique identification of the + * switch as sender of meta frames, the DMAC must be an address that is present + * in the DSA master port's multicast MAC filter. + * 01-80-C2-00-00-0E is a good choice for this, as all profiles of IEEE 1588 + * over L2 use this address for some purpose already. + */ +#define SJA1105_META_SMAC 0x222222222222ull +#define SJA1105_META_DMAC 0x0180C200000Eull + struct sja1105_port { struct dsa_port *dp; bool hwts_tx_en; diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index cd8e0bfb5e75..0beb52518d56 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -22,6 +22,21 @@ static inline bool sja1105_is_link_local(const struct sk_buff *skb) return false; } +static inline bool sja1105_is_meta_frame(const struct sk_buff *skb) +{ + const struct ethhdr *hdr = eth_hdr(skb); + u64 smac = ether_addr_to_u64(hdr->h_source); + u64 dmac = ether_addr_to_u64(hdr->h_dest); + + if (smac != SJA1105_META_SMAC) + return false; + if (dmac != SJA1105_META_DMAC) + return false; + if (ntohs(hdr->h_proto) != ETH_P_SJA1105_META) + return false; + return true; +} + /* This is the first time the tagger sees the frame on RX. * Figure out if we can decode it, and if we can, annotate skb->cb with how we * plan to do that, so we don't need to check again in the rcv function. -- cgit v1.2.3 From 844d7edc6a34ae3a8236f1306e4f2615c8db1eac Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 8 Jun 2019 15:04:40 +0300 Subject: net: dsa: sja1105: Add a global sja1105_tagger_data structure This will be used to keep state for RX timestamping. It is global because the switch serializes timestampable and meta frames when trapping them towards the CPU port (lower port indices have higher priority) and therefore having one state machine per port would create unnecessary complications. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105.h | 1 + drivers/net/dsa/sja1105/sja1105_main.c | 5 +++++ include/linux/dsa/sja1105.h | 15 +++++++++++++++ 3 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index 5a4f83a3417b..0fc6fe9ada87 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -103,6 +103,7 @@ struct sja1105_private { * the switch doesn't confuse them with one another. */ struct mutex mgmt_lock; + struct sja1105_tagger_data tagger_data; }; #include "sja1105_dynamic_config.h" diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index d129997174bb..3c11142f1c67 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1828,6 +1828,7 @@ static int sja1105_check_device_id(struct sja1105_private *priv) static int sja1105_probe(struct spi_device *spi) { + struct sja1105_tagger_data *tagger_data; struct device *dev = &spi->dev; struct sja1105_private *priv; struct dsa_switch *ds; @@ -1882,12 +1883,16 @@ static int sja1105_probe(struct spi_device *spi) ds->priv = priv; priv->ds = ds; + tagger_data = &priv->tagger_data; + skb_queue_head_init(&tagger_data->skb_rxtstamp_queue); + /* Connections between dsa_port and sja1105_port */ for (i = 0; i < SJA1105_NUM_PORTS; i++) { struct sja1105_port *sp = &priv->ports[i]; ds->ports[i].priv = sp; sp->dp = &ds->ports[i]; + sp->data = tagger_data; } mutex_init(&priv->mgmt_lock); diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index cc4a909d1007..2c4fce4eaf0d 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -31,7 +31,22 @@ #define SJA1105_META_SMAC 0x222222222222ull #define SJA1105_META_DMAC 0x0180C200000Eull +/* Global tagger data: each struct sja1105_port has a reference to + * the structure defined in struct sja1105_private. + */ +struct sja1105_tagger_data { + struct sk_buff_head skb_rxtstamp_queue; + struct work_struct rxtstamp_work; + struct sk_buff *stampable_skb; + /* Protects concurrent access to the meta state machine + * from taggers running on multiple ports on SMP systems + */ + spinlock_t meta_lock; + bool hwts_rx_en; +}; + struct sja1105_port { + struct sja1105_tagger_data *data; struct dsa_port *dp; bool hwts_tx_en; int mgmt_slot; -- cgit v1.2.3 From f3097be21bf17ae8785eea009cbc424f16611d9a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 8 Jun 2019 15:04:42 +0300 Subject: net: dsa: sja1105: Add a state machine for RX timestamping Meta frame reception relies on the hardware keeping its promise that it will send no other traffic towards the CPU port between a link-local frame and a meta frame. Otherwise there is no other way to associate the meta frame with the link-local frame it's holding a timestamp of. The receive function is made stateful, and buffers a timestampable frame until its meta frame arrives, then merges the two, drops the meta and releases the link-local frame up the stack. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 62 +++++++++++++++++ include/linux/dsa/sja1105.h | 7 ++ net/dsa/tag_sja1105.c | 121 ++++++++++++++++++++++++++++++++- 3 files changed, 189 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 2b804eeca390..8963b21b3061 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1600,6 +1600,14 @@ static int sja1105_setup(struct dsa_switch *ds) return sja1105_setup_8021q_tagging(ds, true); } +static void sja1105_teardown(struct dsa_switch *ds) +{ + struct sja1105_private *priv = ds->priv; + + cancel_work_sync(&priv->tagger_data.rxtstamp_work); + skb_queue_purge(&priv->tagger_data.skb_rxtstamp_queue); +} + static int sja1105_mgmt_xmit(struct dsa_switch *ds, int port, int slot, struct sk_buff *skb, bool takets) { @@ -1747,6 +1755,57 @@ static int sja1105_set_ageing_time(struct dsa_switch *ds, return sja1105_static_config_reload(priv); } +#define to_tagger(d) \ + container_of((d), struct sja1105_tagger_data, rxtstamp_work) +#define to_sja1105(d) \ + container_of((d), struct sja1105_private, tagger_data) + +static void sja1105_rxtstamp_work(struct work_struct *work) +{ + struct sja1105_tagger_data *data = to_tagger(work); + struct sja1105_private *priv = to_sja1105(data); + struct sk_buff *skb; + u64 now; + + mutex_lock(&priv->ptp_lock); + + now = priv->tstamp_cc.read(&priv->tstamp_cc); + + while ((skb = skb_dequeue(&data->skb_rxtstamp_queue)) != NULL) { + struct skb_shared_hwtstamps *shwt = skb_hwtstamps(skb); + u64 ts; + + *shwt = (struct skb_shared_hwtstamps) {0}; + + ts = SJA1105_SKB_CB(skb)->meta_tstamp; + ts = sja1105_tstamp_reconstruct(priv, now, ts); + ts = timecounter_cyc2time(&priv->tstamp_tc, ts); + + shwt->hwtstamp = ns_to_ktime(ts); + netif_rx_ni(skb); + } + + mutex_unlock(&priv->ptp_lock); +} + +/* Called from dsa_skb_defer_rx_timestamp */ +bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port, + struct sk_buff *skb, unsigned int type) +{ + struct sja1105_private *priv = ds->priv; + struct sja1105_tagger_data *data = &priv->tagger_data; + + if (!data->hwts_rx_en) + return false; + + /* We need to read the full PTP clock to reconstruct the Rx + * timestamp. For that we need a sleepable context. + */ + skb_queue_tail(&data->skb_rxtstamp_queue, skb); + schedule_work(&data->rxtstamp_work); + return true; +} + /* Called from dsa_skb_tx_timestamp. This callback is just to make DSA clone * the skb and have it available in DSA_SKB_CB in the .port_deferred_xmit * callback, where we will timestamp it synchronously. @@ -1766,6 +1825,7 @@ bool sja1105_port_txtstamp(struct dsa_switch *ds, int port, static const struct dsa_switch_ops sja1105_switch_ops = { .get_tag_protocol = sja1105_get_tag_protocol, .setup = sja1105_setup, + .teardown = sja1105_teardown, .set_ageing_time = sja1105_set_ageing_time, .phylink_validate = sja1105_phylink_validate, .phylink_mac_config = sja1105_mac_config, @@ -1787,6 +1847,7 @@ static const struct dsa_switch_ops sja1105_switch_ops = { .port_mdb_add = sja1105_mdb_add, .port_mdb_del = sja1105_mdb_del, .port_deferred_xmit = sja1105_port_deferred_xmit, + .port_rxtstamp = sja1105_port_rxtstamp, .port_txtstamp = sja1105_port_txtstamp, }; @@ -1885,6 +1946,7 @@ static int sja1105_probe(struct spi_device *spi) tagger_data = &priv->tagger_data; skb_queue_head_init(&tagger_data->skb_rxtstamp_queue); + INIT_WORK(&tagger_data->rxtstamp_work, sja1105_rxtstamp_work); /* Connections between dsa_port and sja1105_port */ for (i = 0; i < SJA1105_NUM_PORTS; i++) { diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index 2c4fce4eaf0d..79435cfc20eb 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -45,6 +45,13 @@ struct sja1105_tagger_data { bool hwts_rx_en; }; +struct sja1105_skb_cb { + u32 meta_tstamp; +}; + +#define SJA1105_SKB_CB(skb) \ + ((struct sja1105_skb_cb *)DSA_SKB_CB_PRIV(skb)) + struct sja1105_port { struct sja1105_tagger_data *data; struct dsa_port *dp; diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 5b51e96130c7..1d96c9d4a8e9 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -110,6 +110,124 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb, ((pcp << VLAN_PRIO_SHIFT) | tx_vid)); } +static void sja1105_transfer_meta(struct sk_buff *skb, + const struct sja1105_meta *meta) +{ + struct ethhdr *hdr = eth_hdr(skb); + + hdr->h_dest[3] = meta->dmac_byte_3; + hdr->h_dest[4] = meta->dmac_byte_4; + SJA1105_SKB_CB(skb)->meta_tstamp = meta->tstamp; +} + +/* This is a simple state machine which follows the hardware mechanism of + * generating RX timestamps: + * + * After each timestampable skb (all traffic for which send_meta1 and + * send_meta0 is true, aka all MAC-filtered link-local traffic) a meta frame + * containing a partial timestamp is immediately generated by the switch and + * sent as a follow-up to the link-local frame on the CPU port. + * + * The meta frames have no unique identifier (such as sequence number) by which + * one may pair them to the correct timestampable frame. + * Instead, the switch has internal logic that ensures no frames are sent on + * the CPU port between a link-local timestampable frame and its corresponding + * meta follow-up. It also ensures strict ordering between ports (lower ports + * have higher priority towards the CPU port). For this reason, a per-port + * data structure is not needed/desirable. + * + * This function pairs the link-local frame with its partial timestamp from the + * meta follow-up frame. The full timestamp will be reconstructed later in a + * work queue. + */ +static struct sk_buff +*sja1105_rcv_meta_state_machine(struct sk_buff *skb, + struct sja1105_meta *meta, + bool is_link_local, + bool is_meta) +{ + struct sja1105_port *sp; + struct dsa_port *dp; + + dp = dsa_slave_to_port(skb->dev); + sp = dp->priv; + + /* Step 1: A timestampable frame was received. + * Buffer it until we get its meta frame. + */ + if (is_link_local && sp->data->hwts_rx_en) { + spin_lock(&sp->data->meta_lock); + /* Was this a link-local frame instead of the meta + * that we were expecting? + */ + if (sp->data->stampable_skb) { + dev_err_ratelimited(dp->ds->dev, + "Expected meta frame, is %12llx " + "in the DSA master multicast filter?\n", + SJA1105_META_DMAC); + } + + /* Hold a reference to avoid dsa_switch_rcv + * from freeing the skb. + */ + sp->data->stampable_skb = skb_get(skb); + spin_unlock(&sp->data->meta_lock); + + /* Tell DSA we got nothing */ + return NULL; + + /* Step 2: The meta frame arrived. + * Time to take the stampable skb out of the closet, annotate it + * with the partial timestamp, and pretend that we received it + * just now (basically masquerade the buffered frame as the meta + * frame, which serves no further purpose). + */ + } else if (is_meta) { + struct sk_buff *stampable_skb; + + spin_lock(&sp->data->meta_lock); + + stampable_skb = sp->data->stampable_skb; + sp->data->stampable_skb = NULL; + + /* Was this a meta frame instead of the link-local + * that we were expecting? + */ + if (!stampable_skb) { + dev_err_ratelimited(dp->ds->dev, + "Unexpected meta frame\n"); + spin_unlock(&sp->data->meta_lock); + return NULL; + } + + if (stampable_skb->dev != skb->dev) { + dev_err_ratelimited(dp->ds->dev, + "Meta frame on wrong port\n"); + spin_unlock(&sp->data->meta_lock); + return NULL; + } + + /* Free the meta frame and give DSA the buffered stampable_skb + * for further processing up the network stack. + */ + kfree_skb(skb); + + skb = skb_copy(stampable_skb, GFP_ATOMIC); + if (!skb) { + dev_err_ratelimited(dp->ds->dev, + "Failed to copy stampable skb\n"); + return NULL; + } + sja1105_transfer_meta(skb, meta); + /* The cached copy will be freed now */ + skb_unref(stampable_skb); + + spin_unlock(&sp->data->meta_lock); + } + + return skb; +} + static struct sk_buff *sja1105_rcv(struct sk_buff *skb, struct net_device *netdev, struct packet_type *pt) @@ -167,7 +285,8 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, if (is_tagged) skb = dsa_8021q_remove_header(skb); - return skb; + return sja1105_rcv_meta_state_machine(skb, &meta, is_link_local, + is_meta); } static struct dsa_device_ops sja1105_netdev_ops = { -- cgit v1.2.3 From 7f192e3cd316ba58c88dfa26796cf77789dd9872 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 25 May 2019 11:36:41 +0200 Subject: fork: add clone3 This adds the clone3 system call. As mentioned several times already (cf. [7], [8]) here's the promised patchset for clone3(). We recently merged the CLONE_PIDFD patchset (cf. [1]). It took the last free flag from clone(). Independent of the CLONE_PIDFD patchset a time namespace has been discussed at Linux Plumber Conference last year and has been sent out and reviewed (cf. [5]). It is expected that it will go upstream in the not too distant future. However, it relies on the addition of the CLONE_NEWTIME flag to clone(). The only other good candidate - CLONE_DETACHED - is currently not recyclable as we have identified at least two large or widely used codebases that currently pass this flag (cf. [2], [3], and [4]). Given that CLONE_PIDFD grabbed the last clone() flag the time namespace is effectively blocked. clone3() has the advantage that it will unblock this patchset again. In general, clone3() is extensible and allows for the implementation of new features. The idea is to keep clone3() very simple and close to the original clone(), specifically, to keep on supporting old clone()-based workloads. We know there have been various creative proposals how a new process creation syscall or even api is supposed to look like. Some people even going so far as to argue that the traditional fork()+exec() split should be abandoned in favor of an in-kernel version of spawn(). Independent of whether or not we personally think spawn() is a good idea this patchset has and does not want to have anything to do with this. One stance we take is that there's no real good alternative to clone()+exec() and we need and want to support this model going forward; independent of spawn(). The following requirements guided clone3(): - bump the number of available flags - move arguments that are currently passed as separate arguments in clone() into a dedicated struct clone_args - choose a struct layout that is easy to handle on 32 and on 64 bit - choose a struct layout that is extensible - give new flags that currently need to abuse another flag's dedicated return argument in clone() their own dedicated return argument (e.g. CLONE_PIDFD) - use a separate kernel internal struct kernel_clone_args that is properly typed according to current kernel conventions in fork.c and is different from the uapi struct clone_args - port _do_fork() to use kernel_clone_args so that all process creation syscalls such as fork(), vfork(), clone(), and clone3() behave identical (Arnd suggested, that we can probably also port do_fork() itself in a separate patchset.) - ease of transition for userspace from clone() to clone3() This very much means that we do *not* remove functionality that userspace currently relies on as the latter is a good way of creating a syscall that won't be adopted. - do not try to be clever or complex: keep clone3() as dumb as possible In accordance with Linus suggestions (cf. [11]), clone3() has the following signature: /* uapi */ struct clone_args { __aligned_u64 flags; __aligned_u64 pidfd; __aligned_u64 child_tid; __aligned_u64 parent_tid; __aligned_u64 exit_signal; __aligned_u64 stack; __aligned_u64 stack_size; __aligned_u64 tls; }; /* kernel internal */ struct kernel_clone_args { u64 flags; int __user *pidfd; int __user *child_tid; int __user *parent_tid; int exit_signal; unsigned long stack; unsigned long stack_size; unsigned long tls; }; long sys_clone3(struct clone_args __user *uargs, size_t size) clone3() cleanly supports all of the supported flags from clone() and thus all legacy workloads. The advantage of sticking close to the old clone() is the low cost for userspace to switch to this new api. Quite a lot of userspace apis (e.g. pthreads) are based on the clone() syscall. With the new clone3() syscall supporting all of the old workloads and opening up the ability to add new features should make switching to it for userspace more appealing. In essence, glibc can just write a simple wrapper to switch from clone() to clone3(). There has been some interest in this patchset already. We have received a patch from the CRIU corner for clone3() that would set the PID/TID of a restored process without /proc/sys/kernel/ns_last_pid to eliminate a race. /* User visible differences to legacy clone() */ - CLONE_DETACHED will cause EINVAL with clone3() - CSIGNAL is deprecated It is superseeded by a dedicated "exit_signal" argument in struct clone_args freeing up space for additional flags. This is based on a suggestion from Andrei and Linus (cf. [9] and [10]) /* References */ [1]: b3e5838252665ee4cfa76b82bdf1198dca81e5be [2]: https://dxr.mozilla.org/mozilla-central/source/security/sandbox/linux/SandboxFilter.cpp#343 [3]: https://git.musl-libc.org/cgit/musl/tree/src/thread/pthread_create.c#n233 [4]: https://sources.debian.org/src/blcr/0.8.5-2.3/cr_module/cr_dump_self.c/?hl=740#L740 [5]: https://lore.kernel.org/lkml/20190425161416.26600-1-dima@arista.com/ [6]: https://lore.kernel.org/lkml/20190425161416.26600-2-dima@arista.com/ [7]: https://lore.kernel.org/lkml/CAHrFyr5HxpGXA2YrKza-oB-GGwJCqwPfyhD-Y5wbktWZdt0sGQ@mail.gmail.com/ [8]: https://lore.kernel.org/lkml/20190524102756.qjsjxukuq2f4t6bo@brauner.io/ [9]: https://lore.kernel.org/lkml/20190529222414.GA6492@gmail.com/ [10]: https://lore.kernel.org/lkml/CAHk-=whQP-Ykxi=zSYaV9iXsHsENa+2fdj-zYKwyeyed63Lsfw@mail.gmail.com/ [11]: https://lore.kernel.org/lkml/CAHk-=wieuV4hGwznPsX-8E0G2FKhx3NjZ9X3dTKh5zKd+iqOBw@mail.gmail.com/ Suggested-by: Linus Torvalds Signed-off-by: Christian Brauner Acked-by: Arnd Bergmann Acked-by: Serge Hallyn Cc: Kees Cook Cc: Pavel Emelyanov Cc: Jann Horn Cc: David Howells Cc: Andrew Morton Cc: Oleg Nesterov Cc: Adrian Reber Cc: Linus Torvalds Cc: Andrei Vagin Cc: Al Viro Cc: Florian Weimer Cc: linux-api@vger.kernel.org --- arch/x86/ia32/sys_ia32.c | 12 ++- include/linux/sched/task.h | 17 +++- include/linux/syscalls.h | 4 + include/uapi/linux/sched.h | 16 ++++ kernel/fork.c | 201 ++++++++++++++++++++++++++++++++++----------- 5 files changed, 199 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index a43212036257..64a6c952091e 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -237,6 +237,14 @@ COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags, unsigned long, newsp, int __user *, parent_tidptr, unsigned long, tls_val, int __user *, child_tidptr) { - return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, - tls_val); + struct kernel_clone_args args = { + .flags = (clone_flags & ~CSIGNAL), + .child_tid = child_tidptr, + .parent_tid = parent_tidptr, + .exit_signal = (clone_flags & CSIGNAL), + .stack = newsp, + .tls = tls_val, + }; + + return _do_fork(&args); } diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index f1227f2c38a4..109a0df5af39 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -8,11 +8,26 @@ */ #include +#include struct task_struct; struct rusage; union thread_union; +/* All the bits taken by the old clone syscall. */ +#define CLONE_LEGACY_FLAGS 0xffffffffULL + +struct kernel_clone_args { + u64 flags; + int __user *pidfd; + int __user *child_tid; + int __user *parent_tid; + int exit_signal; + unsigned long stack; + unsigned long stack_size; + unsigned long tls; +}; + /* * This serializes "schedule()" and also protects * the run-queue from deletions/modifications (but @@ -73,7 +88,7 @@ extern void do_group_exit(int); extern void exit_files(struct task_struct *); extern void exit_itimers(struct signal_struct *); -extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); +extern long _do_fork(struct kernel_clone_args *kargs); extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); struct mm_struct *copy_init_mm(void); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e2870fe1be5b..60a81f374ca3 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -70,6 +70,7 @@ struct sigaltstack; struct rseq; union bpf_attr; struct io_uring_params; +struct clone_args; #include #include @@ -852,6 +853,9 @@ asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int __user *, unsigned long); #endif #endif + +asmlinkage long sys_clone3(struct clone_args __user *uargs, size_t size); + asmlinkage long sys_execve(const char __user *filename, const char __user *const __user *argv, const char __user *const __user *envp); diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index ed4ee170bee2..f5331dbdcaa2 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -2,6 +2,8 @@ #ifndef _UAPI_LINUX_SCHED_H #define _UAPI_LINUX_SCHED_H +#include + /* * cloning flags: */ @@ -31,6 +33,20 @@ #define CLONE_NEWNET 0x40000000 /* New network namespace */ #define CLONE_IO 0x80000000 /* Clone io context */ +/* + * Arguments for the clone3 syscall + */ +struct clone_args { + __aligned_u64 flags; + __aligned_u64 pidfd; + __aligned_u64 child_tid; + __aligned_u64 parent_tid; + __aligned_u64 exit_signal; + __aligned_u64 stack; + __aligned_u64 stack_size; + __aligned_u64 tls; +}; + /* * Scheduling policies */ diff --git a/kernel/fork.c b/kernel/fork.c index b4cba953040a..08ff131f26b4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1760,19 +1760,15 @@ static __always_inline void delayed_free_task(struct task_struct *tsk) * flags). The actual kick-off is left to the caller. */ static __latent_entropy struct task_struct *copy_process( - unsigned long clone_flags, - unsigned long stack_start, - unsigned long stack_size, - int __user *parent_tidptr, - int __user *child_tidptr, struct pid *pid, int trace, - unsigned long tls, - int node) + int node, + struct kernel_clone_args *args) { int pidfd = -1, retval; struct task_struct *p; struct multiprocess_signals delayed; + u64 clone_flags = args->flags; /* * Don't allow sharing the root directory with processes in a different @@ -1821,27 +1817,12 @@ static __latent_entropy struct task_struct *copy_process( } if (clone_flags & CLONE_PIDFD) { - int reserved; - /* - * - CLONE_PARENT_SETTID is useless for pidfds and also - * parent_tidptr is used to return pidfds. * - CLONE_DETACHED is blocked so that we can potentially * reuse it later for CLONE_PIDFD. * - CLONE_THREAD is blocked until someone really needs it. */ - if (clone_flags & - (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD)) - return ERR_PTR(-EINVAL); - - /* - * Verify that parent_tidptr is sane so we can potentially - * reuse it later. - */ - if (get_user(reserved, parent_tidptr)) - return ERR_PTR(-EFAULT); - - if (reserved != 0) + if (clone_flags & (CLONE_DETACHED | CLONE_THREAD)) return ERR_PTR(-EINVAL); } @@ -1874,11 +1855,11 @@ static __latent_entropy struct task_struct *copy_process( * p->set_child_tid which is (ab)used as a kthread's data pointer for * kernel threads (PF_KTHREAD). */ - p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; + p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL; /* * Clear TID on mm_release()? */ - p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; + p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL; ftrace_graph_init_task(p); @@ -2037,7 +2018,8 @@ static __latent_entropy struct task_struct *copy_process( retval = copy_io(clone_flags, p); if (retval) goto bad_fork_cleanup_namespaces; - retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls); + retval = copy_thread_tls(clone_flags, args->stack, args->stack_size, p, + args->tls); if (retval) goto bad_fork_cleanup_io; @@ -2062,7 +2044,7 @@ static __latent_entropy struct task_struct *copy_process( goto bad_fork_free_pid; pidfd = retval; - retval = put_user(pidfd, parent_tidptr); + retval = put_user(pidfd, args->pidfd); if (retval) goto bad_fork_put_pidfd; } @@ -2105,7 +2087,7 @@ static __latent_entropy struct task_struct *copy_process( if (clone_flags & CLONE_PARENT) p->exit_signal = current->group_leader->exit_signal; else - p->exit_signal = (clone_flags & CSIGNAL); + p->exit_signal = args->exit_signal; p->group_leader = p; p->tgid = p->pid; } @@ -2313,8 +2295,11 @@ static inline void init_idle_pids(struct task_struct *idle) struct task_struct *fork_idle(int cpu) { struct task_struct *task; - task = copy_process(CLONE_VM, 0, 0, NULL, NULL, &init_struct_pid, 0, 0, - cpu_to_node(cpu)); + struct kernel_clone_args args = { + .flags = CLONE_VM, + }; + + task = copy_process(&init_struct_pid, 0, cpu_to_node(cpu), &args); if (!IS_ERR(task)) { init_idle_pids(task); init_idle(task, cpu); @@ -2334,13 +2319,9 @@ struct mm_struct *copy_init_mm(void) * It copies the process, and if successful kick-starts * it and waits for it to finish using the VM if required. */ -long _do_fork(unsigned long clone_flags, - unsigned long stack_start, - unsigned long stack_size, - int __user *parent_tidptr, - int __user *child_tidptr, - unsigned long tls) +long _do_fork(struct kernel_clone_args *args) { + u64 clone_flags = args->flags; struct completion vfork; struct pid *pid; struct task_struct *p; @@ -2356,7 +2337,7 @@ long _do_fork(unsigned long clone_flags, if (!(clone_flags & CLONE_UNTRACED)) { if (clone_flags & CLONE_VFORK) trace = PTRACE_EVENT_VFORK; - else if ((clone_flags & CSIGNAL) != SIGCHLD) + else if (args->exit_signal != SIGCHLD) trace = PTRACE_EVENT_CLONE; else trace = PTRACE_EVENT_FORK; @@ -2365,8 +2346,7 @@ long _do_fork(unsigned long clone_flags, trace = 0; } - p = copy_process(clone_flags, stack_start, stack_size, parent_tidptr, - child_tidptr, NULL, trace, tls, NUMA_NO_NODE); + p = copy_process(NULL, trace, NUMA_NO_NODE, args); add_latent_entropy(); if (IS_ERR(p)) @@ -2382,7 +2362,7 @@ long _do_fork(unsigned long clone_flags, nr = pid_vnr(pid); if (clone_flags & CLONE_PARENT_SETTID) - put_user(nr, parent_tidptr); + put_user(nr, args->parent_tid); if (clone_flags & CLONE_VFORK) { p->vfork_done = &vfork; @@ -2414,8 +2394,16 @@ long do_fork(unsigned long clone_flags, int __user *parent_tidptr, int __user *child_tidptr) { - return _do_fork(clone_flags, stack_start, stack_size, - parent_tidptr, child_tidptr, 0); + struct kernel_clone_args args = { + .flags = (clone_flags & ~CSIGNAL), + .child_tid = child_tidptr, + .parent_tid = parent_tidptr, + .exit_signal = (clone_flags & CSIGNAL), + .stack = stack_start, + .stack_size = stack_size, + }; + + return _do_fork(&args); } #endif @@ -2424,15 +2412,25 @@ long do_fork(unsigned long clone_flags, */ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) { - return _do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, - (unsigned long)arg, NULL, NULL, 0); + struct kernel_clone_args args = { + .flags = ((flags | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL), + .exit_signal = (flags & CSIGNAL), + .stack = (unsigned long)fn, + .stack_size = (unsigned long)arg, + }; + + return _do_fork(&args); } #ifdef __ARCH_WANT_SYS_FORK SYSCALL_DEFINE0(fork) { #ifdef CONFIG_MMU - return _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0); + struct kernel_clone_args args = { + .exit_signal = SIGCHLD, + }; + + return _do_fork(&args); #else /* can not support in nommu mode */ return -EINVAL; @@ -2443,8 +2441,12 @@ SYSCALL_DEFINE0(fork) #ifdef __ARCH_WANT_SYS_VFORK SYSCALL_DEFINE0(vfork) { - return _do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, - 0, NULL, NULL, 0); + struct kernel_clone_args args = { + .flags = CLONE_VFORK | CLONE_VM, + .exit_signal = SIGCHLD, + }; + + return _do_fork(&args); } #endif @@ -2472,7 +2474,110 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, unsigned long, tls) #endif { - return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, tls); + struct kernel_clone_args args = { + .flags = (clone_flags & ~CSIGNAL), + .pidfd = parent_tidptr, + .child_tid = child_tidptr, + .parent_tid = parent_tidptr, + .exit_signal = (clone_flags & CSIGNAL), + .stack = newsp, + .tls = tls, + }; + + /* clone(CLONE_PIDFD) uses parent_tidptr to return a pidfd */ + if ((clone_flags & CLONE_PIDFD) && (clone_flags & CLONE_PARENT_SETTID)) + return -EINVAL; + + return _do_fork(&args); +} + +noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, + struct clone_args __user *uargs, + size_t size) +{ + struct clone_args args; + + if (unlikely(size > PAGE_SIZE)) + return -E2BIG; + + if (unlikely(size < sizeof(struct clone_args))) + return -EINVAL; + + if (unlikely(!access_ok(uargs, size))) + return -EFAULT; + + if (size > sizeof(struct clone_args)) { + unsigned char __user *addr; + unsigned char __user *end; + unsigned char val; + + addr = (void __user *)uargs + sizeof(struct clone_args); + end = (void __user *)uargs + size; + + for (; addr < end; addr++) { + if (get_user(val, addr)) + return -EFAULT; + if (val) + return -E2BIG; + } + + size = sizeof(struct clone_args); + } + + if (copy_from_user(&args, uargs, size)) + return -EFAULT; + + *kargs = (struct kernel_clone_args){ + .flags = args.flags, + .pidfd = u64_to_user_ptr(args.pidfd), + .child_tid = u64_to_user_ptr(args.child_tid), + .parent_tid = u64_to_user_ptr(args.parent_tid), + .exit_signal = args.exit_signal, + .stack = args.stack, + .stack_size = args.stack_size, + .tls = args.tls, + }; + + return 0; +} + +static bool clone3_args_valid(const struct kernel_clone_args *kargs) +{ + /* + * All lower bits of the flag word are taken. + * Verify that no other unknown flags are passed along. + */ + if (kargs->flags & ~CLONE_LEGACY_FLAGS) + return false; + + /* + * - make the CLONE_DETACHED bit reuseable for clone3 + * - make the CSIGNAL bits reuseable for clone3 + */ + if (kargs->flags & (CLONE_DETACHED | CSIGNAL)) + return false; + + if ((kargs->flags & (CLONE_THREAD | CLONE_PARENT)) && + kargs->exit_signal) + return false; + + return true; +} + +SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size) +{ + int err; + + struct kernel_clone_args kargs; + + err = copy_clone_args_from_user(&kargs, uargs, size); + if (err) + return err; + + if (!clone3_args_valid(&kargs)) + return -EINVAL; + + return _do_fork(&kargs); } #endif -- cgit v1.2.3 From f652e66fcca07e59f207bcca27c5566193feabd5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 9 Jun 2019 23:43:13 +0900 Subject: pinctrl: add include guard to pinctrl-state.h Signed-off-by: Masahiro Yamada Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinctrl-state.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinctrl-state.h b/include/linux/pinctrl/pinctrl-state.h index a0e785815a64..635d97e9285e 100644 --- a/include/linux/pinctrl/pinctrl-state.h +++ b/include/linux/pinctrl/pinctrl-state.h @@ -3,6 +3,9 @@ * Standard pin control state definitions */ +#ifndef __LINUX_PINCTRL_PINCTRL_STATE_H +#define __LINUX_PINCTRL_PINCTRL_STATE_H + /** * @PINCTRL_STATE_DEFAULT: the state the pinctrl handle shall be put * into as default, usually this means the pins are up and ready to @@ -31,3 +34,5 @@ #define PINCTRL_STATE_INIT "init" #define PINCTRL_STATE_IDLE "idle" #define PINCTRL_STATE_SLEEP "sleep" + +#endif /* __LINUX_PINCTRL_PINCTRL_STATE_H */ -- cgit v1.2.3 From f16acc9d9b3761ae5e45219c9302f99e20919829 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 5 Jun 2019 08:04:47 -0700 Subject: vfs: introduce generic_copy_file_range() Right now if vfs_copy_file_range() does not use any offload mechanism, it falls back to calling do_splice_direct(). This fails to do basic sanity checks on the files being copied. Before we start adding this necessarily functionality to the fallback path, separate it out into generic_copy_file_range(). generic_copy_file_range() has the same prototype as ->copy_file_range() so that filesystems can use it in their custom ->copy_file_range() method if they so choose. Signed-off-by: Dave Chinner Signed-off-by: Amir Goldstein Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/read_write.c | 35 ++++++++++++++++++++++++++++++++--- include/linux/fs.h | 3 +++ 2 files changed, 35 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/read_write.c b/fs/read_write.c index c543d965e288..676b02fae589 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1565,6 +1565,36 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, } #endif +/** + * generic_copy_file_range - copy data between two files + * @file_in: file structure to read from + * @pos_in: file offset to read from + * @file_out: file structure to write data to + * @pos_out: file offset to write data to + * @len: amount of data to copy + * @flags: copy flags + * + * This is a generic filesystem helper to copy data from one file to another. + * It has no constraints on the source or destination file owners - the files + * can belong to different superblocks and different filesystem types. Short + * copies are allowed. + * + * This should be called from the @file_out filesystem, as per the + * ->copy_file_range() method. + * + * Returns the number of bytes copied or a negative error indicating the + * failure. + */ + +ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) +{ + return do_splice_direct(file_in, &pos_in, file_out, &pos_out, + len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); +} +EXPORT_SYMBOL(generic_copy_file_range); + /* * copy_file_range() differs from regular file read and write in that it * specifically allows return partial success. When it does so is up to @@ -1632,9 +1662,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, goto done; } - ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, - len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); - + ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, + flags); done: if (ret > 0) { fsnotify_access(file_in); diff --git a/include/linux/fs.h b/include/linux/fs.h index f7fdfe93e25d..ea17858310ff 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1889,6 +1889,9 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *, unsigned long, loff_t *, rwf_t); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); +extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags); extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *count, -- cgit v1.2.3 From a31713517dac0862a3f0ec9006df9160ce022b0c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 5 Jun 2019 08:04:48 -0700 Subject: vfs: introduce generic_file_rw_checks() Factor out helper with some checks on in/out file that are common to clone_file_range and copy_file_range. Suggested-by: Darrick J. Wong Signed-off-by: Amir Goldstein Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/read_write.c | 38 +++++++++++--------------------------- include/linux/fs.h | 1 + mm/filemap.c | 24 ++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/fs/read_write.c b/fs/read_write.c index b63dcb4e4fe9..f1900bdb3127 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1617,17 +1617,18 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { - struct inode *inode_in = file_inode(file_in); - struct inode *inode_out = file_inode(file_out); ssize_t ret; if (flags != 0) return -EINVAL; - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) - return -EISDIR; - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) - return -EINVAL; + /* this could be relaxed once a method supports cross-fs copies */ + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) + return -EXDEV; + + ret = generic_file_rw_checks(file_in, file_out); + if (unlikely(ret)) + return ret; ret = rw_verify_area(READ, file_in, &pos_in, len); if (unlikely(ret)) @@ -1637,15 +1638,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (unlikely(ret)) return ret; - if (!(file_in->f_mode & FMODE_READ) || - !(file_out->f_mode & FMODE_WRITE) || - (file_out->f_flags & O_APPEND)) - return -EBADF; - - /* this could be relaxed once a method supports cross-fs copies */ - if (inode_in->i_sb != inode_out->i_sb) - return -EXDEV; - if (len == 0) return 0; @@ -2013,29 +2005,21 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags) { - struct inode *inode_in = file_inode(file_in); - struct inode *inode_out = file_inode(file_out); loff_t ret; WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP); - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) - return -EISDIR; - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) - return -EINVAL; - /* * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on * the same mount. Practically, they only need to be on the same file * system. */ - if (inode_in->i_sb != inode_out->i_sb) + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) return -EXDEV; - if (!(file_in->f_mode & FMODE_READ) || - !(file_out->f_mode & FMODE_WRITE) || - (file_out->f_flags & O_APPEND)) - return -EBADF; + ret = generic_file_rw_checks(file_in, file_out); + if (ret < 0) + return ret; if (!file_in->f_op->remap_file_range) return -EOPNOTSUPP; diff --git a/include/linux/fs.h b/include/linux/fs.h index ea17858310ff..89b9b73eb581 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3049,6 +3049,7 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); extern int generic_remap_checks(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *count, unsigned int remap_flags); +extern int generic_file_rw_checks(struct file *file_in, struct file *file_out); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); diff --git a/mm/filemap.c b/mm/filemap.c index df2006ba0cfa..a38619a4a6af 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3041,6 +3041,30 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in, return 0; } + +/* + * Performs common checks before doing a file copy/clone + * from @file_in to @file_out. + */ +int generic_file_rw_checks(struct file *file_in, struct file *file_out) +{ + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); + + /* Don't copy dirs, pipes, sockets... */ + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) + return -EISDIR; + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + return -EINVAL; + + if (!(file_in->f_mode & FMODE_READ) || + !(file_out->f_mode & FMODE_WRITE) || + (file_out->f_flags & O_APPEND)) + return -EBADF; + + return 0; +} + int pagecache_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) -- cgit v1.2.3 From 96e6e8f4a68df2d94800311163faa67124df24e5 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 5 Jun 2019 08:04:49 -0700 Subject: vfs: add missing checks to copy_file_range Like the clone and dedupe interfaces we've recently fixed, the copy_file_range() implementation is missing basic sanity, limits and boundary condition tests on the parameters that are passed to it from userspace. Create a new "generic_copy_file_checks()" function modelled on the generic_remap_checks() function to provide this missing functionality. [Amir] Shorten copy length instead of checking pos_in limits because input file size already abides by the limits. Signed-off-by: Dave Chinner Signed-off-by: Amir Goldstein Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/read_write.c | 3 ++- include/linux/fs.h | 3 +++ mm/filemap.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/read_write.c b/fs/read_write.c index f1900bdb3127..b0fb1176b628 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1626,7 +1626,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) return -EXDEV; - ret = generic_file_rw_checks(file_in, file_out); + ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, + flags); if (unlikely(ret)) return ret; diff --git a/include/linux/fs.h b/include/linux/fs.h index 89b9b73eb581..e4d382c4342a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3050,6 +3050,9 @@ extern int generic_remap_checks(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *count, unsigned int remap_flags); extern int generic_file_rw_checks(struct file *file_in, struct file *file_out); +extern int generic_copy_file_checks(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t *count, unsigned int flags); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); diff --git a/mm/filemap.c b/mm/filemap.c index 44361928bbb0..aac71aef4c61 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3056,6 +3056,59 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out) return 0; } +/* + * Performs necessary checks before doing a file copy + * + * Can adjust amount of bytes to copy via @req_count argument. + * Returns appropriate error code that caller should return or + * zero in case the copy should be allowed. + */ +int generic_copy_file_checks(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t *req_count, unsigned int flags) +{ + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); + uint64_t count = *req_count; + loff_t size_in; + int ret; + + ret = generic_file_rw_checks(file_in, file_out); + if (ret) + return ret; + + /* Don't touch certain kinds of inodes */ + if (IS_IMMUTABLE(inode_out)) + return -EPERM; + + if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) + return -ETXTBSY; + + /* Ensure offsets don't wrap. */ + if (pos_in + count < pos_in || pos_out + count < pos_out) + return -EOVERFLOW; + + /* Shorten the copy to EOF */ + size_in = i_size_read(inode_in); + if (pos_in >= size_in) + count = 0; + else + count = min(count, size_in - (uint64_t)pos_in); + + ret = generic_write_check_limits(file_out, pos_out, &count); + if (ret) + return ret; + + /* Don't allow overlapped copying within the same file. */ + if (inode_in == inode_out && + pos_out + count > pos_in && + pos_out < pos_in + count) + return -EINVAL; + + *req_count = count; + return 0; +} + int pagecache_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) -- cgit v1.2.3 From e38f7f53c35213b1cbce70eee5de7ced17f40d4a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 5 Jun 2019 08:04:49 -0700 Subject: vfs: introduce file_modified() helper The combination of file_remove_privs() and file_update_mtime() is quite common in filesystem ->write_iter() methods. Modelled after the helper file_accessed(), introduce file_modified() and use it from generic_remap_file_range_prep(). Note that the order of calling file_remove_privs() before file_update_mtime() in the helper was matched to the more common order by filesystems and not the current order in generic_remap_file_range_prep(). Signed-off-by: Amir Goldstein Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/inode.c | 20 ++++++++++++++++++++ fs/read_write.c | 21 +++------------------ include/linux/fs.h | 2 ++ 3 files changed, 25 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/fs/inode.c b/fs/inode.c index df6542ec3b88..4348cfb14562 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1899,6 +1899,26 @@ int file_update_time(struct file *file) } EXPORT_SYMBOL(file_update_time); +/* Caller must hold the file's inode lock */ +int file_modified(struct file *file) +{ + int err; + + /* + * Clear the security bits if the process is not being run by root. + * This keeps people from modifying setuid and setgid binaries. + */ + err = file_remove_privs(file); + if (err) + return err; + + if (unlikely(file->f_mode & FMODE_NOCMTIME)) + return 0; + + return file_update_time(file); +} +EXPORT_SYMBOL(file_modified); + int inode_needs_sync(struct inode *inode) { if (IS_SYNC(inode)) diff --git a/fs/read_write.c b/fs/read_write.c index b0fb1176b628..cec7e7b1f693 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1980,25 +1980,10 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, return ret; /* If can't alter the file contents, we're done. */ - if (!(remap_flags & REMAP_FILE_DEDUP)) { - /* Update the timestamps, since we can alter file contents. */ - if (!(file_out->f_mode & FMODE_NOCMTIME)) { - ret = file_update_time(file_out); - if (ret) - return ret; - } + if (!(remap_flags & REMAP_FILE_DEDUP)) + ret = file_modified(file_out); - /* - * Clear the security bits if the process is not being run by - * root. This keeps people from modifying setuid and setgid - * binaries. - */ - ret = file_remove_privs(file_out); - if (ret) - return ret; - } - - return 0; + return ret; } EXPORT_SYMBOL(generic_remap_file_range_prep); diff --git a/include/linux/fs.h b/include/linux/fs.h index e4d382c4342a..79ffa2958bd8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2177,6 +2177,8 @@ static inline void file_accessed(struct file *file) touch_atime(&file->f_path); } +extern int file_modified(struct file *file); + int sync_inode(struct inode *inode, struct writeback_control *wbc); int sync_inode_metadata(struct inode *inode, int wait); -- cgit v1.2.3 From e63d79d1ffcd2201a2dbff1d7a1184b8f3ec74cf Mon Sep 17 00:00:00 2001 From: Gustavo Pimentel Date: Tue, 4 Jun 2019 15:29:22 +0200 Subject: dmaengine: Add Synopsys eDMA IP core driver Add Synopsys PCIe Endpoint eDMA IP core driver to kernel. This IP is generally distributed with Synopsys PCIe Endpoint IP (depends of the use and licensing agreement). This core driver, initializes and configures the eDMA IP using vma-helpers functions and dma-engine subsystem. This driver can be compile as built-in or external module in kernel. To enable this driver just select DW_EDMA option in kernel configuration, however it requires and selects automatically DMA_ENGINE and DMA_VIRTUAL_CHANNELS option too. In order to transfer data from point A to B as fast as possible this IP requires a dedicated memory space containing linked list of elements. All elements of this linked list are continuous and each one describes a data transfer (source and destination addresses, length and a control variable). For the sake of simplicity, lets assume a memory space for channel write 0 which allows about 42 elements. +---------+ | Desc #0 |-+ +---------+ | V +----------+ | Chunk #0 |-+ | CB = 1 | | +----------+ +-----+ +-----------+ +-----+ +----------+ +->| Burst #0 |->| ... |->| Burst #41 |->| llp | | +----------+ +-----+ +-----------+ +-----+ V +----------+ | Chunk #1 |-+ | CB = 0 | | +-----------+ +-----+ +-----------+ +-----+ +----------+ +->| Burst #42 |->| ... |->| Burst #83 |->| llp | | +-----------+ +-----+ +-----------+ +-----+ V +----------+ | Chunk #2 |-+ | CB = 1 | | +-----------+ +-----+ +------------+ +-----+ +----------+ +->| Burst #84 |->| ... |->| Burst #125 |->| llp | | +-----------+ +-----+ +------------+ +-----+ V +----------+ | Chunk #3 |-+ | CB = 0 | | +------------+ +-----+ +------------+ +-----+ +----------+ +->| Burst #126 |->| ... |->| Burst #129 |->| llp | +------------+ +-----+ +------------+ +-----+ Legend: - Linked list, also know as Chunk - Linked list element*, also know as Burst *CB*, also know as Change Bit, it's a control bit (and typically is toggled) that allows to easily identify and differentiate between the current linked list and the previous or the next one. - LLP, is a special element that indicates the end of the linked list element stream also informs that the next CB should be toggle On every last Burst of the Chunk (Burst #41, Burst #83, Burst #125 or even Burst #129) is set some flags on their control variable (RIE and LIE bits) that will trigger the send of "done" interruption. On the interruptions callback, is decided whether to recycle the linked list memory space by writing a new set of Bursts elements (if still exists Chunks to transfer) or is considered completed (if there is no Chunks available to transfer). On scatter-gather transfer mode, the client will submit a scatter-gather list of n (on this case 130) elements, that will be divide in multiple Chunks, each Chunk will have (on this case 42) a limited number of Bursts and after transferring all Bursts, an interrupt will be triggered, which will allow to recycle the all linked list dedicated memory again with the new information relative to the next Chunk and respective Burst associated and repeat the whole cycle again. On cyclic transfer mode, the client will submit a buffer pointer, length of it and number of repetitions, in this case each burst will correspond directly to each repetition. Each Burst can describes a data transfer from point A(source) to point B(destination) with a length that can be from 1 byte up to 4 GB. Since dedicated the memory space where the linked list will reside is limited, the whole n burst elements will be organized in several Chunks, that will be used later to recycle the dedicated memory space to initiate a new sequence of data transfers. The whole transfer is considered has completed when it was transferred all bursts. Currently this IP has a set well-known register map, which includes support for legacy and unroll modes. Legacy mode is version of this register map that has multiplexer register that allows to switch registers between all write and read channels and the unroll modes repeats all write and read channels registers with an offset between them. This register map is called v0. The IP team is creating a new register map more suitable to the latest PCIe features, that very likely will change the map register, which this version will be called v1. As soon as this new version is released by the IP team the support for this version in be included on this driver. According to the logic, patches 1, 2 and 3 should be squashed into 1 unique patch, but for the sake of simplicity of review, it was divided in this 3 patches files. Signed-off-by: Gustavo Pimentel Cc: Vinod Koul Cc: Dan Williams Cc: Andy Shevchenko Cc: Russell King Cc: Joao Pinto Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 2 + drivers/dma/Makefile | 1 + drivers/dma/dw-edma/Kconfig | 9 + drivers/dma/dw-edma/Makefile | 4 + drivers/dma/dw-edma/dw-edma-core.c | 936 +++++++++++++++++++++++++++++++++++++ drivers/dma/dw-edma/dw-edma-core.h | 165 +++++++ include/linux/dma/edma.h | 47 ++ 7 files changed, 1164 insertions(+) create mode 100644 drivers/dma/dw-edma/Kconfig create mode 100644 drivers/dma/dw-edma/Makefile create mode 100644 drivers/dma/dw-edma/dw-edma-core.c create mode 100644 drivers/dma/dw-edma/dw-edma-core.h create mode 100644 include/linux/dma/edma.h (limited to 'include/linux') diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index eaf78f4e07ce..76859aa2688c 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -665,6 +665,8 @@ source "drivers/dma/qcom/Kconfig" source "drivers/dma/dw/Kconfig" +source "drivers/dma/dw-edma/Kconfig" + source "drivers/dma/hsu/Kconfig" source "drivers/dma/sh/Kconfig" diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 6126e1c3a875..5bddf6f8790f 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_DMA_SUN4I) += sun4i-dma.o obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o obj-$(CONFIG_DW_AXI_DMAC) += dw-axi-dmac/ obj-$(CONFIG_DW_DMAC_CORE) += dw/ +obj-$(CONFIG_DW_EDMA) += dw-edma/ obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o obj-$(CONFIG_FSL_DMA) += fsldma.o obj-$(CONFIG_FSL_EDMA) += fsl-edma.o fsl-edma-common.o diff --git a/drivers/dma/dw-edma/Kconfig b/drivers/dma/dw-edma/Kconfig new file mode 100644 index 000000000000..3016bed63589 --- /dev/null +++ b/drivers/dma/dw-edma/Kconfig @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 + +config DW_EDMA + tristate "Synopsys DesignWare eDMA controller driver" + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support the Synopsys DesignWare eDMA controller, normally + implemented on endpoints SoCs. diff --git a/drivers/dma/dw-edma/Makefile b/drivers/dma/dw-edma/Makefile new file mode 100644 index 000000000000..322401089891 --- /dev/null +++ b/drivers/dma/dw-edma/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_DW_EDMA) += dw-edma.o +dw-edma-objs := dw-edma-core.o diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c new file mode 100644 index 000000000000..c9d032f49dc3 --- /dev/null +++ b/drivers/dma/dw-edma/dw-edma-core.c @@ -0,0 +1,936 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018-2019 Synopsys, Inc. and/or its affiliates. + * Synopsys DesignWare eDMA core driver + * + * Author: Gustavo Pimentel + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dw-edma-core.h" +#include "../dmaengine.h" +#include "../virt-dma.h" + +static inline +struct device *dchan2dev(struct dma_chan *dchan) +{ + return &dchan->dev->device; +} + +static inline +struct device *chan2dev(struct dw_edma_chan *chan) +{ + return &chan->vc.chan.dev->device; +} + +static inline +struct dw_edma_desc *vd2dw_edma_desc(struct virt_dma_desc *vd) +{ + return container_of(vd, struct dw_edma_desc, vd); +} + +static struct dw_edma_burst *dw_edma_alloc_burst(struct dw_edma_chunk *chunk) +{ + struct dw_edma_burst *burst; + + burst = kzalloc(sizeof(*burst), GFP_NOWAIT); + if (unlikely(!burst)) + return NULL; + + INIT_LIST_HEAD(&burst->list); + if (chunk->burst) { + /* Create and add new element into the linked list */ + chunk->bursts_alloc++; + list_add_tail(&burst->list, &chunk->burst->list); + } else { + /* List head */ + chunk->bursts_alloc = 0; + chunk->burst = burst; + } + + return burst; +} + +static struct dw_edma_chunk *dw_edma_alloc_chunk(struct dw_edma_desc *desc) +{ + struct dw_edma_chan *chan = desc->chan; + struct dw_edma *dw = chan->chip->dw; + struct dw_edma_chunk *chunk; + + chunk = kzalloc(sizeof(*chunk), GFP_NOWAIT); + if (unlikely(!chunk)) + return NULL; + + INIT_LIST_HEAD(&chunk->list); + chunk->chan = chan; + /* Toggling change bit (CB) in each chunk, this is a mechanism to + * inform the eDMA HW block that this is a new linked list ready + * to be consumed. + * - Odd chunks originate CB equal to 0 + * - Even chunks originate CB equal to 1 + */ + chunk->cb = !(desc->chunks_alloc % 2); + chunk->ll_region.paddr = dw->ll_region.paddr + chan->ll_off; + chunk->ll_region.vaddr = dw->ll_region.vaddr + chan->ll_off; + + if (desc->chunk) { + /* Create and add new element into the linked list */ + desc->chunks_alloc++; + list_add_tail(&chunk->list, &desc->chunk->list); + if (!dw_edma_alloc_burst(chunk)) { + kfree(chunk); + return NULL; + } + } else { + /* List head */ + chunk->burst = NULL; + desc->chunks_alloc = 0; + desc->chunk = chunk; + } + + return chunk; +} + +static struct dw_edma_desc *dw_edma_alloc_desc(struct dw_edma_chan *chan) +{ + struct dw_edma_desc *desc; + + desc = kzalloc(sizeof(*desc), GFP_NOWAIT); + if (unlikely(!desc)) + return NULL; + + desc->chan = chan; + if (!dw_edma_alloc_chunk(desc)) { + kfree(desc); + return NULL; + } + + return desc; +} + +static void dw_edma_free_burst(struct dw_edma_chunk *chunk) +{ + struct dw_edma_burst *child, *_next; + + /* Remove all the list elements */ + list_for_each_entry_safe(child, _next, &chunk->burst->list, list) { + list_del(&child->list); + kfree(child); + chunk->bursts_alloc--; + } + + /* Remove the list head */ + kfree(child); + chunk->burst = NULL; +} + +static void dw_edma_free_chunk(struct dw_edma_desc *desc) +{ + struct dw_edma_chunk *child, *_next; + + if (!desc->chunk) + return; + + /* Remove all the list elements */ + list_for_each_entry_safe(child, _next, &desc->chunk->list, list) { + dw_edma_free_burst(child); + list_del(&child->list); + kfree(child); + desc->chunks_alloc--; + } + + /* Remove the list head */ + kfree(child); + desc->chunk = NULL; +} + +static void dw_edma_free_desc(struct dw_edma_desc *desc) +{ + dw_edma_free_chunk(desc); + kfree(desc); +} + +static void vchan_free_desc(struct virt_dma_desc *vdesc) +{ + dw_edma_free_desc(vd2dw_edma_desc(vdesc)); +} + +static void dw_edma_start_transfer(struct dw_edma_chan *chan) +{ + struct dw_edma_chunk *child; + struct dw_edma_desc *desc; + struct virt_dma_desc *vd; + + vd = vchan_next_desc(&chan->vc); + if (!vd) + return; + + desc = vd2dw_edma_desc(vd); + if (!desc) + return; + + child = list_first_entry_or_null(&desc->chunk->list, + struct dw_edma_chunk, list); + if (!child) + return; + + dw_edma_v0_core_start(child, !desc->xfer_sz); + desc->xfer_sz += child->ll_region.sz; + dw_edma_free_burst(child); + list_del(&child->list); + kfree(child); + desc->chunks_alloc--; +} + +static int dw_edma_device_config(struct dma_chan *dchan, + struct dma_slave_config *config) +{ + struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan); + + memcpy(&chan->config, config, sizeof(*config)); + chan->configured = true; + + return 0; +} + +static int dw_edma_device_pause(struct dma_chan *dchan) +{ + struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan); + int err = 0; + + if (!chan->configured) + err = -EPERM; + else if (chan->status != EDMA_ST_BUSY) + err = -EPERM; + else if (chan->request != EDMA_REQ_NONE) + err = -EPERM; + else + chan->request = EDMA_REQ_PAUSE; + + return err; +} + +static int dw_edma_device_resume(struct dma_chan *dchan) +{ + struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan); + int err = 0; + + if (!chan->configured) { + err = -EPERM; + } else if (chan->status != EDMA_ST_PAUSE) { + err = -EPERM; + } else if (chan->request != EDMA_REQ_NONE) { + err = -EPERM; + } else { + chan->status = EDMA_ST_BUSY; + dw_edma_start_transfer(chan); + } + + return err; +} + +static int dw_edma_device_terminate_all(struct dma_chan *dchan) +{ + struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan); + int err = 0; + LIST_HEAD(head); + + if (!chan->configured) { + /* Do nothing */ + } else if (chan->status == EDMA_ST_PAUSE) { + chan->status = EDMA_ST_IDLE; + chan->configured = false; + } else if (chan->status == EDMA_ST_IDLE) { + chan->configured = false; + } else if (dw_edma_v0_core_ch_status(chan) == DMA_COMPLETE) { + /* + * The channel is in a false BUSY state, probably didn't + * receive or lost an interrupt + */ + chan->status = EDMA_ST_IDLE; + chan->configured = false; + } else if (chan->request > EDMA_REQ_PAUSE) { + err = -EPERM; + } else { + chan->request = EDMA_REQ_STOP; + } + + return err; +} + +static void dw_edma_device_issue_pending(struct dma_chan *dchan) +{ + struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan); + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + if (chan->configured && chan->request == EDMA_REQ_NONE && + chan->status == EDMA_ST_IDLE && vchan_issue_pending(&chan->vc)) { + chan->status = EDMA_ST_BUSY; + dw_edma_start_transfer(chan); + } + spin_unlock_irqrestore(&chan->vc.lock, flags); +} + +static enum dma_status +dw_edma_device_tx_status(struct dma_chan *dchan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan); + struct dw_edma_desc *desc; + struct virt_dma_desc *vd; + unsigned long flags; + enum dma_status ret; + u32 residue = 0; + + ret = dma_cookie_status(dchan, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; + + if (ret == DMA_IN_PROGRESS && chan->status == EDMA_ST_PAUSE) + ret = DMA_PAUSED; + + if (!txstate) + goto ret_residue; + + spin_lock_irqsave(&chan->vc.lock, flags); + vd = vchan_find_desc(&chan->vc, cookie); + if (vd) { + desc = vd2dw_edma_desc(vd); + if (desc) + residue = desc->alloc_sz - desc->xfer_sz; + } + spin_unlock_irqrestore(&chan->vc.lock, flags); + +ret_residue: + dma_set_residue(txstate, residue); + + return ret; +} + +static struct dma_async_tx_descriptor * +dw_edma_device_transfer(struct dw_edma_transfer *xfer) +{ + struct dw_edma_chan *chan = dchan2dw_edma_chan(xfer->dchan); + enum dma_transfer_direction direction = xfer->direction; + phys_addr_t src_addr, dst_addr; + struct scatterlist *sg = NULL; + struct dw_edma_chunk *chunk; + struct dw_edma_burst *burst; + struct dw_edma_desc *desc; + u32 cnt; + int i; + + if ((direction == DMA_MEM_TO_DEV && chan->dir == EDMA_DIR_WRITE) || + (direction == DMA_DEV_TO_MEM && chan->dir == EDMA_DIR_READ)) + return NULL; + + if (xfer->cyclic) { + if (!xfer->xfer.cyclic.len || !xfer->xfer.cyclic.cnt) + return NULL; + } else { + if (xfer->xfer.sg.len < 1) + return NULL; + } + + if (!chan->configured) + return NULL; + + desc = dw_edma_alloc_desc(chan); + if (unlikely(!desc)) + goto err_alloc; + + chunk = dw_edma_alloc_chunk(desc); + if (unlikely(!chunk)) + goto err_alloc; + + src_addr = chan->config.src_addr; + dst_addr = chan->config.dst_addr; + + if (xfer->cyclic) { + cnt = xfer->xfer.cyclic.cnt; + } else { + cnt = xfer->xfer.sg.len; + sg = xfer->xfer.sg.sgl; + } + + for (i = 0; i < cnt; i++) { + if (!xfer->cyclic && !sg) + break; + + if (chunk->bursts_alloc == chan->ll_max) { + chunk = dw_edma_alloc_chunk(desc); + if (unlikely(!chunk)) + goto err_alloc; + } + + burst = dw_edma_alloc_burst(chunk); + if (unlikely(!burst)) + goto err_alloc; + + if (xfer->cyclic) + burst->sz = xfer->xfer.cyclic.len; + else + burst->sz = sg_dma_len(sg); + + chunk->ll_region.sz += burst->sz; + desc->alloc_sz += burst->sz; + + if (direction == DMA_DEV_TO_MEM) { + burst->sar = src_addr; + if (xfer->cyclic) { + burst->dar = xfer->xfer.cyclic.paddr; + } else { + burst->dar = sg_dma_address(sg); + /* Unlike the typical assumption by other + * drivers/IPs the peripheral memory isn't + * a FIFO memory, in this case, it's a + * linear memory and that why the source + * and destination addresses are increased + * by the same portion (data length) + */ + src_addr += sg_dma_len(sg); + } + } else { + burst->dar = dst_addr; + if (xfer->cyclic) { + burst->sar = xfer->xfer.cyclic.paddr; + } else { + burst->sar = sg_dma_address(sg); + /* Unlike the typical assumption by other + * drivers/IPs the peripheral memory isn't + * a FIFO memory, in this case, it's a + * linear memory and that why the source + * and destination addresses are increased + * by the same portion (data length) + */ + dst_addr += sg_dma_len(sg); + } + } + + if (!xfer->cyclic) + sg = sg_next(sg); + } + + return vchan_tx_prep(&chan->vc, &desc->vd, xfer->flags); + +err_alloc: + if (desc) + dw_edma_free_desc(desc); + + return NULL; +} + +static struct dma_async_tx_descriptor * +dw_edma_device_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl, + unsigned int len, + enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct dw_edma_transfer xfer; + + xfer.dchan = dchan; + xfer.direction = direction; + xfer.xfer.sg.sgl = sgl; + xfer.xfer.sg.len = len; + xfer.flags = flags; + xfer.cyclic = false; + + return dw_edma_device_transfer(&xfer); +} + +static struct dma_async_tx_descriptor * +dw_edma_device_prep_dma_cyclic(struct dma_chan *dchan, dma_addr_t paddr, + size_t len, size_t count, + enum dma_transfer_direction direction, + unsigned long flags) +{ + struct dw_edma_transfer xfer; + + xfer.dchan = dchan; + xfer.direction = direction; + xfer.xfer.cyclic.paddr = paddr; + xfer.xfer.cyclic.len = len; + xfer.xfer.cyclic.cnt = count; + xfer.flags = flags; + xfer.cyclic = true; + + return dw_edma_device_transfer(&xfer); +} + +static void dw_edma_done_interrupt(struct dw_edma_chan *chan) +{ + struct dw_edma_desc *desc; + struct virt_dma_desc *vd; + unsigned long flags; + + dw_edma_v0_core_clear_done_int(chan); + + spin_lock_irqsave(&chan->vc.lock, flags); + vd = vchan_next_desc(&chan->vc); + if (vd) { + switch (chan->request) { + case EDMA_REQ_NONE: + desc = vd2dw_edma_desc(vd); + if (desc->chunks_alloc) { + chan->status = EDMA_ST_BUSY; + dw_edma_start_transfer(chan); + } else { + list_del(&vd->node); + vchan_cookie_complete(vd); + chan->status = EDMA_ST_IDLE; + } + break; + + case EDMA_REQ_STOP: + list_del(&vd->node); + vchan_cookie_complete(vd); + chan->request = EDMA_REQ_NONE; + chan->status = EDMA_ST_IDLE; + break; + + case EDMA_REQ_PAUSE: + chan->request = EDMA_REQ_NONE; + chan->status = EDMA_ST_PAUSE; + break; + + default: + break; + } + } + spin_unlock_irqrestore(&chan->vc.lock, flags); +} + +static void dw_edma_abort_interrupt(struct dw_edma_chan *chan) +{ + struct virt_dma_desc *vd; + unsigned long flags; + + dw_edma_v0_core_clear_abort_int(chan); + + spin_lock_irqsave(&chan->vc.lock, flags); + vd = vchan_next_desc(&chan->vc); + if (vd) { + list_del(&vd->node); + vchan_cookie_complete(vd); + } + spin_unlock_irqrestore(&chan->vc.lock, flags); + chan->request = EDMA_REQ_NONE; + chan->status = EDMA_ST_IDLE; +} + +static irqreturn_t dw_edma_interrupt(int irq, void *data, bool write) +{ + struct dw_edma_irq *dw_irq = data; + struct dw_edma *dw = dw_irq->dw; + unsigned long total, pos, val; + unsigned long off; + u32 mask; + + if (write) { + total = dw->wr_ch_cnt; + off = 0; + mask = dw_irq->wr_mask; + } else { + total = dw->rd_ch_cnt; + off = dw->wr_ch_cnt; + mask = dw_irq->rd_mask; + } + + val = dw_edma_v0_core_status_done_int(dw, write ? + EDMA_DIR_WRITE : + EDMA_DIR_READ); + val &= mask; + for_each_set_bit(pos, &val, total) { + struct dw_edma_chan *chan = &dw->chan[pos + off]; + + dw_edma_done_interrupt(chan); + } + + val = dw_edma_v0_core_status_abort_int(dw, write ? + EDMA_DIR_WRITE : + EDMA_DIR_READ); + val &= mask; + for_each_set_bit(pos, &val, total) { + struct dw_edma_chan *chan = &dw->chan[pos + off]; + + dw_edma_abort_interrupt(chan); + } + + return IRQ_HANDLED; +} + +static inline irqreturn_t dw_edma_interrupt_write(int irq, void *data) +{ + return dw_edma_interrupt(irq, data, true); +} + +static inline irqreturn_t dw_edma_interrupt_read(int irq, void *data) +{ + return dw_edma_interrupt(irq, data, false); +} + +static irqreturn_t dw_edma_interrupt_common(int irq, void *data) +{ + dw_edma_interrupt(irq, data, true); + dw_edma_interrupt(irq, data, false); + + return IRQ_HANDLED; +} + +static int dw_edma_alloc_chan_resources(struct dma_chan *dchan) +{ + struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan); + + if (chan->status != EDMA_ST_IDLE) + return -EBUSY; + + pm_runtime_get(chan->chip->dev); + + return 0; +} + +static void dw_edma_free_chan_resources(struct dma_chan *dchan) +{ + unsigned long timeout = jiffies + msecs_to_jiffies(5000); + struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan); + int ret; + + while (time_before(jiffies, timeout)) { + ret = dw_edma_device_terminate_all(dchan); + if (!ret) + break; + + if (time_after_eq(jiffies, timeout)) + return; + + cpu_relax(); + }; + + pm_runtime_put(chan->chip->dev); +} + +static int dw_edma_channel_setup(struct dw_edma_chip *chip, bool write, + u32 wr_alloc, u32 rd_alloc) +{ + struct dw_edma_region *dt_region; + struct device *dev = chip->dev; + struct dw_edma *dw = chip->dw; + struct dw_edma_chan *chan; + size_t ll_chunk, dt_chunk; + struct dw_edma_irq *irq; + struct dma_device *dma; + u32 i, j, cnt, ch_cnt; + u32 alloc, off_alloc; + int err = 0; + u32 pos; + + ch_cnt = dw->wr_ch_cnt + dw->rd_ch_cnt; + ll_chunk = dw->ll_region.sz; + dt_chunk = dw->dt_region.sz; + + /* Calculate linked list chunk for each channel */ + ll_chunk /= roundup_pow_of_two(ch_cnt); + + /* Calculate linked list chunk for each channel */ + dt_chunk /= roundup_pow_of_two(ch_cnt); + + if (write) { + i = 0; + cnt = dw->wr_ch_cnt; + dma = &dw->wr_edma; + alloc = wr_alloc; + off_alloc = 0; + } else { + i = dw->wr_ch_cnt; + cnt = dw->rd_ch_cnt; + dma = &dw->rd_edma; + alloc = rd_alloc; + off_alloc = wr_alloc; + } + + INIT_LIST_HEAD(&dma->channels); + for (j = 0; (alloc || dw->nr_irqs == 1) && j < cnt; j++, i++) { + chan = &dw->chan[i]; + + dt_region = devm_kzalloc(dev, sizeof(*dt_region), GFP_KERNEL); + if (!dt_region) + return -ENOMEM; + + chan->vc.chan.private = dt_region; + + chan->chip = chip; + chan->id = j; + chan->dir = write ? EDMA_DIR_WRITE : EDMA_DIR_READ; + chan->configured = false; + chan->request = EDMA_REQ_NONE; + chan->status = EDMA_ST_IDLE; + + chan->ll_off = (ll_chunk * i); + chan->ll_max = (ll_chunk / EDMA_LL_SZ) - 1; + + chan->dt_off = (dt_chunk * i); + + dev_vdbg(dev, "L. List:\tChannel %s[%u] off=0x%.8lx, max_cnt=%u\n", + write ? "write" : "read", j, + chan->ll_off, chan->ll_max); + + if (dw->nr_irqs == 1) + pos = 0; + else + pos = off_alloc + (j % alloc); + + irq = &dw->irq[pos]; + + if (write) + irq->wr_mask |= BIT(j); + else + irq->rd_mask |= BIT(j); + + irq->dw = dw; + memcpy(&chan->msi, &irq->msi, sizeof(chan->msi)); + + dev_vdbg(dev, "MSI:\t\tChannel %s[%u] addr=0x%.8x%.8x, data=0x%.8x\n", + write ? "write" : "read", j, + chan->msi.address_hi, chan->msi.address_lo, + chan->msi.data); + + chan->vc.desc_free = vchan_free_desc; + vchan_init(&chan->vc, dma); + + dt_region->paddr = dw->dt_region.paddr + chan->dt_off; + dt_region->vaddr = dw->dt_region.vaddr + chan->dt_off; + dt_region->sz = dt_chunk; + + dev_vdbg(dev, "Data:\tChannel %s[%u] off=0x%.8lx\n", + write ? "write" : "read", j, chan->dt_off); + + dw_edma_v0_core_device_config(chan); + } + + /* Set DMA channel capabilities */ + dma_cap_zero(dma->cap_mask); + dma_cap_set(DMA_SLAVE, dma->cap_mask); + dma_cap_set(DMA_CYCLIC, dma->cap_mask); + dma_cap_set(DMA_PRIVATE, dma->cap_mask); + dma->directions = BIT(write ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV); + dma->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + dma->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + dma->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + dma->chancnt = cnt; + + /* Set DMA channel callbacks */ + dma->dev = chip->dev; + dma->device_alloc_chan_resources = dw_edma_alloc_chan_resources; + dma->device_free_chan_resources = dw_edma_free_chan_resources; + dma->device_config = dw_edma_device_config; + dma->device_pause = dw_edma_device_pause; + dma->device_resume = dw_edma_device_resume; + dma->device_terminate_all = dw_edma_device_terminate_all; + dma->device_issue_pending = dw_edma_device_issue_pending; + dma->device_tx_status = dw_edma_device_tx_status; + dma->device_prep_slave_sg = dw_edma_device_prep_slave_sg; + dma->device_prep_dma_cyclic = dw_edma_device_prep_dma_cyclic; + + dma_set_max_seg_size(dma->dev, U32_MAX); + + /* Register DMA device */ + err = dma_async_device_register(dma); + + return err; +} + +static inline void dw_edma_dec_irq_alloc(int *nr_irqs, u32 *alloc, u16 cnt) +{ + if (*nr_irqs && *alloc < cnt) { + (*alloc)++; + (*nr_irqs)--; + } +} + +static inline void dw_edma_add_irq_mask(u32 *mask, u32 alloc, u16 cnt) +{ + while (*mask * alloc < cnt) + (*mask)++; +} + +static int dw_edma_irq_request(struct dw_edma_chip *chip, + u32 *wr_alloc, u32 *rd_alloc) +{ + struct device *dev = chip->dev; + struct dw_edma *dw = chip->dw; + u32 wr_mask = 1; + u32 rd_mask = 1; + int i, err = 0; + u32 ch_cnt; + + ch_cnt = dw->wr_ch_cnt + dw->rd_ch_cnt; + + if (dw->nr_irqs < 1) + return -EINVAL; + + if (dw->nr_irqs == 1) { + /* Common IRQ shared among all channels */ + err = request_irq(pci_irq_vector(to_pci_dev(dev), 0), + dw_edma_interrupt_common, + IRQF_SHARED, dw->name, &dw->irq[0]); + if (err) { + dw->nr_irqs = 0; + return err; + } + + get_cached_msi_msg(pci_irq_vector(to_pci_dev(dev), 0), + &dw->irq[0].msi); + } else { + /* Distribute IRQs equally among all channels */ + int tmp = dw->nr_irqs; + + while (tmp && (*wr_alloc + *rd_alloc) < ch_cnt) { + dw_edma_dec_irq_alloc(&tmp, wr_alloc, dw->wr_ch_cnt); + dw_edma_dec_irq_alloc(&tmp, rd_alloc, dw->rd_ch_cnt); + } + + dw_edma_add_irq_mask(&wr_mask, *wr_alloc, dw->wr_ch_cnt); + dw_edma_add_irq_mask(&rd_mask, *rd_alloc, dw->rd_ch_cnt); + + for (i = 0; i < (*wr_alloc + *rd_alloc); i++) { + err = request_irq(pci_irq_vector(to_pci_dev(dev), i), + i < *wr_alloc ? + dw_edma_interrupt_write : + dw_edma_interrupt_read, + IRQF_SHARED, dw->name, + &dw->irq[i]); + if (err) { + dw->nr_irqs = i; + return err; + } + + get_cached_msi_msg(pci_irq_vector(to_pci_dev(dev), i), + &dw->irq[i].msi); + } + + dw->nr_irqs = i; + } + + return err; +} + +int dw_edma_probe(struct dw_edma_chip *chip) +{ + struct device *dev = chip->dev; + struct dw_edma *dw = chip->dw; + u32 wr_alloc = 0; + u32 rd_alloc = 0; + int i, err; + + raw_spin_lock_init(&dw->lock); + + /* Find out how many write channels are supported by hardware */ + dw->wr_ch_cnt = dw_edma_v0_core_ch_count(dw, EDMA_DIR_WRITE); + if (!dw->wr_ch_cnt) + return -EINVAL; + + /* Find out how many read channels are supported by hardware */ + dw->rd_ch_cnt = dw_edma_v0_core_ch_count(dw, EDMA_DIR_READ); + if (!dw->rd_ch_cnt) + return -EINVAL; + + dev_vdbg(dev, "Channels:\twrite=%d, read=%d\n", + dw->wr_ch_cnt, dw->rd_ch_cnt); + + /* Allocate channels */ + dw->chan = devm_kcalloc(dev, dw->wr_ch_cnt + dw->rd_ch_cnt, + sizeof(*dw->chan), GFP_KERNEL); + if (!dw->chan) + return -ENOMEM; + + snprintf(dw->name, sizeof(dw->name), "dw-edma-core:%d", chip->id); + + /* Disable eDMA, only to establish the ideal initial conditions */ + dw_edma_v0_core_off(dw); + + /* Request IRQs */ + err = dw_edma_irq_request(chip, &wr_alloc, &rd_alloc); + if (err) + return err; + + /* Setup write channels */ + err = dw_edma_channel_setup(chip, true, wr_alloc, rd_alloc); + if (err) + goto err_irq_free; + + /* Setup read channels */ + err = dw_edma_channel_setup(chip, false, wr_alloc, rd_alloc); + if (err) + goto err_irq_free; + + /* Power management */ + pm_runtime_enable(dev); + + /* Turn debugfs on */ + dw_edma_v0_core_debugfs_on(chip); + + return 0; + +err_irq_free: + for (i = (dw->nr_irqs - 1); i >= 0; i--) + free_irq(pci_irq_vector(to_pci_dev(dev), i), &dw->irq[i]); + + dw->nr_irqs = 0; + + return err; +} +EXPORT_SYMBOL_GPL(dw_edma_probe); + +int dw_edma_remove(struct dw_edma_chip *chip) +{ + struct dw_edma_chan *chan, *_chan; + struct device *dev = chip->dev; + struct dw_edma *dw = chip->dw; + int i; + + /* Disable eDMA */ + dw_edma_v0_core_off(dw); + + /* Free irqs */ + for (i = (dw->nr_irqs - 1); i >= 0; i--) + free_irq(pci_irq_vector(to_pci_dev(dev), i), &dw->irq[i]); + + /* Power management */ + pm_runtime_disable(dev); + + list_for_each_entry_safe(chan, _chan, &dw->wr_edma.channels, + vc.chan.device_node) { + list_del(&chan->vc.chan.device_node); + tasklet_kill(&chan->vc.task); + } + + list_for_each_entry_safe(chan, _chan, &dw->rd_edma.channels, + vc.chan.device_node) { + list_del(&chan->vc.chan.device_node); + tasklet_kill(&chan->vc.task); + } + + /* Deregister eDMA device */ + dma_async_device_unregister(&dw->wr_edma); + dma_async_device_unregister(&dw->rd_edma); + + /* Turn debugfs off */ + dw_edma_v0_core_debugfs_off(); + + return 0; +} +EXPORT_SYMBOL_GPL(dw_edma_remove); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Synopsys DesignWare eDMA controller core driver"); +MODULE_AUTHOR("Gustavo Pimentel "); diff --git a/drivers/dma/dw-edma/dw-edma-core.h b/drivers/dma/dw-edma/dw-edma-core.h new file mode 100644 index 000000000000..b6cc90cbc9dc --- /dev/null +++ b/drivers/dma/dw-edma/dw-edma-core.h @@ -0,0 +1,165 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018-2019 Synopsys, Inc. and/or its affiliates. + * Synopsys DesignWare eDMA core driver + * + * Author: Gustavo Pimentel + */ + +#ifndef _DW_EDMA_CORE_H +#define _DW_EDMA_CORE_H + +#include +#include + +#include "../virt-dma.h" + +#define EDMA_LL_SZ 24 + +enum dw_edma_dir { + EDMA_DIR_WRITE = 0, + EDMA_DIR_READ +}; + +enum dw_edma_mode { + EDMA_MODE_LEGACY = 0, + EDMA_MODE_UNROLL +}; + +enum dw_edma_request { + EDMA_REQ_NONE = 0, + EDMA_REQ_STOP, + EDMA_REQ_PAUSE +}; + +enum dw_edma_status { + EDMA_ST_IDLE = 0, + EDMA_ST_PAUSE, + EDMA_ST_BUSY +}; + +struct dw_edma_chan; +struct dw_edma_chunk; + +struct dw_edma_burst { + struct list_head list; + u64 sar; + u64 dar; + u32 sz; +}; + +struct dw_edma_region { + phys_addr_t paddr; + dma_addr_t vaddr; + size_t sz; +}; + +struct dw_edma_chunk { + struct list_head list; + struct dw_edma_chan *chan; + struct dw_edma_burst *burst; + + u32 bursts_alloc; + + u8 cb; + struct dw_edma_region ll_region; /* Linked list */ +}; + +struct dw_edma_desc { + struct virt_dma_desc vd; + struct dw_edma_chan *chan; + struct dw_edma_chunk *chunk; + + u32 chunks_alloc; + + u32 alloc_sz; + u32 xfer_sz; +}; + +struct dw_edma_chan { + struct virt_dma_chan vc; + struct dw_edma_chip *chip; + int id; + enum dw_edma_dir dir; + + off_t ll_off; + u32 ll_max; + + off_t dt_off; + + struct msi_msg msi; + + enum dw_edma_request request; + enum dw_edma_status status; + u8 configured; + + struct dma_slave_config config; +}; + +struct dw_edma_irq { + struct msi_msg msi; + u32 wr_mask; + u32 rd_mask; + struct dw_edma *dw; +}; + +struct dw_edma { + char name[20]; + + struct dma_device wr_edma; + u16 wr_ch_cnt; + + struct dma_device rd_edma; + u16 rd_ch_cnt; + + struct dw_edma_region rg_region; /* Registers */ + struct dw_edma_region ll_region; /* Linked list */ + struct dw_edma_region dt_region; /* Data */ + + struct dw_edma_irq *irq; + int nr_irqs; + + u32 version; + enum dw_edma_mode mode; + + struct dw_edma_chan *chan; + const struct dw_edma_core_ops *ops; + + raw_spinlock_t lock; /* Only for legacy */ +}; + +struct dw_edma_sg { + struct scatterlist *sgl; + unsigned int len; +}; + +struct dw_edma_cyclic { + dma_addr_t paddr; + size_t len; + size_t cnt; +}; + +struct dw_edma_transfer { + struct dma_chan *dchan; + union dw_edma_xfer { + struct dw_edma_sg sg; + struct dw_edma_cyclic cyclic; + } xfer; + enum dma_transfer_direction direction; + unsigned long flags; + bool cyclic; +}; + +static inline +struct dw_edma_chan *vc2dw_edma_chan(struct virt_dma_chan *vc) +{ + return container_of(vc, struct dw_edma_chan, vc); +} + +static inline +struct dw_edma_chan *dchan2dw_edma_chan(struct dma_chan *dchan) +{ + return vc2dw_edma_chan(to_virt_chan(dchan)); +} + +#endif /* _DW_EDMA_CORE_H */ diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h new file mode 100644 index 000000000000..cab6e18773da --- /dev/null +++ b/include/linux/dma/edma.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018-2019 Synopsys, Inc. and/or its affiliates. + * Synopsys DesignWare eDMA core driver + * + * Author: Gustavo Pimentel + */ + +#ifndef _DW_EDMA_H +#define _DW_EDMA_H + +#include +#include + +struct dw_edma; + +/** + * struct dw_edma_chip - representation of DesignWare eDMA controller hardware + * @dev: struct device of the eDMA controller + * @id: instance ID + * @irq: irq line + * @dw: struct dw_edma that is filed by dw_edma_probe() + */ +struct dw_edma_chip { + struct device *dev; + int id; + int irq; + struct dw_edma *dw; +}; + +/* Export to the platform drivers */ +#if IS_ENABLED(CONFIG_DW_EDMA) +int dw_edma_probe(struct dw_edma_chip *chip); +int dw_edma_remove(struct dw_edma_chip *chip); +#else +static inline int dw_edma_probe(struct dw_edma_chip *chip) +{ + return -ENODEV; +} + +static inline int dw_edma_remove(struct dw_edma_chip *chip) +{ + return 0; +} +#endif /* CONFIG_DW_EDMA */ + +#endif /* _DW_EDMA_H */ -- cgit v1.2.3 From 1f418f46503d72594bbe6407d97fd2ae1ce15ee6 Mon Sep 17 00:00:00 2001 From: Gustavo Pimentel Date: Tue, 4 Jun 2019 15:29:25 +0200 Subject: PCI: Add Synopsys endpoint EDDA Device ID Create and add Synopsys Endpoint EDDA Device ID to PCI ID list, since this ID is now being use on two different drivers (pci_endpoint_test.ko and dw-edma-pcie.ko). Signed-off-by: Gustavo Pimentel Acked-by: Bjorn Helgaas Cc: Kishon Vijay Abraham I Cc: Bjorn Helgaas Cc: Lorenzo Pieralisi Cc: Joao Pinto Signed-off-by: Vinod Koul --- drivers/misc/pci_endpoint_test.c | 2 +- include/linux/pci_ids.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index 7b015f2a1c6f..1f531c1b4f74 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -804,7 +804,7 @@ static const struct pci_device_id pci_endpoint_test_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_DRA74x) }, { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_DRA72x) }, { PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, 0x81c0) }, - { PCI_DEVICE(PCI_VENDOR_ID_SYNOPSYS, 0xedda) }, + { PCI_DEVICE_DATA(SYNOPSYS, EDDA, NULL) }, { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_AM654), .driver_data = (kernel_ulong_t)&am654_data }, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 70e86148cb1e..4aad69fc4d6b 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2366,6 +2366,7 @@ #define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3 0xabcd #define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI 0xabce #define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB31 0xabcf +#define PCI_DEVICE_ID_SYNOPSYS_EDDA 0xedda #define PCI_VENDOR_ID_USR 0x16ec -- cgit v1.2.3 From 2769bd79a9154b933cc774ee773dd78b04d2be60 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:32 -0700 Subject: mfd: cros_ec: Update license term Update to SPDX-License-Identifier, GPL-2.0 Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index dcec96f01879..48292d449921 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -1,25 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Host communication command constants for ChromeOS EC * * Copyright (C) 2012 Google, Inc * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * The ChromeOS EC multi function device is used to mux all the requests - * to the EC device for its multiple features: keyboard controller, - * battery charging and regulator control, firmware update. - * - * NOTE: This file is copied verbatim from the ChromeOS EC Open Source - * project in an attempt to make future updates easy to make. + * NOTE: This file is auto-generated from ChromeOS EC Open Source code from + * https://chromium.googlesource.com/chromiumos/platform/ec/+/master/include/ec_commands.h */ +/* Host communication command constants for Chrome EC */ + #ifndef __CROS_EC_COMMANDS_H #define __CROS_EC_COMMANDS_H -- cgit v1.2.3 From c9f69d8b170c2044013bf0248300d25a068f3fb5 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:33 -0700 Subject: mfd: cros_ec: Zero BUILD_ macro Defined out build macro used when compiling embedded controller firmware. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 48292d449921..7b8fac4d0c89 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -13,6 +13,11 @@ #ifndef __CROS_EC_COMMANDS_H #define __CROS_EC_COMMANDS_H + + + +#define BUILD_ASSERT(_cond) + /* * Current version of this protocol * -- cgit v1.2.3 From df95a3bdf8f1fe110a50873f8f24eb2675b73d35 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:34 -0700 Subject: mfd: cros_ec: set comments properly Fix comments syntax and spelling errors. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 65 ++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 7b8fac4d0c89..a7a7060f44f7 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -42,13 +42,16 @@ /* Protocol version 2 */ #define EC_LPC_ADDR_HOST_ARGS 0x800 /* And 0x801, 0x802, 0x803 */ #define EC_LPC_ADDR_HOST_PARAM 0x804 /* For version 2 params; size is - * EC_PROTO2_MAX_PARAM_SIZE */ + * EC_PROTO2_MAX_PARAM_SIZE + */ /* Protocol version 3 */ #define EC_LPC_ADDR_HOST_PACKET 0x800 /* Offset of version 3 packet */ #define EC_LPC_HOST_PACKET_SIZE 0x100 /* Max size of version 3 packet */ -/* The actual block is 0x800-0x8ff, but some BIOSes think it's 0x880-0x8ff - * and they tell the kernel that so we have to think of it as two parts. */ +/* + * The actual block is 0x800-0x8ff, but some BIOSes think it's 0x880-0x8ff + * and they tell the kernel that so we have to think of it as two parts. + */ #define EC_HOST_CMD_REGION0 0x800 #define EC_HOST_CMD_REGION1 0x880 #define EC_HOST_CMD_REGION_SIZE 0x80 @@ -324,7 +327,7 @@ struct ec_lpc_host_args { * If EC gets a command and this flag is not set, this is an old-style command. * Command version is 0 and params from host are at EC_LPC_ADDR_OLD_PARAM with * unknown length. EC must respond with an old-style response (that is, - * withouth setting EC_HOST_ARGS_FLAG_TO_HOST). + * without setting EC_HOST_ARGS_FLAG_TO_HOST). */ #define EC_HOST_ARGS_FLAG_FROM_HOST 0x01 /* @@ -511,7 +514,7 @@ struct ec_host_response { * Notes on commands: * * Each command is an 16-bit command value. Commands which take params or - * return response data specify structs for that data. If no struct is + * return response data specify structures for that data. If no structure is * specified, the command does not input or output data, respectively. * Parameter/response length is implicit in the structs. Some underlying * communication protocols (I2C, SPI) may add length or checksum headers, but @@ -684,7 +687,7 @@ struct ec_response_get_cmd_versions { } __packed; /* - * Check EC communcations status (busy). This is needed on i2c/spi but not + * Check EC communications status (busy). This is needed on i2c/spi but not * on lpc since it has its own out-of-band busy indicator. * * lpc must read the status from the command register. Attempting this on @@ -721,7 +724,7 @@ struct ec_response_test_protocol { uint8_t buf[32]; } __packed; -/* Get prococol information */ +/* Get protocol information */ #define EC_CMD_GET_PROTOCOL_INFO 0x0b /* Flags for ec_response_get_protocol_info.flags */ @@ -767,7 +770,7 @@ struct ec_response_get_set_value { uint32_t value; } __packed; -/* More than one command can use these structs to get/set paramters. */ +/* More than one command can use these structs to get/set parameters. */ #define EC_CMD_GSV_PAUSE_IN_S5 0x0c /*****************************************************************************/ @@ -917,8 +920,10 @@ struct ec_response_flash_info { uint32_t protect_block_size; } __packed; -/* Flags for version 1+ flash info command */ -/* EC flash erases bits to 0 instead of 1 */ +/* + * Flags for version 1+ flash info command + * EC flash erases bits to 0 instead of 1. + */ #define EC_FLASH_INFO_ERASE_TO_0 (1 << 0) /** @@ -941,7 +946,8 @@ struct ec_response_flash_info { * fields following. * * gcc anonymous structs don't seem to get along with the __packed directive; - * if they did we'd define the version 0 struct as a sub-struct of this one. + * if they did we'd define the version 0 structure as a sub-structure of this + * one. */ struct ec_response_flash_info_1 { /* Version 0 fields; see above for description */ @@ -1036,7 +1042,7 @@ struct ec_params_flash_erase { * re-requesting the desired flags, or by a hard reset if that fails. */ #define EC_FLASH_PROTECT_ERROR_INCONSISTENT (1 << 5) -/* Entile flash code protected when the EC boots */ +/* Entire flash code protected when the EC boots */ #define EC_FLASH_PROTECT_ALL_AT_BOOT (1 << 6) /** @@ -1629,7 +1635,7 @@ struct ec_response_motion_sensor_data { union { int16_t data[3]; struct { - uint16_t rsvd; + uint16_t reserved; uint32_t timestamp; } __packed; struct { @@ -1828,7 +1834,7 @@ struct ec_response_rtc { #define EC_CMD_RTC_SET_VALUE 0x46 #define EC_CMD_RTC_SET_ALARM 0x47 -/* Pass as param to SET_ALARM to clear the current alarm */ +/* Pass as time param to SET_ALARM to clear the current alarm */ #define EC_RTC_ALARM_CLEAR 0 /*****************************************************************************/ @@ -1914,7 +1920,8 @@ enum ec_temp_thresholds { EC_TEMP_THRESH_COUNT }; -/* Thermal configuration for one temperature sensor. Temps are in degrees K. +/* + * Thermal configuration for one temperature sensor. Temps are in degrees K. * Zero values will be silently ignored by the thermal task. */ struct ec_thermal_config { @@ -1929,8 +1936,10 @@ struct ec_params_thermal_get_threshold_v1 { } __packed; /* This returns a struct ec_thermal_config */ -/* Version 1 - set config for one sensor. - * Use read-modify-write for best results! */ +/* + * Version 1 - set config for one sensor. + * Use read-modify-write for best results! + */ struct ec_params_thermal_set_threshold_v1 { uint32_t sensor_num; struct ec_thermal_config cfg; @@ -2079,7 +2088,12 @@ enum mkbp_config_valid { EC_MKBP_VALID_FIFO_MAX_DEPTH = 1 << 7, }; -/* Configuration for our key scanning algorithm */ +/* + * Configuration for our key scanning algorithm. + * + * Note that this is used as a sub-structure of + * ec_{params/response}_mkbp_get_config. + */ struct ec_mkbp_config { uint32_t valid_mask; /* valid fields */ uint8_t flags; /* some flags (enum mkbp_config_flags) */ @@ -2362,6 +2376,7 @@ struct ec_params_gpio_set { struct ec_params_gpio_get { char name[32]; } __packed; + struct ec_response_gpio_get { uint8_t val; } __packed; @@ -2402,8 +2417,10 @@ enum gpio_get_subcmd { /* I2C commands. Only available when flash write protect is unlocked. */ /* - * TODO(crosbug.com/p/23570): These commands are deprecated, and will be - * removed soon. Use EC_CMD_I2C_XFER instead. + * CAUTION: These commands are deprecated, and are not supported anymore in EC + * builds >= 8398.0.0 (see crosbug.com/p/23570). + * + * Use EC_CMD_I2C_PASSTHRU instead. */ /* Read I2C bus */ @@ -2415,6 +2432,7 @@ struct ec_params_i2c_read { uint8_t port; uint8_t offset; } __packed; + struct ec_response_i2c_read { uint16_t data; } __packed; @@ -2450,7 +2468,6 @@ struct ec_params_charge_control { } __packed; /*****************************************************************************/ -/* Console commands. Only available when flash write protect is unlocked. */ /* Snapshot console output buffer for use by EC_CMD_CONSOLE_READ. */ #define EC_CMD_CONSOLE_SNAPSHOT 0x97 @@ -2904,9 +2921,7 @@ enum ec_i2s_config { }; struct ec_param_codec_i2s { - /* - * enum ec_codec_i2s_subcmd - */ + /* enum ec_codec_i2s_subcmd */ uint8_t cmd; union { /* @@ -2981,7 +2996,7 @@ struct ec_response_codec_gain { enum ec_reboot_cmd { EC_REBOOT_CANCEL = 0, /* Cancel a pending reboot */ EC_REBOOT_JUMP_RO = 1, /* Jump to RO without rebooting */ - EC_REBOOT_JUMP_RW = 2, /* Jump to RW without rebooting */ + EC_REBOOT_JUMP_RW = 2, /* Jump to active RW without rebooting */ /* (command 3 was jump to RW-B) */ EC_REBOOT_COLD = 4, /* Cold-reboot */ EC_REBOOT_DISABLE_JUMP = 5, /* Disable jump until next reboot */ -- cgit v1.2.3 From 6f72c3f9bbdf08fde3b328c8bafbe8d667590b4e Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:35 -0700 Subject: mfd: cros_ec: add ec_align macros To reduce code and improve performance of the embedded controller firmware, pragma __aligned(2) or __aligned(4) are used when alignment to 16 or 32 bit boundary is expected. Define all ec_align to packed when compiling kernel. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 442 +++++++++++++++++++---------------- 1 file changed, 238 insertions(+), 204 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index a7a7060f44f7..c12ae9742e20 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -188,12 +188,46 @@ #ifndef __ACPI__ /* - * Define __packed if someone hasn't beat us to it. Linux kernel style - * checking prefers __packed over __attribute__((packed)). + * Attributes for EC request and response packets. Just defining __packed + * results in inefficient assembly code on ARM, if the structure is actually + * 32-bit aligned, as it should be for all buffers. + * + * Be very careful when adding these to existing structures. They will round + * up the structure size to the specified boundary. + * + * Also be very careful to make that if a structure is included in some other + * parent structure that the alignment will still be true given the packing of + * the parent structure. This is particularly important if the sub-structure + * will be passed as a pointer to another function, since that function will + * not know about the misaligment caused by the parent structure's packing. + * + * Also be very careful using __packed - particularly when nesting non-packed + * structures inside packed ones. In fact, DO NOT use __packed directly; + * always use one of these attributes. + * + * Once everything is annotated properly, the following search strings should + * not return ANY matches in this file other than right here: + * + * "__packed" - generates inefficient code; all sub-structs must also be packed + * + * "struct [^_]" - all structs should be annotated, except for structs that are + * members of other structs/unions (and their original declarations should be + * annotated). */ -#ifndef __packed -#define __packed __attribute__((packed)) -#endif + +/* + * Packed structures make no assumption about alignment, so they do inefficient + * byte-wise reads. + */ +#define __ec_align1 __packed +#define __ec_align2 __packed +#define __ec_align4 __packed +#define __ec_align_size1 __packed +#define __ec_align_offset1 __packed +#define __ec_align_offset2 __packed +#define __ec_todo_packed __packed +#define __ec_todo_unpacked + /* LPC command status byte masks */ /* EC has written a byte in the data register and host hasn't read it yet */ @@ -317,7 +351,7 @@ struct ec_lpc_host_args { uint8_t command_version; uint8_t data_size; uint8_t checksum; -} __packed; +} __ec_align4; /* Flags for ec_lpc_host_args.flags */ /* @@ -488,7 +522,7 @@ struct ec_host_request { uint8_t command_version; uint8_t reserved; uint16_t data_len; -} __packed; +} __ec_align4; #define EC_HOST_RESPONSE_VERSION 3 @@ -507,7 +541,7 @@ struct ec_host_response { uint16_t result; uint16_t data_len; uint16_t reserved; -} __packed; +} __ec_align4; /*****************************************************************************/ /* @@ -536,7 +570,7 @@ struct ec_host_response { */ struct ec_response_proto_version { uint32_t version; -} __packed; +} __ec_align4; /* * Hello. This is a simple command to test the EC is responsive to @@ -550,7 +584,7 @@ struct ec_response_proto_version { */ struct ec_params_hello { uint32_t in_data; -} __packed; +} __ec_align4; /** * struct ec_response_hello - Response to the hello command. @@ -558,7 +592,7 @@ struct ec_params_hello { */ struct ec_response_hello { uint32_t out_data; -} __packed; +} __ec_align4; /* Get version number */ #define EC_CMD_GET_VERSION 0x02 @@ -581,7 +615,7 @@ struct ec_response_get_version { char version_string_rw[32]; char reserved[32]; uint32_t current_image; -} __packed; +} __ec_align4; /* Read test */ #define EC_CMD_READ_TEST 0x03 @@ -594,7 +628,7 @@ struct ec_response_get_version { struct ec_params_read_test { uint32_t offset; uint32_t size; -} __packed; +} __ec_align4; /** * struct ec_response_read_test - Response to the read test command. @@ -602,7 +636,7 @@ struct ec_params_read_test { */ struct ec_response_read_test { uint32_t data[32]; -} __packed; +} __ec_align4; /* * Get build information @@ -624,7 +658,7 @@ struct ec_response_get_chip_info { char vendor[32]; char name[32]; char revision[32]; -} __packed; +} __ec_align4; /* Get board HW version */ #define EC_CMD_GET_BOARD_VERSION 0x06 @@ -635,7 +669,7 @@ struct ec_response_get_chip_info { */ struct ec_response_board_version { uint16_t board_version; -} __packed; +} __ec_align2; /* * Read memory-mapped data. @@ -655,7 +689,7 @@ struct ec_response_board_version { struct ec_params_read_memmap { uint8_t offset; uint8_t size; -} __packed; +} __ec_align1; /* Read versions supported for a command */ #define EC_CMD_GET_CMD_VERSIONS 0x08 @@ -666,7 +700,7 @@ struct ec_params_read_memmap { */ struct ec_params_get_cmd_versions { uint8_t cmd; -} __packed; +} __ec_align1; /** * struct ec_params_get_cmd_versions_v1 - Parameters for the get command @@ -675,7 +709,7 @@ struct ec_params_get_cmd_versions { */ struct ec_params_get_cmd_versions_v1 { uint16_t cmd; -} __packed; +} __ec_align2; /** * struct ec_response_get_cmd_version - Response to the get command versions. @@ -684,7 +718,7 @@ struct ec_params_get_cmd_versions_v1 { */ struct ec_response_get_cmd_versions { uint32_t version_mask; -} __packed; +} __ec_align4; /* * Check EC communications status (busy). This is needed on i2c/spi but not @@ -707,7 +741,7 @@ enum ec_comms_status { */ struct ec_response_get_comms_status { uint32_t flags; /* Mask of enum ec_comms_status */ -} __packed; +} __ec_align4; /* Fake a variety of responses, purely for testing purposes. */ #define EC_CMD_TEST_PROTOCOL 0x0a @@ -717,12 +751,12 @@ struct ec_params_test_protocol { uint32_t ec_result; uint32_t ret_len; uint8_t buf[32]; -} __packed; +} __ec_align4; /* Here it comes... */ struct ec_response_test_protocol { uint8_t buf[32]; -} __packed; +} __ec_align4; /* Get protocol information */ #define EC_CMD_GET_PROTOCOL_INFO 0x0b @@ -745,7 +779,7 @@ struct ec_response_get_protocol_info { uint16_t max_request_packet_size; uint16_t max_response_packet_size; uint32_t flags; -} __packed; +} __ec_align4; /*****************************************************************************/ @@ -763,12 +797,12 @@ struct ec_response_get_protocol_info { struct ec_params_get_set_value { uint32_t flags; uint32_t value; -} __packed; +} __ec_align4; struct ec_response_get_set_value { uint32_t flags; uint32_t value; -} __packed; +} __ec_align4; /* More than one command can use these structs to get/set parameters. */ #define EC_CMD_GSV_PAUSE_IN_S5 0x0c @@ -893,7 +927,7 @@ enum ec_feature_code { struct ec_response_get_features { uint32_t flags[2]; -} __packed; +} __ec_align4; /*****************************************************************************/ /* Flash commands */ @@ -918,7 +952,7 @@ struct ec_response_flash_info { uint32_t write_block_size; uint32_t erase_block_size; uint32_t protect_block_size; -} __packed; +} __ec_align4; /* * Flags for version 1+ flash info command @@ -959,7 +993,7 @@ struct ec_response_flash_info_1 { /* Version 1 adds these fields: */ uint32_t write_ideal_size; uint32_t flags; -} __packed; +} __ec_align4; /* * Read flash @@ -976,7 +1010,7 @@ struct ec_response_flash_info_1 { struct ec_params_flash_read { uint32_t offset; uint32_t size; -} __packed; +} __ec_align4; /* Write flash */ #define EC_CMD_FLASH_WRITE 0x12 @@ -994,7 +1028,7 @@ struct ec_params_flash_write { uint32_t offset; uint32_t size; /* Followed by data to write */ -} __packed; +} __ec_align4; /* Erase flash */ #define EC_CMD_FLASH_ERASE 0x13 @@ -1007,7 +1041,7 @@ struct ec_params_flash_write { struct ec_params_flash_erase { uint32_t offset; uint32_t size; -} __packed; +} __ec_align4; /* * Get/set flash protection. @@ -1053,7 +1087,7 @@ struct ec_params_flash_erase { struct ec_params_flash_protect { uint32_t mask; uint32_t flags; -} __packed; +} __ec_align4; /** * struct ec_response_flash_protect - Response to the flash protect command. @@ -1068,7 +1102,7 @@ struct ec_response_flash_protect { uint32_t flags; uint32_t valid_flags; uint32_t writable_flags; -} __packed; +} __ec_align4; /* * Note: commands 0x14 - 0x19 version 0 were old commands to get/set flash @@ -1100,12 +1134,12 @@ enum ec_flash_region { */ struct ec_params_flash_region_info { uint32_t region; -} __packed; +} __ec_align4; struct ec_response_flash_region_info { uint32_t offset; uint32_t size; -} __packed; +} __ec_align4; /* Read/write VbNvContext */ #define EC_CMD_VBNV_CONTEXT 0x17 @@ -1120,11 +1154,11 @@ enum ec_vbnvcontext_op { struct ec_params_vbnvcontext { uint32_t op; uint8_t block[EC_VBNV_BLOCK_SIZE]; -} __packed; +} __ec_align4; struct ec_response_vbnvcontext { uint8_t block[EC_VBNV_BLOCK_SIZE]; -} __packed; +} __ec_align4; /*****************************************************************************/ /* PWM commands */ @@ -1134,14 +1168,14 @@ struct ec_response_vbnvcontext { struct ec_response_pwm_get_fan_rpm { uint32_t rpm; -} __packed; +} __ec_align4; /* Set target fan RPM */ -#define EC_CMD_PWM_SET_FAN_TARGET_RPM 0x21 +#define EC_CMD_PWM_SET_FAN_TARGET_RPM 0x0021 struct ec_params_pwm_set_fan_target_rpm { uint32_t rpm; -} __packed; +} __ec_align_size1; /* Get keyboard backlight */ #define EC_CMD_PWM_GET_KEYBOARD_BACKLIGHT 0x22 @@ -1149,21 +1183,21 @@ struct ec_params_pwm_set_fan_target_rpm { struct ec_response_pwm_get_keyboard_backlight { uint8_t percent; uint8_t enabled; -} __packed; +} __ec_align1; /* Set keyboard backlight */ #define EC_CMD_PWM_SET_KEYBOARD_BACKLIGHT 0x23 struct ec_params_pwm_set_keyboard_backlight { uint8_t percent; -} __packed; +} __ec_align1; /* Set target fan PWM duty cycle */ #define EC_CMD_PWM_SET_FAN_DUTY 0x24 struct ec_params_pwm_set_fan_duty { uint32_t percent; -} __packed; +} __ec_align4; #define EC_CMD_PWM_SET_DUTY 0x25 /* 16 bit duty cycle, 0xffff = 100% */ @@ -1183,18 +1217,18 @@ struct ec_params_pwm_set_duty { uint16_t duty; /* Duty cycle, EC_PWM_MAX_DUTY = 100% */ uint8_t pwm_type; /* ec_pwm_type */ uint8_t index; /* Type-specific index, or 0 if unique */ -} __packed; +} __ec_align4; #define EC_CMD_PWM_GET_DUTY 0x26 struct ec_params_pwm_get_duty { uint8_t pwm_type; /* ec_pwm_type */ uint8_t index; /* Type-specific index, or 0 if unique */ -} __packed; +} __ec_align1; struct ec_response_pwm_get_duty { uint16_t duty; /* Duty cycle, EC_PWM_MAX_DUTY = 100% */ -} __packed; +} __ec_align2; /*****************************************************************************/ /* @@ -1207,7 +1241,7 @@ struct ec_response_pwm_get_duty { struct rgb_s { uint8_t r, g, b; -}; +} __ec_todo_unpacked; #define LB_BATTERY_LEVELS 4 @@ -1247,7 +1281,7 @@ struct lightbar_params_v0 { /* Color palette */ struct rgb_s color[8]; /* 0-3 are Google colors */ -} __packed; +} __ec_todo_packed; struct lightbar_params_v1 { /* Timing */ @@ -1290,14 +1324,14 @@ struct lightbar_params_v1 { /* Color palette */ struct rgb_s color[8]; /* 0-3 are Google colors */ -} __packed; +} __ec_todo_packed; /* Lightbar program */ #define EC_LB_PROG_LEN 192 struct lightbar_program { uint8_t size; uint8_t data[EC_LB_PROG_LEN]; -}; +} __ec_todo_unpacked; struct ec_params_lightbar { uint8_t cmd; /* Command (see enum lightbar_command) */ @@ -1307,23 +1341,23 @@ struct ec_params_lightbar { } dump, off, on, init, get_seq, get_params_v0, get_params_v1, version, get_brightness, get_demo, suspend, resume; - struct { + struct __ec_todo_unpacked { uint8_t num; } set_brightness, seq, demo; - struct { + struct __ec_todo_unpacked { uint8_t ctrl, reg, value; } reg; - struct { + struct __ec_todo_unpacked { uint8_t led, red, green, blue; } set_rgb; - struct { + struct __ec_todo_unpacked { uint8_t led; } get_rgb; - struct { + struct __ec_todo_unpacked { uint8_t enable; } manual_suspend_ctrl; @@ -1331,31 +1365,31 @@ struct ec_params_lightbar { struct lightbar_params_v1 set_params_v1; struct lightbar_program set_program; }; -} __packed; +} __ec_todo_packed; struct ec_response_lightbar { union { - struct { - struct { + struct __ec_todo_unpacked { + struct __ec_todo_unpacked { uint8_t reg; uint8_t ic0; uint8_t ic1; } vals[23]; } dump; - struct { + struct __ec_todo_unpacked { uint8_t num; } get_seq, get_brightness, get_demo; struct lightbar_params_v0 get_params_v0; struct lightbar_params_v1 get_params_v1; - struct { + struct __ec_todo_unpacked { uint32_t num; uint32_t flags; } version; - struct { + struct __ec_todo_unpacked { uint8_t red, green, blue; } get_rgb; @@ -1365,7 +1399,7 @@ struct ec_response_lightbar { demo, set_params_v0, set_params_v1, set_program, manual_suspend_ctrl, suspend, resume; }; -} __packed; +} __ec_todo_packed; /* Lightbar commands */ enum lightbar_command { @@ -1432,7 +1466,7 @@ struct ec_params_led_control { uint8_t flags; /* Control flags */ uint8_t brightness[EC_LED_COLOR_COUNT]; -} __packed; +} __ec_align1; struct ec_response_led_control { /* @@ -1443,7 +1477,7 @@ struct ec_response_led_control { * Other values means the LED is control by PWM. */ uint8_t brightness_range[EC_LED_COLOR_COUNT]; -} __packed; +} __ec_align1; /*****************************************************************************/ /* Verified boot commands */ @@ -1464,7 +1498,7 @@ struct ec_params_vboot_hash { uint32_t offset; /* Offset in flash to hash */ uint32_t size; /* Number of bytes to hash */ uint8_t nonce_data[64]; /* Nonce data; ignored if nonce_size=0 */ -} __packed; +} __ec_align4; struct ec_response_vboot_hash { uint8_t status; /* enum ec_vboot_hash_status */ @@ -1474,7 +1508,7 @@ struct ec_response_vboot_hash { uint32_t offset; /* Offset in flash which was hashed */ uint32_t size; /* Number of bytes hashed */ uint8_t hash_digest[64]; /* Hash digest data */ -} __packed; +} __ec_align4; enum ec_vboot_hash_cmd { EC_VBOOT_HASH_GET = 0, /* Get current hash status */ @@ -1634,23 +1668,23 @@ struct ec_response_motion_sensor_data { /* Each sensor is up to 3-axis. */ union { int16_t data[3]; - struct { + struct __ec_todo_packed { uint16_t reserved; uint32_t timestamp; - } __packed; - struct { + }; + struct __ec_todo_unpacked { uint8_t activity; /* motionsensor_activity */ uint8_t state; int16_t add_info[2]; }; }; -} __packed; +} __ec_todo_packed; struct ec_params_motion_sense { uint8_t cmd; union { /* Used for MOTIONSENSE_CMD_DUMP. */ - struct { + struct __ec_todo_unpacked { /* no args */ } dump; @@ -1658,13 +1692,13 @@ struct ec_params_motion_sense { * Used for MOTIONSENSE_CMD_EC_RATE and * MOTIONSENSE_CMD_KB_WAKE_ANGLE. */ - struct { + struct __ec_todo_unpacked { /* Data to set or EC_MOTION_SENSE_NO_VALUE to read. */ int16_t data; } ec_rate, kb_wake_angle; /* Used for MOTIONSENSE_CMD_SENSOR_OFFSET */ - struct { + struct __ec_todo_packed { uint8_t sensor_num; /* @@ -1690,10 +1724,10 @@ struct ec_params_motion_sense { * Compass: 1/16 uT */ int16_t offset[3]; - } __packed sensor_offset; + } sensor_offset; /* Used for MOTIONSENSE_CMD_INFO. */ - struct { + struct __ec_todo_packed { uint8_t sensor_num; } info; @@ -1714,12 +1748,12 @@ struct ec_params_motion_sense { int32_t data; } sensor_odr, sensor_range; }; -} __packed; +} __ec_todo_packed; struct ec_response_motion_sense { union { /* Used for MOTIONSENSE_CMD_DUMP. */ - struct { + struct __ec_todo_unpacked { /* Flags representing the motion sensor module. */ uint8_t module_flags; @@ -1734,7 +1768,7 @@ struct ec_response_motion_sense { } dump; /* Used for MOTIONSENSE_CMD_INFO. */ - struct { + struct __ec_todo_unpacked { /* Should be element of enum motionsensor_type. */ uint8_t type; @@ -1753,18 +1787,18 @@ struct ec_response_motion_sense { * MOTIONSENSE_CMD_SENSOR_RANGE, and * MOTIONSENSE_CMD_KB_WAKE_ANGLE. */ - struct { + struct __ec_todo_unpacked { /* Current value of the parameter queried. */ int32_t ret; } ec_rate, sensor_odr, sensor_range, kb_wake_angle; /* Used for MOTIONSENSE_CMD_SENSOR_OFFSET */ - struct { + struct __ec_todo_unpacked { int16_t temp; int16_t offset[3]; } sensor_offset, perform_calib; }; -} __packed; +} __ec_todo_packed; /*****************************************************************************/ /* USB charging control commands */ @@ -1775,7 +1809,7 @@ struct ec_response_motion_sense { struct ec_params_usb_charge_set_mode { uint8_t usb_port_id; uint8_t mode; -} __packed; +} __ec_align1; /*****************************************************************************/ /* Persistent storage for host */ @@ -1791,7 +1825,7 @@ struct ec_response_pstore_info { uint32_t pstore_size; /* Access size; read/write offset and size must be a multiple of this */ uint32_t access_size; -} __packed; +} __ec_align4; /* * Read persistent storage @@ -1803,7 +1837,7 @@ struct ec_response_pstore_info { struct ec_params_pstore_read { uint32_t offset; /* Byte offset to read */ uint32_t size; /* Size to read in bytes */ -} __packed; +} __ec_align4; /* Write persistent storage */ #define EC_CMD_PSTORE_WRITE 0x42 @@ -1812,7 +1846,7 @@ struct ec_params_pstore_write { uint32_t offset; /* Byte offset to write */ uint32_t size; /* Size to write in bytes */ uint8_t data[EC_PSTORE_SIZE_MAX]; -} __packed; +} __ec_align4; /*****************************************************************************/ /* Real-time clock */ @@ -1820,11 +1854,11 @@ struct ec_params_pstore_write { /* RTC params and response structures */ struct ec_params_rtc { uint32_t time; -} __packed; +} __ec_align4; struct ec_response_rtc { uint32_t time; -} __packed; +} __ec_align4; /* These use ec_response_rtc */ #define EC_CMD_RTC_GET_VALUE 0x44 @@ -1855,29 +1889,29 @@ enum ec_port80_subcmd { struct ec_params_port80_read { uint16_t subcmd; union { - struct { + struct __ec_todo_unpacked { uint32_t offset; uint32_t num_entries; } read_buffer; }; -} __packed; +} __ec_todo_packed; struct ec_response_port80_read { union { - struct { + struct __ec_todo_unpacked { uint32_t writes; uint32_t history_size; uint32_t last_boot; } get_info; - struct { + struct __ec_todo_unpacked { uint16_t codes[EC_PORT80_SIZE_MAX]; } data; }; -} __packed; +} __ec_todo_packed; struct ec_response_port80_last_boot { uint16_t code; -} __packed; +} __ec_align2; /*****************************************************************************/ /* Thermal engine commands. Note that there are two implementations. We'll @@ -1898,17 +1932,17 @@ struct ec_params_thermal_set_threshold { uint8_t sensor_type; uint8_t threshold_id; uint16_t value; -} __packed; +} __ec_align2; /* Version 0 - get */ struct ec_params_thermal_get_threshold { uint8_t sensor_type; uint8_t threshold_id; -} __packed; +} __ec_align1; struct ec_response_thermal_get_threshold { uint16_t value; -} __packed; +} __ec_align2; /* The version 1 structs are visible. */ @@ -1928,12 +1962,12 @@ struct ec_thermal_config { uint32_t temp_host[EC_TEMP_THRESH_COUNT]; /* levels of hotness */ uint32_t temp_fan_off; /* no active cooling needed */ uint32_t temp_fan_max; /* max active cooling needed */ -} __packed; +} __ec_align4; /* Version 1 - get config for one sensor. */ struct ec_params_thermal_get_threshold_v1 { uint32_t sensor_num; -} __packed; +} __ec_align4; /* This returns a struct ec_thermal_config */ /* @@ -1943,7 +1977,7 @@ struct ec_params_thermal_get_threshold_v1 { struct ec_params_thermal_set_threshold_v1 { uint32_t sensor_num; struct ec_thermal_config cfg; -} __packed; +} __ec_align4; /* This returns no data */ /****************************************************************************/ @@ -1956,14 +1990,14 @@ struct ec_params_thermal_set_threshold_v1 { struct ec_params_tmp006_get_calibration { uint8_t index; -} __packed; +} __ec_align1; struct ec_response_tmp006_get_calibration { float s0; float b0; float b1; float b2; -} __packed; +} __ec_align4; /* Set TMP006 calibration data */ #define EC_CMD_TMP006_SET_CALIBRATION 0x54 @@ -1975,19 +2009,19 @@ struct ec_params_tmp006_set_calibration { float b0; float b1; float b2; -} __packed; +} __ec_align4; /* Read raw TMP006 data */ #define EC_CMD_TMP006_GET_RAW 0x55 struct ec_params_tmp006_get_raw { uint8_t index; -} __packed; +} __ec_align1; struct ec_response_tmp006_get_raw { int32_t t; /* In 1/100 K */ int32_t v; /* In nV */ -}; +} __ec_align4; /*****************************************************************************/ /* MKBP - Matrix KeyBoard Protocol */ @@ -2014,12 +2048,12 @@ struct ec_response_mkbp_info { uint32_t cols; /* Formerly "switches", which was 0. */ uint8_t reserved; -} __packed; +} __ec_align_size1; struct ec_params_mkbp_info { uint8_t info_type; uint8_t event_type; -} __packed; +} __ec_align1; enum ec_mkbp_info_type { /* @@ -2067,7 +2101,7 @@ struct ec_params_mkbp_simulate_key { uint8_t col; uint8_t row; uint8_t pressed; -} __packed; +} __ec_align1; /* Configure keyboard scanning */ #define EC_CMD_MKBP_SET_CONFIG 0x64 @@ -2113,15 +2147,15 @@ struct ec_mkbp_config { uint16_t debounce_up_us; /* time for debounce on key up */ /* maximum depth to allow for fifo (0 = no keyscan output) */ uint8_t fifo_max_depth; -} __packed; +} __ec_align_size1; struct ec_params_mkbp_set_config { struct ec_mkbp_config config; -} __packed; +} __ec_align_size1; struct ec_response_mkbp_get_config { struct ec_mkbp_config config; -} __packed; +} __ec_align_size1; /* Run the key scan emulation */ #define EC_CMD_KEYSCAN_SEQ_CTRL 0x66 @@ -2144,18 +2178,18 @@ enum ec_collect_flags { struct ec_collect_item { uint8_t flags; /* some flags (enum ec_collect_flags) */ -}; +} __ec_align1; struct ec_params_keyscan_seq_ctrl { uint8_t cmd; /* Command to send (enum ec_keyscan_seq_cmd) */ union { - struct { + struct __ec_align1 { uint8_t active; /* still active */ uint8_t num_items; /* number of items */ /* Current item being presented */ uint8_t cur_item; } status; - struct { + struct __ec_todo_unpacked { /* * Absolute time for this scan, measured from the * start of the sequence. @@ -2163,22 +2197,22 @@ struct ec_params_keyscan_seq_ctrl { uint32_t time_us; uint8_t scan[0]; /* keyscan data */ } add; - struct { + struct __ec_align1 { uint8_t start_item; /* First item to return */ uint8_t num_items; /* Number of items to return */ } collect; }; -} __packed; +} __ec_todo_packed; struct ec_result_keyscan_seq_ctrl { union { - struct { + struct __ec_todo_unpacked { uint8_t num_items; /* Number of items */ /* Data for each item */ struct ec_collect_item item[0]; } collect; }; -} __packed; +} __ec_todo_packed; /* * Command for retrieving the next pending MKBP event from the EC device @@ -2216,8 +2250,8 @@ enum ec_mkbp_event { EC_MKBP_EVENT_COUNT, }; -union ec_response_get_next_data { - uint8_t key_matrix[13]; +union __ec_align_offset1 ec_response_get_next_data { + uint8_t key_matrix[13]; /* Unaligned */ uint32_t host_event; @@ -2225,9 +2259,9 @@ union ec_response_get_next_data { uint32_t buttons; uint32_t switches; uint32_t sysrq; -} __packed; +}; -union ec_response_get_next_data_v1 { +union __ec_align_offset1 ec_response_get_next_data_v1 { uint8_t key_matrix[16]; uint32_t host_event; uint32_t buttons; @@ -2235,19 +2269,19 @@ union ec_response_get_next_data_v1 { uint32_t sysrq; uint32_t cec_events; uint8_t cec_message[16]; -} __packed; +}; struct ec_response_get_next_event { uint8_t event_type; /* Followed by event data if any */ union ec_response_get_next_data data; -} __packed; +} __ec_align1; struct ec_response_get_next_event_v1 { uint8_t event_type; /* Followed by event data if any */ union ec_response_get_next_data_v1 data; -} __packed; +} __ec_align1; /* Bit indices for buttons and switches.*/ /* Buttons */ @@ -2268,12 +2302,12 @@ struct ec_response_get_next_event_v1 { struct ec_params_temp_sensor_get_info { uint8_t id; -} __packed; +} __ec_align1; struct ec_response_temp_sensor_get_info { char sensor_name[32]; uint8_t sensor_type; -} __packed; +} __ec_align1; /*****************************************************************************/ @@ -2292,11 +2326,11 @@ struct ec_response_temp_sensor_get_info { */ struct ec_params_host_event_mask { uint32_t mask; -} __packed; +} __ec_align4; struct ec_response_host_event_mask { uint32_t mask; -} __packed; +} __ec_align4; /* These all use ec_response_host_event_mask */ #define EC_CMD_HOST_EVENT_GET_B 0x87 @@ -2319,7 +2353,7 @@ struct ec_response_host_event_mask { struct ec_params_switch_enable_backlight { uint8_t enabled; -} __packed; +} __ec_align1; /* Enable/disable WLAN/Bluetooth */ #define EC_CMD_SWITCH_ENABLE_WIRELESS 0x91 @@ -2328,7 +2362,7 @@ struct ec_params_switch_enable_backlight { /* Version 0 params; no response */ struct ec_params_switch_enable_wireless_v0 { uint8_t enabled; -} __packed; +} __ec_align1; /* Version 1 params */ struct ec_params_switch_enable_wireless_v1 { @@ -2347,7 +2381,7 @@ struct ec_params_switch_enable_wireless_v1 { /* Which flags to copy from suspend_flags */ uint8_t suspend_mask; -} __packed; +} __ec_align1; /* Version 1 response */ struct ec_response_switch_enable_wireless_v1 { @@ -2356,7 +2390,7 @@ struct ec_response_switch_enable_wireless_v1 { /* Flags to leave enabled in S3 */ uint8_t suspend_flags; -} __packed; +} __ec_align1; /*****************************************************************************/ /* GPIO commands. Only available on EC if write protect has been disabled. */ @@ -2367,7 +2401,7 @@ struct ec_response_switch_enable_wireless_v1 { struct ec_params_gpio_set { char name[32]; uint8_t val; -} __packed; +} __ec_align1; /* Get GPIO value */ #define EC_CMD_GPIO_GET 0x93 @@ -2375,37 +2409,37 @@ struct ec_params_gpio_set { /* Version 0 of input params and response */ struct ec_params_gpio_get { char name[32]; -} __packed; +} __ec_align1; struct ec_response_gpio_get { uint8_t val; -} __packed; +} __ec_align1; /* Version 1 of input params and response */ struct ec_params_gpio_get_v1 { uint8_t subcmd; union { - struct { + struct __ec_align1 { char name[32]; } get_value_by_name; - struct { + struct __ec_align1 { uint8_t index; } get_info; }; -} __packed; +} __ec_align1; struct ec_response_gpio_get_v1 { union { - struct { + struct __ec_align1 { uint8_t val; } get_value_by_name, get_count; - struct { + struct __ec_todo_unpacked { uint8_t val; char name[32]; uint32_t flags; } get_info; }; -} __packed; +} __ec_todo_packed; enum gpio_get_subcmd { EC_GPIO_GET_BY_NAME = 0, @@ -2431,11 +2465,11 @@ struct ec_params_i2c_read { uint8_t read_size; /* Either 8 or 16. */ uint8_t port; uint8_t offset; -} __packed; +} __ec_align_size1; struct ec_response_i2c_read { uint16_t data; -} __packed; +} __ec_align2; /* Write I2C bus */ #define EC_CMD_I2C_WRITE 0x95 @@ -2446,7 +2480,7 @@ struct ec_params_i2c_write { uint8_t write_size; /* Either 8 or 16. */ uint8_t port; uint8_t offset; -} __packed; +} __ec_align_size1; /*****************************************************************************/ /* Charge state commands. Only available when flash write protect unlocked. */ @@ -2465,7 +2499,7 @@ enum ec_charge_control_mode { struct ec_params_charge_control { uint32_t mode; /* enum charge_control_mode */ -} __packed; +} __ec_align4; /*****************************************************************************/ @@ -2493,7 +2527,7 @@ enum ec_console_read_subcmd { struct ec_params_console_read_v1 { uint8_t subcmd; /* enum ec_console_read_subcmd */ -} __packed; +} __ec_align1; /*****************************************************************************/ @@ -2511,7 +2545,7 @@ struct ec_params_console_read_v1 { struct ec_params_battery_cutoff { uint8_t flags; -} __packed; +} __ec_align1; /*****************************************************************************/ /* USB port mux control. */ @@ -2523,7 +2557,7 @@ struct ec_params_battery_cutoff { struct ec_params_usb_mux { uint8_t mux; -} __packed; +} __ec_align1; /*****************************************************************************/ /* LDOs / FETs control. */ @@ -2541,7 +2575,7 @@ enum ec_ldo_state { struct ec_params_ldo_set { uint8_t index; uint8_t state; -} __packed; +} __ec_align1; /* * Get LDO state. @@ -2550,11 +2584,11 @@ struct ec_params_ldo_set { struct ec_params_ldo_get { uint8_t index; -} __packed; +} __ec_align1; struct ec_response_ldo_get { uint8_t state; -} __packed; +} __ec_align1; /*****************************************************************************/ /* Power info. */ @@ -2570,7 +2604,7 @@ struct ec_response_power_info { uint16_t voltage_system; uint16_t current_system; uint16_t usb_current_limit; -} __packed; +} __ec_align4; /*****************************************************************************/ /* I2C passthru command */ @@ -2592,20 +2626,20 @@ struct ec_response_power_info { struct ec_params_i2c_passthru_msg { uint16_t addr_flags; /* I2C slave address (7 or 10 bits) and flags */ uint16_t len; /* Number of bytes to read or write */ -} __packed; +} __ec_align2; struct ec_params_i2c_passthru { uint8_t port; /* I2C port number */ uint8_t num_msgs; /* Number of messages */ struct ec_params_i2c_passthru_msg msg[]; /* Data to write for all messages is concatenated here */ -} __packed; +} __ec_align2; struct ec_response_i2c_passthru { uint8_t i2c_status; /* Status flags (EC_I2C_STATUS_...) */ uint8_t num_msgs; /* Number of messages processed */ uint8_t data[]; /* Data read by messages concatenated here */ -} __packed; +} __ec_align1; /*****************************************************************************/ /* Power button hang detect */ @@ -2660,7 +2694,7 @@ struct ec_params_hang_detect { /* Timeout in msec before generating warm reboot, if enabled */ uint16_t warm_reboot_timeout_msec; -} __packed; +} __ec_align4; /*****************************************************************************/ /* Commands for battery charging */ @@ -2706,20 +2740,20 @@ struct ec_params_charge_state { /* no args */ } get_state; - struct { + struct __ec_todo_unpacked { uint32_t param; /* enum charge_state_param */ } get_param; - struct { + struct __ec_todo_unpacked { uint32_t param; /* param to set */ uint32_t value; /* value to set */ } set_param; }; -} __packed; +} __ec_todo_packed; struct ec_response_charge_state { union { - struct { + struct __ec_align4 { int ac; int chg_voltage; int chg_current; @@ -2727,14 +2761,14 @@ struct ec_response_charge_state { int batt_state_of_charge; } get_state; - struct { + struct __ec_align4 { uint32_t value; } get_param; struct { /* no return values */ } set_param; }; -} __packed; +} __ec_align4; /* @@ -2744,7 +2778,7 @@ struct ec_response_charge_state { struct ec_params_current_limit { uint32_t limit; /* in mA */ -} __packed; +} __ec_align4; /* * Set maximum external voltage / current. @@ -2755,7 +2789,7 @@ struct ec_params_current_limit { struct ec_params_external_power_limit_v1 { uint16_t current_lim; /* in mA, or EC_POWER_LIMIT_NONE to clear limit */ uint16_t voltage_lim; /* in mV, or EC_POWER_LIMIT_NONE to clear limit */ -} __packed; +} __ec_align2; #define EC_POWER_LIMIT_NONE 0xffff @@ -2771,7 +2805,7 @@ enum host_sleep_event { struct ec_params_host_sleep_event { uint8_t sleep_event; -} __packed; +} __ec_align1; /* * Use a default timeout value (CONFIG_SLEEP_TIMEOUT_MS) for detecting sleep @@ -2802,7 +2836,7 @@ struct ec_params_host_sleep_event_v1 { /* No parameters for non-suspend messages. */ }; -} __packed; +} __ec_align2; /* A timeout occurred when this bit is set */ #define EC_HOST_RESUME_SLEEP_TIMEOUT 0x80000000 @@ -2828,7 +2862,7 @@ struct ec_response_host_sleep_event_v1 { /* No response fields for non-resume messages. */ }; -} __packed; +} __ec_align4; /*****************************************************************************/ /* Smart battery pass-through */ @@ -2845,25 +2879,25 @@ struct ec_response_host_sleep_event_v1 { struct ec_params_sb_rd { uint8_t reg; -} __packed; +} __ec_align1; struct ec_response_sb_rd_word { uint16_t value; -} __packed; +} __ec_align2; struct ec_params_sb_wr_word { uint8_t reg; uint16_t value; -} __packed; +} __ec_align1; struct ec_response_sb_rd_block { uint8_t data[32]; -} __packed; +} __ec_align1; struct ec_params_sb_wr_block { uint8_t reg; uint16_t data[32]; -} __packed; +} __ec_align1; /*****************************************************************************/ /* Battery vendor parameters @@ -2885,11 +2919,11 @@ struct ec_params_battery_vendor_param { uint32_t param; uint32_t value; uint8_t mode; -} __packed; +} __ec_align_size1; struct ec_response_battery_vendor_param { uint32_t value; -} __packed; +} __ec_align4; /*****************************************************************************/ /* Commands for I2S recording on audio codec. */ @@ -2920,7 +2954,7 @@ enum ec_i2s_config { EC_DAI_FMT_PCM_TDM = 5, }; -struct ec_param_codec_i2s { +struct __ec_todo_packed ec_param_codec_i2s { /* enum ec_codec_i2s_subcmd */ uint8_t cmd; union { @@ -2934,10 +2968,10 @@ struct ec_param_codec_i2s { * EC_CODEC_SET_GAIN * Value should be 0~43 for both channels. */ - struct ec_param_codec_i2s_set_gain { + struct __ec_align1 ec_param_codec_i2s_set_gain { uint8_t left; uint8_t right; - } __packed gain; + } gain; /* * EC_CODEC_I2S_ENABLE @@ -2955,7 +2989,7 @@ struct ec_param_codec_i2s { * EC_CODEC_I2S_SET_TDM_CONFIG * Value should be one of ec_i2s_config. */ - struct ec_param_codec_i2s_tdm { + struct __ec_todo_unpacked ec_param_codec_i2s_tdm { /* * 0 to 496 */ @@ -2966,14 +3000,14 @@ struct ec_param_codec_i2s { int16_t ch1_delay; uint8_t adjacent_to_ch0; uint8_t adjacent_to_ch1; - } __packed tdm_param; + } tdm_param; /* * EC_CODEC_I2S_SET_BCLK */ uint32_t bclk; }; -} __packed; +}; /* * For subcommand EC_CODEC_GET_GAIN. @@ -2981,7 +3015,7 @@ struct ec_param_codec_i2s { struct ec_response_codec_gain { uint8_t left; uint8_t right; -} __packed; +} __ec_align1; /*****************************************************************************/ /* System commands */ @@ -3010,7 +3044,7 @@ enum ec_reboot_cmd { struct ec_params_reboot_ec { uint8_t cmd; /* enum ec_reboot_cmd */ uint8_t flags; /* See EC_REBOOT_FLAG_* */ -} __packed; +} __ec_align1; /* * Get information on last EC panic. @@ -3153,7 +3187,7 @@ struct ec_params_reboot_ec { */ struct ec_params_cec_write { uint8_t msg[EC_MAX_CEC_MSG_LEN]; -} __packed; +} __ec_align1; /* Set various CEC parameters */ #define EC_CMD_CEC_SET 0x00BA @@ -3169,7 +3203,7 @@ struct ec_params_cec_write { struct ec_params_cec_set { uint8_t cmd; /* enum cec_command */ uint8_t val; -} __packed; +} __ec_align1; /* Read various CEC parameters */ #define EC_CMD_CEC_GET 0x00BB @@ -3180,7 +3214,7 @@ struct ec_params_cec_set { */ struct ec_params_cec_get { uint8_t cmd; /* enum cec_command */ -} __packed; +} __ec_align1; /** * struct ec_response_cec_get - CEC parameters get response @@ -3191,7 +3225,7 @@ struct ec_params_cec_get { */ struct ec_response_cec_get { uint8_t val; -} __packed; +} __ec_align1; /* CEC parameters command */ enum ec_cec_command { @@ -3264,13 +3298,13 @@ enum mkbp_cec_event { /* Status of EC being sent to PD */ struct ec_params_pd_status { int8_t batt_soc; /* battery state of charge */ -} __packed; +} __ec_align1; /* Status of PD being sent back to EC */ struct ec_response_pd_status { int8_t status; /* PD MCU status */ uint32_t curr_lim_ma; /* input current limit */ -} __packed; +} __ec_align_size1; /* Set USB type-C port role and muxes */ #define EC_CMD_USB_PD_CONTROL 0x101 @@ -3305,7 +3339,7 @@ struct ec_params_usb_pd_control { uint8_t role; uint8_t mux; uint8_t swap; -} __packed; +} __ec_align1; #define PD_CTRL_RESP_ENABLED_COMMS (1 << 0) /* Communication enabled */ #define PD_CTRL_RESP_ENABLED_CONNECTED (1 << 1) /* Device connected */ @@ -3324,7 +3358,7 @@ struct ec_response_usb_pd_control_v1 { uint8_t role; uint8_t polarity; char state[32]; -} __packed; +} __ec_align1; #define EC_CMD_USB_PD_PORTS 0x102 @@ -3333,14 +3367,14 @@ struct ec_response_usb_pd_control_v1 { struct ec_response_usb_pd_ports { uint8_t num_ports; -} __packed; +} __ec_align1; #define EC_CMD_USB_PD_POWER_INFO 0x103 #define PD_POWER_CHARGING_PORT 0xff struct ec_params_usb_pd_power_info { uint8_t port; -} __packed; +} __ec_align1; enum usb_chg_type { USB_CHG_TYPE_NONE, @@ -3366,7 +3400,7 @@ struct usb_chg_measures { uint16_t voltage_now; uint16_t current_max; uint16_t current_lim; -} __packed; +} __ec_align2; struct ec_response_usb_pd_power_info { uint8_t role; @@ -3375,11 +3409,11 @@ struct ec_response_usb_pd_power_info { uint8_t reserved1; struct usb_chg_measures meas; uint32_t max_power; -} __packed; +} __ec_align4; struct ec_params_usb_pd_info_request { uint8_t port; -} __packed; +} __ec_align1; /* * This command will return the number of USB PD charge port + the number @@ -3389,7 +3423,7 @@ struct ec_params_usb_pd_info_request { #define EC_CMD_CHARGE_PORT_COUNT 0x0105 struct ec_response_charge_port_count { uint8_t port_count; -} __packed; +} __ec_align1; /* Read USB-PD Device discovery info */ #define EC_CMD_USB_PD_DISCOVERY 0x0113 @@ -3397,7 +3431,7 @@ struct ec_params_usb_pd_discovery_entry { uint16_t vid; /* USB-IF VID */ uint16_t pid; /* USB-IF PID */ uint8_t ptype; /* product type (hub,periph,cable,ama) */ -} __packed; +} __ec_align_size1; /* Override default charge behavior */ #define EC_CMD_PD_CHARGE_PORT_OVERRIDE 0x0114 @@ -3411,7 +3445,7 @@ enum usb_pd_override_ports { struct ec_params_charge_port_override { int16_t override_port; /* Override port# */ -} __packed; +} __ec_align2; /* Read (and delete) one entry of PD event log */ #define EC_CMD_PD_GET_LOG_ENTRY 0x0115 @@ -3422,7 +3456,7 @@ struct ec_response_pd_log { uint8_t size_port; /* [7:5] port number [4:0] payload size in bytes */ uint16_t data; /* type-defined data payload */ uint8_t payload[0]; /* optional additional data payload: 0..16 bytes */ -} __packed; +} __ec_align4; /* The timestamp is the microsecond counter shifted to get about a ms. */ #define PD_LOG_TIMESTAMP_SHIFT 10 /* 1 LSB = 1024us */ @@ -3488,14 +3522,14 @@ struct mcdp_version { uint8_t major; uint8_t minor; uint16_t build; -} __packed; +} __ec_align4; struct mcdp_info { uint8_t family[2]; uint8_t chipid[2]; struct mcdp_version irom; struct mcdp_version fw; -} __packed; +} __ec_align4; /* struct mcdp_info field decoding */ #define MCDP_CHIPID(chipid) ((chipid[0] << 8) | chipid[1]) @@ -3506,7 +3540,7 @@ struct mcdp_info { struct ec_params_usb_pd_mux_info { uint8_t port; /* USB-C port number */ -} __packed; +} __ec_align1; /* Flags representing mux state */ #define USB_PD_MUX_USB_ENABLED (1 << 0) @@ -3516,7 +3550,7 @@ struct ec_params_usb_pd_mux_info { struct ec_response_usb_pd_mux_info { uint8_t flags; /* USB_PD_MUX_*-encoded USB mux state */ -} __packed; +} __ec_align1; /*****************************************************************************/ /* -- cgit v1.2.3 From ff8343328bb91f9f1e211f4ab65ba5eb5bc6d3dc Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:36 -0700 Subject: mfd: cros_ec: Define commands as 4-digit UPPER CASE hex values This change is required for compilation of embedded controller firmware to work properly (See CONFIG_HOSTCMD_SECTION_SORTED). Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 235 ++++++++++++++++++++--------------- 1 file changed, 136 insertions(+), 99 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index c12ae9742e20..8ad77d8a9141 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -553,6 +553,9 @@ struct ec_host_response { * Parameter/response length is implicit in the structs. Some underlying * communication protocols (I2C, SPI) may add length or checksum headers, but * those are implementation-dependent and not defined here. + * + * All commands MUST be #defined to be 4-digit UPPER CASE hex values + * (e.g., 0x00AB, not 0xab) for CONFIG_HOSTCMD_SECTION_SORTED to work. */ /*****************************************************************************/ @@ -562,7 +565,7 @@ struct ec_host_response { * Get protocol version, used to deal with non-backward compatible protocol * changes. */ -#define EC_CMD_PROTO_VERSION 0x00 +#define EC_CMD_PROTO_VERSION 0x0000 /** * struct ec_response_proto_version - Response to the proto version command. @@ -576,7 +579,7 @@ struct ec_response_proto_version { * Hello. This is a simple command to test the EC is responsive to * commands. */ -#define EC_CMD_HELLO 0x01 +#define EC_CMD_HELLO 0x0001 /** * struct ec_params_hello - Parameters to the hello command. @@ -595,7 +598,7 @@ struct ec_response_hello { } __ec_align4; /* Get version number */ -#define EC_CMD_GET_VERSION 0x02 +#define EC_CMD_GET_VERSION 0x0002 enum ec_current_image { EC_IMAGE_UNKNOWN = 0, @@ -618,7 +621,7 @@ struct ec_response_get_version { } __ec_align4; /* Read test */ -#define EC_CMD_READ_TEST 0x03 +#define EC_CMD_READ_TEST 0x0003 /** * struct ec_params_read_test - Parameters for the read test command. @@ -643,10 +646,10 @@ struct ec_response_read_test { * * Response is null-terminated string. */ -#define EC_CMD_GET_BUILD_INFO 0x04 +#define EC_CMD_GET_BUILD_INFO 0x0004 /* Get chip info */ -#define EC_CMD_GET_CHIP_INFO 0x05 +#define EC_CMD_GET_CHIP_INFO 0x0005 /** * struct ec_response_get_chip_info - Response to the get chip info command. @@ -661,7 +664,7 @@ struct ec_response_get_chip_info { } __ec_align4; /* Get board HW version */ -#define EC_CMD_GET_BOARD_VERSION 0x06 +#define EC_CMD_GET_BOARD_VERSION 0x0006 /** * struct ec_response_board_version - Response to the board version command. @@ -679,7 +682,7 @@ struct ec_response_board_version { * * Response is params.size bytes of data. */ -#define EC_CMD_READ_MEMMAP 0x07 +#define EC_CMD_READ_MEMMAP 0x0007 /** * struct ec_params_read_memmap - Parameters for the read memory map command. @@ -692,7 +695,7 @@ struct ec_params_read_memmap { } __ec_align1; /* Read versions supported for a command */ -#define EC_CMD_GET_CMD_VERSIONS 0x08 +#define EC_CMD_GET_CMD_VERSIONS 0x0008 /** * struct ec_params_get_cmd_versions - Parameters for the get command versions. @@ -727,7 +730,7 @@ struct ec_response_get_cmd_versions { * lpc must read the status from the command register. Attempting this on * lpc will overwrite the args/parameter space and corrupt its data. */ -#define EC_CMD_GET_COMMS_STATUS 0x09 +#define EC_CMD_GET_COMMS_STATUS 0x0009 /* Avoid using ec_status which is for return values */ enum ec_comms_status { @@ -744,7 +747,7 @@ struct ec_response_get_comms_status { } __ec_align4; /* Fake a variety of responses, purely for testing purposes. */ -#define EC_CMD_TEST_PROTOCOL 0x0a +#define EC_CMD_TEST_PROTOCOL 0x000A /* Tell the EC what to send back to us. */ struct ec_params_test_protocol { @@ -759,7 +762,7 @@ struct ec_response_test_protocol { } __ec_align4; /* Get protocol information */ -#define EC_CMD_GET_PROTOCOL_INFO 0x0b +#define EC_CMD_GET_PROTOCOL_INFO 0x000B /* Flags for ec_response_get_protocol_info.flags */ /* EC_RES_IN_PROGRESS may be returned if a command is slow */ @@ -805,11 +808,11 @@ struct ec_response_get_set_value { } __ec_align4; /* More than one command can use these structs to get/set parameters. */ -#define EC_CMD_GSV_PAUSE_IN_S5 0x0c +#define EC_CMD_GSV_PAUSE_IN_S5 0x000C /*****************************************************************************/ /* List the features supported by the firmware */ -#define EC_CMD_GET_FEATURES 0x0d +#define EC_CMD_GET_FEATURES 0x000D /* Supported features */ enum ec_feature_code { @@ -933,7 +936,7 @@ struct ec_response_get_features { /* Flash commands */ /* Get flash info */ -#define EC_CMD_FLASH_INFO 0x10 +#define EC_CMD_FLASH_INFO 0x0010 /** * struct ec_response_flash_info - Response to the flash info command. @@ -1000,7 +1003,7 @@ struct ec_response_flash_info_1 { * * Response is params.size bytes of data. */ -#define EC_CMD_FLASH_READ 0x11 +#define EC_CMD_FLASH_READ 0x0011 /** * struct ec_params_flash_read - Parameters for the flash read command. @@ -1013,7 +1016,7 @@ struct ec_params_flash_read { } __ec_align4; /* Write flash */ -#define EC_CMD_FLASH_WRITE 0x12 +#define EC_CMD_FLASH_WRITE 0x0012 #define EC_VER_FLASH_WRITE 1 /* Version 0 of the flash command supported only 64 bytes of data */ @@ -1031,7 +1034,7 @@ struct ec_params_flash_write { } __ec_align4; /* Erase flash */ -#define EC_CMD_FLASH_ERASE 0x13 +#define EC_CMD_FLASH_ERASE 0x0013 /** * struct ec_params_flash_erase - Parameters for the flash erase command. @@ -1053,7 +1056,7 @@ struct ec_params_flash_erase { * * If mask=0, simply returns the current flags state. */ -#define EC_CMD_FLASH_PROTECT 0x15 +#define EC_CMD_FLASH_PROTECT 0x0015 #define EC_VER_FLASH_PROTECT 1 /* Command version 1 */ /* Flags for flash protection */ @@ -1110,7 +1113,7 @@ struct ec_response_flash_protect { */ /* Get the region offset/size */ -#define EC_CMD_FLASH_REGION_INFO 0x16 +#define EC_CMD_FLASH_REGION_INFO 0x0016 #define EC_VER_FLASH_REGION_INFO 1 enum ec_flash_region { @@ -1142,7 +1145,7 @@ struct ec_response_flash_region_info { } __ec_align4; /* Read/write VbNvContext */ -#define EC_CMD_VBNV_CONTEXT 0x17 +#define EC_CMD_VBNV_CONTEXT 0x0017 #define EC_VER_VBNV_CONTEXT 1 #define EC_VBNV_BLOCK_SIZE 16 @@ -1164,7 +1167,7 @@ struct ec_response_vbnvcontext { /* PWM commands */ /* Get fan target RPM */ -#define EC_CMD_PWM_GET_FAN_TARGET_RPM 0x20 +#define EC_CMD_PWM_GET_FAN_TARGET_RPM 0x0020 struct ec_response_pwm_get_fan_rpm { uint32_t rpm; @@ -1178,7 +1181,7 @@ struct ec_params_pwm_set_fan_target_rpm { } __ec_align_size1; /* Get keyboard backlight */ -#define EC_CMD_PWM_GET_KEYBOARD_BACKLIGHT 0x22 +#define EC_CMD_PWM_GET_KEYBOARD_BACKLIGHT 0x0022 struct ec_response_pwm_get_keyboard_backlight { uint8_t percent; @@ -1186,20 +1189,20 @@ struct ec_response_pwm_get_keyboard_backlight { } __ec_align1; /* Set keyboard backlight */ -#define EC_CMD_PWM_SET_KEYBOARD_BACKLIGHT 0x23 +#define EC_CMD_PWM_SET_KEYBOARD_BACKLIGHT 0x0023 struct ec_params_pwm_set_keyboard_backlight { uint8_t percent; } __ec_align1; /* Set target fan PWM duty cycle */ -#define EC_CMD_PWM_SET_FAN_DUTY 0x24 +#define EC_CMD_PWM_SET_FAN_DUTY 0x0024 struct ec_params_pwm_set_fan_duty { uint32_t percent; } __ec_align4; -#define EC_CMD_PWM_SET_DUTY 0x25 +#define EC_CMD_PWM_SET_DUTY 0x0025 /* 16 bit duty cycle, 0xffff = 100% */ #define EC_PWM_MAX_DUTY 0xffff @@ -1219,7 +1222,7 @@ struct ec_params_pwm_set_duty { uint8_t index; /* Type-specific index, or 0 if unique */ } __ec_align4; -#define EC_CMD_PWM_GET_DUTY 0x26 +#define EC_CMD_PWM_GET_DUTY 0x0026 struct ec_params_pwm_get_duty { uint8_t pwm_type; /* ec_pwm_type */ @@ -1237,7 +1240,7 @@ struct ec_response_pwm_get_duty { * into a subcommand. We'll make separate structs for subcommands with * different input args, so that we know how much to expect. */ -#define EC_CMD_LIGHTBAR_CMD 0x28 +#define EC_CMD_LIGHTBAR_CMD 0x0028 struct rgb_s { uint8_t r, g, b; @@ -1431,7 +1434,7 @@ enum lightbar_command { /*****************************************************************************/ /* LED control commands */ -#define EC_CMD_LED_CONTROL 0x29 +#define EC_CMD_LED_CONTROL 0x0029 enum ec_led_id { /* LED to indicate battery state of charge */ @@ -1488,7 +1491,7 @@ struct ec_response_led_control { */ /* Verified boot hash command */ -#define EC_CMD_VBOOT_HASH 0x2A +#define EC_CMD_VBOOT_HASH 0x002A struct ec_params_vboot_hash { uint8_t cmd; /* enum ec_vboot_hash_cmd */ @@ -1540,7 +1543,7 @@ enum ec_vboot_hash_status { * Motion sense commands. We'll make separate structs for sub-commands with * different input args, so that we know how much to expect. */ -#define EC_CMD_MOTION_SENSE_CMD 0x2B +#define EC_CMD_MOTION_SENSE_CMD 0x002B /* Motion sense commands */ enum motionsense_command { @@ -1804,7 +1807,7 @@ struct ec_response_motion_sense { /* USB charging control commands */ /* Set USB port charging mode */ -#define EC_CMD_USB_CHARGE_SET_MODE 0x30 +#define EC_CMD_USB_CHARGE_SET_MODE 0x0030 struct ec_params_usb_charge_set_mode { uint8_t usb_port_id; @@ -1818,7 +1821,7 @@ struct ec_params_usb_charge_set_mode { #define EC_PSTORE_SIZE_MAX 64 /* Get persistent storage info */ -#define EC_CMD_PSTORE_INFO 0x40 +#define EC_CMD_PSTORE_INFO 0x0040 struct ec_response_pstore_info { /* Persistent storage size, in bytes */ @@ -1832,7 +1835,7 @@ struct ec_response_pstore_info { * * Response is params.size bytes of data. */ -#define EC_CMD_PSTORE_READ 0x41 +#define EC_CMD_PSTORE_READ 0x0041 struct ec_params_pstore_read { uint32_t offset; /* Byte offset to read */ @@ -1840,7 +1843,7 @@ struct ec_params_pstore_read { } __ec_align4; /* Write persistent storage */ -#define EC_CMD_PSTORE_WRITE 0x42 +#define EC_CMD_PSTORE_WRITE 0x0042 struct ec_params_pstore_write { uint32_t offset; /* Byte offset to write */ @@ -1861,12 +1864,12 @@ struct ec_response_rtc { } __ec_align4; /* These use ec_response_rtc */ -#define EC_CMD_RTC_GET_VALUE 0x44 -#define EC_CMD_RTC_GET_ALARM 0x45 +#define EC_CMD_RTC_GET_VALUE 0x0044 +#define EC_CMD_RTC_GET_ALARM 0x0045 /* These all use ec_params_rtc */ -#define EC_CMD_RTC_SET_VALUE 0x46 -#define EC_CMD_RTC_SET_ALARM 0x47 +#define EC_CMD_RTC_SET_VALUE 0x0046 +#define EC_CMD_RTC_SET_ALARM 0x0047 /* Pass as time param to SET_ALARM to clear the current alarm */ #define EC_RTC_ALARM_CLEAR 0 @@ -1878,8 +1881,8 @@ struct ec_response_rtc { #define EC_PORT80_SIZE_MAX 32 /* Get last port80 code from previous boot */ -#define EC_CMD_PORT80_LAST_BOOT 0x48 -#define EC_CMD_PORT80_READ 0x48 +#define EC_CMD_PORT80_LAST_BOOT 0x0048 +#define EC_CMD_PORT80_READ 0x0048 enum ec_port80_subcmd { EC_PORT80_GET_INFO = 0, @@ -1920,8 +1923,8 @@ struct ec_response_port80_last_boot { * Version 1 separates the CPU thermal limits from the fan control. */ -#define EC_CMD_THERMAL_SET_THRESHOLD 0x50 -#define EC_CMD_THERMAL_GET_THRESHOLD 0x51 +#define EC_CMD_THERMAL_SET_THRESHOLD 0x0050 +#define EC_CMD_THERMAL_GET_THRESHOLD 0x0051 /* The version 0 structs are opaque. You have to know what they are for * the get/set commands to make any sense. @@ -1983,10 +1986,10 @@ struct ec_params_thermal_set_threshold_v1 { /****************************************************************************/ /* Toggle automatic fan control */ -#define EC_CMD_THERMAL_AUTO_FAN_CTRL 0x52 +#define EC_CMD_THERMAL_AUTO_FAN_CTRL 0x0052 /* Get TMP006 calibration data */ -#define EC_CMD_TMP006_GET_CALIBRATION 0x53 +#define EC_CMD_TMP006_GET_CALIBRATION 0x0053 struct ec_params_tmp006_get_calibration { uint8_t index; @@ -2000,7 +2003,7 @@ struct ec_response_tmp006_get_calibration { } __ec_align4; /* Set TMP006 calibration data */ -#define EC_CMD_TMP006_SET_CALIBRATION 0x54 +#define EC_CMD_TMP006_SET_CALIBRATION 0x0054 struct ec_params_tmp006_set_calibration { uint8_t index; @@ -2012,7 +2015,7 @@ struct ec_params_tmp006_set_calibration { } __ec_align4; /* Read raw TMP006 data */ -#define EC_CMD_TMP006_GET_RAW 0x55 +#define EC_CMD_TMP006_GET_RAW 0x0055 struct ec_params_tmp006_get_raw { uint8_t index; @@ -2036,12 +2039,12 @@ struct ec_response_tmp006_get_raw { * to obtain the instantaneous state, use EC_CMD_MKBP_INFO with the type * EC_MKBP_INFO_CURRENT and event EC_MKBP_EVENT_KEY_MATRIX. */ -#define EC_CMD_MKBP_STATE 0x60 +#define EC_CMD_MKBP_STATE 0x0060 /* * Provide information about various MKBP things. See enum ec_mkbp_info_type. */ -#define EC_CMD_MKBP_INFO 0x61 +#define EC_CMD_MKBP_INFO 0x0061 struct ec_response_mkbp_info { uint32_t rows; @@ -2095,7 +2098,7 @@ enum ec_mkbp_info_type { }; /* Simulate key press */ -#define EC_CMD_MKBP_SIMULATE_KEY 0x62 +#define EC_CMD_MKBP_SIMULATE_KEY 0x0062 struct ec_params_mkbp_simulate_key { uint8_t col; @@ -2104,8 +2107,8 @@ struct ec_params_mkbp_simulate_key { } __ec_align1; /* Configure keyboard scanning */ -#define EC_CMD_MKBP_SET_CONFIG 0x64 -#define EC_CMD_MKBP_GET_CONFIG 0x65 +#define EC_CMD_MKBP_SET_CONFIG 0x0064 +#define EC_CMD_MKBP_GET_CONFIG 0x0065 /* flags */ enum mkbp_config_flags { @@ -2158,7 +2161,7 @@ struct ec_response_mkbp_get_config { } __ec_align_size1; /* Run the key scan emulation */ -#define EC_CMD_KEYSCAN_SEQ_CTRL 0x66 +#define EC_CMD_KEYSCAN_SEQ_CTRL 0x0066 enum ec_keyscan_seq_cmd { EC_KEYSCAN_SEQ_STATUS = 0, /* Get status information */ @@ -2219,7 +2222,7 @@ struct ec_result_keyscan_seq_ctrl { * * The device replies with UNAVAILABLE if there aren't any pending events. */ -#define EC_CMD_GET_NEXT_EVENT 0x67 +#define EC_CMD_GET_NEXT_EVENT 0x0067 enum ec_mkbp_event { /* Keyboard matrix changed. The event data is the new matrix state. */ @@ -2298,7 +2301,7 @@ struct ec_response_get_next_event_v1 { /* Temperature sensor commands */ /* Read temperature sensor info */ -#define EC_CMD_TEMP_SENSOR_GET_INFO 0x70 +#define EC_CMD_TEMP_SENSOR_GET_INFO 0x0070 struct ec_params_temp_sensor_get_info { uint8_t id; @@ -2333,30 +2336,30 @@ struct ec_response_host_event_mask { } __ec_align4; /* These all use ec_response_host_event_mask */ -#define EC_CMD_HOST_EVENT_GET_B 0x87 -#define EC_CMD_HOST_EVENT_GET_SMI_MASK 0x88 -#define EC_CMD_HOST_EVENT_GET_SCI_MASK 0x89 -#define EC_CMD_HOST_EVENT_GET_WAKE_MASK 0x8d +#define EC_CMD_HOST_EVENT_GET_B 0x0087 +#define EC_CMD_HOST_EVENT_GET_SMI_MASK 0x0088 +#define EC_CMD_HOST_EVENT_GET_SCI_MASK 0x0089 +#define EC_CMD_HOST_EVENT_GET_WAKE_MASK 0x008D /* These all use ec_params_host_event_mask */ -#define EC_CMD_HOST_EVENT_SET_SMI_MASK 0x8a -#define EC_CMD_HOST_EVENT_SET_SCI_MASK 0x8b -#define EC_CMD_HOST_EVENT_CLEAR 0x8c -#define EC_CMD_HOST_EVENT_SET_WAKE_MASK 0x8e -#define EC_CMD_HOST_EVENT_CLEAR_B 0x8f +#define EC_CMD_HOST_EVENT_SET_SMI_MASK 0x008A +#define EC_CMD_HOST_EVENT_SET_SCI_MASK 0x008B +#define EC_CMD_HOST_EVENT_CLEAR 0x008C +#define EC_CMD_HOST_EVENT_SET_WAKE_MASK 0x008E +#define EC_CMD_HOST_EVENT_CLEAR_B 0x008F /*****************************************************************************/ /* Switch commands */ /* Enable/disable LCD backlight */ -#define EC_CMD_SWITCH_ENABLE_BKLIGHT 0x90 +#define EC_CMD_SWITCH_ENABLE_BKLIGHT 0x0090 struct ec_params_switch_enable_backlight { uint8_t enabled; } __ec_align1; /* Enable/disable WLAN/Bluetooth */ -#define EC_CMD_SWITCH_ENABLE_WIRELESS 0x91 +#define EC_CMD_SWITCH_ENABLE_WIRELESS 0x0091 #define EC_VER_SWITCH_ENABLE_WIRELESS 1 /* Version 0 params; no response */ @@ -2396,7 +2399,7 @@ struct ec_response_switch_enable_wireless_v1 { /* GPIO commands. Only available on EC if write protect has been disabled. */ /* Set GPIO output value */ -#define EC_CMD_GPIO_SET 0x92 +#define EC_CMD_GPIO_SET 0x0092 struct ec_params_gpio_set { char name[32]; @@ -2404,7 +2407,7 @@ struct ec_params_gpio_set { } __ec_align1; /* Get GPIO value */ -#define EC_CMD_GPIO_GET 0x93 +#define EC_CMD_GPIO_GET 0x0093 /* Version 0 of input params and response */ struct ec_params_gpio_get { @@ -2458,7 +2461,7 @@ enum gpio_get_subcmd { */ /* Read I2C bus */ -#define EC_CMD_I2C_READ 0x94 +#define EC_CMD_I2C_READ 0x0094 struct ec_params_i2c_read { uint16_t addr; /* 8-bit address (7-bit shifted << 1) */ @@ -2472,7 +2475,7 @@ struct ec_response_i2c_read { } __ec_align2; /* Write I2C bus */ -#define EC_CMD_I2C_WRITE 0x95 +#define EC_CMD_I2C_WRITE 0x0095 struct ec_params_i2c_write { uint16_t data; @@ -2488,7 +2491,7 @@ struct ec_params_i2c_write { /* Force charge state machine to stop charging the battery or force it to * discharge the battery. */ -#define EC_CMD_CHARGE_CONTROL 0x96 +#define EC_CMD_CHARGE_CONTROL 0x0096 #define EC_VER_CHARGE_CONTROL 1 enum ec_charge_control_mode { @@ -2504,7 +2507,7 @@ struct ec_params_charge_control { /*****************************************************************************/ /* Snapshot console output buffer for use by EC_CMD_CONSOLE_READ. */ -#define EC_CMD_CONSOLE_SNAPSHOT 0x97 +#define EC_CMD_CONSOLE_SNAPSHOT 0x0097 /* * Read data from the saved snapshot. If the subcmd parameter is @@ -2518,7 +2521,7 @@ struct ec_params_charge_control { * Response is null-terminated string. Empty string, if there is no more * remaining output. */ -#define EC_CMD_CONSOLE_READ 0x98 +#define EC_CMD_CONSOLE_READ 0x0098 enum ec_console_read_subcmd { CONSOLE_READ_NEXT = 0, @@ -2538,8 +2541,7 @@ struct ec_params_console_read_v1 { * EC_RES_SUCCESS if the command was successful. * EC_RES_ERROR if the cut off command failed. */ - -#define EC_CMD_BATTERY_CUT_OFF 0x99 +#define EC_CMD_BATTERY_CUT_OFF 0x0099 #define EC_BATTERY_CUTOFF_FLAG_AT_SHUTDOWN (1 << 0) @@ -2553,7 +2555,7 @@ struct ec_params_battery_cutoff { /* * Switch USB mux or return to automatic switching. */ -#define EC_CMD_USB_MUX 0x9a +#define EC_CMD_USB_MUX 0x009A struct ec_params_usb_mux { uint8_t mux; @@ -2570,7 +2572,7 @@ enum ec_ldo_state { /* * Switch on/off a LDO. */ -#define EC_CMD_LDO_SET 0x9b +#define EC_CMD_LDO_SET 0x009B struct ec_params_ldo_set { uint8_t index; @@ -2580,7 +2582,7 @@ struct ec_params_ldo_set { /* * Get LDO state. */ -#define EC_CMD_LDO_GET 0x9c +#define EC_CMD_LDO_GET 0x009C struct ec_params_ldo_get { uint8_t index; @@ -2596,7 +2598,7 @@ struct ec_response_ldo_get { /* * Get power info. */ -#define EC_CMD_POWER_INFO 0x9d +#define EC_CMD_POWER_INFO 0x009D struct ec_response_power_info { uint32_t usb_dev_type; @@ -2609,7 +2611,7 @@ struct ec_response_power_info { /*****************************************************************************/ /* I2C passthru command */ -#define EC_CMD_I2C_PASSTHRU 0x9e +#define EC_CMD_I2C_PASSTHRU 0x009E /* Read data; if not present, message is a write */ #define EC_I2C_FLAG_READ (1 << 15) @@ -2644,7 +2646,7 @@ struct ec_response_i2c_passthru { /*****************************************************************************/ /* Power button hang detect */ -#define EC_CMD_HANG_DETECT 0x9f +#define EC_CMD_HANG_DETECT 0x009F /* Reasons to start hang detection timer */ /* Power button pressed */ @@ -2703,7 +2705,7 @@ struct ec_params_hang_detect { * This is the single catch-all host command to exchange data regarding the * charge state machine (v2 and up). */ -#define EC_CMD_CHARGE_STATE 0xa0 +#define EC_CMD_CHARGE_STATE 0x00A0 /* Subcommands for this host command */ enum charge_state_command { @@ -2774,7 +2776,7 @@ struct ec_response_charge_state { /* * Set maximum battery charging current. */ -#define EC_CMD_CHARGE_CURRENT_LIMIT 0xa1 +#define EC_CMD_CHARGE_CURRENT_LIMIT 0x00A1 struct ec_params_current_limit { uint32_t limit; /* in mA */ @@ -2794,7 +2796,7 @@ struct ec_params_external_power_limit_v1 { #define EC_POWER_LIMIT_NONE 0xffff /* Inform the EC when entering a sleep state */ -#define EC_CMD_HOST_SLEEP_EVENT 0xa9 +#define EC_CMD_HOST_SLEEP_EVENT 0x00A9 enum host_sleep_event { HOST_SLEEP_EVENT_S3_SUSPEND = 1, @@ -2868,14 +2870,14 @@ struct ec_response_host_sleep_event_v1 { /* Smart battery pass-through */ /* Get / Set 16-bit smart battery registers */ -#define EC_CMD_SB_READ_WORD 0xb0 -#define EC_CMD_SB_WRITE_WORD 0xb1 +#define EC_CMD_SB_READ_WORD 0x00B0 +#define EC_CMD_SB_WRITE_WORD 0x00B1 /* Get / Set string smart battery parameters * formatted as SMBUS "block". */ -#define EC_CMD_SB_READ_BLOCK 0xb2 -#define EC_CMD_SB_WRITE_BLOCK 0xb3 +#define EC_CMD_SB_READ_BLOCK 0x00B2 +#define EC_CMD_SB_WRITE_BLOCK 0x00B3 struct ec_params_sb_rd { uint8_t reg; @@ -2908,7 +2910,7 @@ struct ec_params_sb_wr_block { * requested value. */ -#define EC_CMD_BATTERY_VENDOR_PARAM 0xb4 +#define EC_CMD_BATTERY_VENDOR_PARAM 0x00B4 enum ec_battery_vendor_param_mode { BATTERY_VENDOR_PARAM_MODE_GET = 0, @@ -3024,7 +3026,7 @@ struct ec_response_codec_gain { * TODO(crosbug.com/p/23747): This is a confusing name, since it doesn't * necessarily reboot the EC. Rename to "image" or something similar? */ -#define EC_CMD_REBOOT_EC 0xd2 +#define EC_CMD_REBOOT_EC 0x00D2 /* Command */ enum ec_reboot_cmd { @@ -3052,7 +3054,7 @@ struct ec_params_reboot_ec { * Returns variable-length platform-dependent panic information. See panic.h * for details. */ -#define EC_CMD_GET_PANIC_INFO 0xd3 +#define EC_CMD_GET_PANIC_INFO 0x00D3 /*****************************************************************************/ /* @@ -3260,7 +3262,7 @@ enum mkbp_cec_event { * * Use EC_CMD_REBOOT_EC to reboot the EC more politely. */ -#define EC_CMD_REBOOT 0xd1 /* Think "die" */ +#define EC_CMD_REBOOT 0x00D1 /* Think "die" */ /* * Resend last response (not supported on LPC). @@ -3269,7 +3271,7 @@ enum mkbp_cec_event { * there was no previous command, or the previous command's response was too * big to save. */ -#define EC_CMD_RESEND_RESPONSE 0xdb +#define EC_CMD_RESEND_RESPONSE 0x00DB /* * This header byte on a command indicate version 0. Any header byte less @@ -3281,7 +3283,7 @@ enum mkbp_cec_event { * * The old EC interface must not use commands 0xdc or higher. */ -#define EC_CMD_VERSION0 0xdc +#define EC_CMD_VERSION0 0x00DC #endif /* !__ACPI__ */ @@ -3293,7 +3295,7 @@ enum mkbp_cec_event { */ /* EC to PD MCU exchange status command */ -#define EC_CMD_PD_EXCHANGE_STATUS 0x100 +#define EC_CMD_PD_EXCHANGE_STATUS 0x0100 /* Status of EC being sent to PD */ struct ec_params_pd_status { @@ -3307,7 +3309,7 @@ struct ec_response_pd_status { } __ec_align_size1; /* Set USB type-C port role and muxes */ -#define EC_CMD_USB_PD_CONTROL 0x101 +#define EC_CMD_USB_PD_CONTROL 0x0101 enum usb_pd_control_role { USB_PD_CTRL_ROLE_NO_CHANGE = 0, @@ -3360,7 +3362,7 @@ struct ec_response_usb_pd_control_v1 { char state[32]; } __ec_align1; -#define EC_CMD_USB_PD_PORTS 0x102 +#define EC_CMD_USB_PD_PORTS 0x0102 /* Maximum number of PD ports on a device, num_ports will be <= this */ #define EC_USB_PD_MAX_PORTS 8 @@ -3369,7 +3371,7 @@ struct ec_response_usb_pd_ports { uint8_t num_ports; } __ec_align1; -#define EC_CMD_USB_PD_POWER_INFO 0x103 +#define EC_CMD_USB_PD_POWER_INFO 0x0103 #define PD_POWER_CHARGING_PORT 0xff struct ec_params_usb_pd_power_info { @@ -3536,7 +3538,7 @@ struct mcdp_info { #define MCDP_FAMILY(family) ((family[0] << 8) | family[1]) /* Get info about USB-C SS muxes */ -#define EC_CMD_USB_PD_MUX_INFO 0x11a +#define EC_CMD_USB_PD_MUX_INFO 0x011A struct ec_params_usb_pd_mux_info { uint8_t port; /* USB-C port number */ @@ -3551,6 +3553,41 @@ struct ec_params_usb_pd_mux_info { struct ec_response_usb_pd_mux_info { uint8_t flags; /* USB_PD_MUX_*-encoded USB mux state */ } __ec_align1; +/*****************************************************************************/ +/* + * Reserve a range of host commands for board-specific, experimental, or + * special purpose features. These can be (re)used without updating this file. + * + * CAUTION: Don't go nuts with this. Shipping products should document ALL + * their EC commands for easier development, testing, debugging, and support. + * + * All commands MUST be #defined to be 4-digit UPPER CASE hex values + * (e.g., 0x00AB, not 0xab) for CONFIG_HOSTCMD_SECTION_SORTED to work. + * + * In your experimental code, you may want to do something like this: + * + * #define EC_CMD_MAGIC_FOO 0x0000 + * #define EC_CMD_MAGIC_BAR 0x0001 + * #define EC_CMD_MAGIC_HEY 0x0002 + * + * DECLARE_PRIVATE_HOST_COMMAND(EC_CMD_MAGIC_FOO, magic_foo_handler, + * EC_VER_MASK(0); + * + * DECLARE_PRIVATE_HOST_COMMAND(EC_CMD_MAGIC_BAR, magic_bar_handler, + * EC_VER_MASK(0); + * + * DECLARE_PRIVATE_HOST_COMMAND(EC_CMD_MAGIC_HEY, magic_hey_handler, + * EC_VER_MASK(0); + */ +#define EC_CMD_BOARD_SPECIFIC_BASE 0x3E00 +#define EC_CMD_BOARD_SPECIFIC_LAST 0x3FFF + +/* + * Given the private host command offset, calculate the true private host + * command value. + */ +#define EC_PRIVATE_HOST_COMMAND_VALUE(command) \ + (EC_CMD_BOARD_SPECIFIC_BASE + (command)) /*****************************************************************************/ /* -- cgit v1.2.3 From 9e81656063776b38f8fa8ffd7a5a2d4d42bdd2b4 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:37 -0700 Subject: mfd: cros_ec: use BIT macro Replace (1 << ...) with BIT(). Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 110 +++++++++++++++++------------------ 1 file changed, 55 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 8ad77d8a9141..e97e9e976bd0 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -28,7 +28,7 @@ #define EC_PROTO_VERSION 0x00000002 /* Command version mask */ -#define EC_VER_MASK(version) (1UL << (version)) +#define EC_VER_MASK(version) BIT(version) /* I/O addresses for ACPI commands */ #define EC_LPC_ADDR_ACPI_DATA 0x62 @@ -57,13 +57,13 @@ #define EC_HOST_CMD_REGION_SIZE 0x80 /* EC command register bit functions */ -#define EC_LPC_CMDR_DATA (1 << 0) /* Data ready for host to read */ -#define EC_LPC_CMDR_PENDING (1 << 1) /* Write pending to EC */ -#define EC_LPC_CMDR_BUSY (1 << 2) /* EC is busy processing a command */ -#define EC_LPC_CMDR_CMD (1 << 3) /* Last host write was a command */ -#define EC_LPC_CMDR_ACPI_BRST (1 << 4) /* Burst mode (not used) */ -#define EC_LPC_CMDR_SCI (1 << 5) /* SCI event is pending */ -#define EC_LPC_CMDR_SMI (1 << 6) /* SMI event is pending */ +#define EC_LPC_CMDR_DATA BIT(0) /* Data ready for host to read */ +#define EC_LPC_CMDR_PENDING BIT(1) /* Write pending to EC */ +#define EC_LPC_CMDR_BUSY BIT(2) /* EC is busy processing a command */ +#define EC_LPC_CMDR_CMD BIT(3) /* Last host write was a command */ +#define EC_LPC_CMDR_ACPI_BRST BIT(4) /* Burst mode (not used) */ +#define EC_LPC_CMDR_SCI BIT(5) /* SCI event is pending */ +#define EC_LPC_CMDR_SMI BIT(6) /* SMI event is pending */ #define EC_LPC_ADDR_MEMMAP 0x900 #define EC_MEMMAP_SIZE 255 /* ACPI IO buffer max is 255 bytes */ @@ -110,8 +110,8 @@ /* Define the format of the accelerometer mapped memory status byte. */ #define EC_MEMMAP_ACC_STATUS_SAMPLE_ID_MASK 0x0f -#define EC_MEMMAP_ACC_STATUS_BUSY_BIT (1 << 4) -#define EC_MEMMAP_ACC_STATUS_PRESENCE_BIT (1 << 7) +#define EC_MEMMAP_ACC_STATUS_BUSY_BIT BIT(4) +#define EC_MEMMAP_ACC_STATUS_PRESENCE_BIT BIT(7) /* Number of temp sensors at EC_MEMMAP_TEMP_SENSOR */ #define EC_TEMP_SENSOR_ENTRIES 16 @@ -336,7 +336,7 @@ enum host_event_code { EC_HOST_EVENT_INVALID = 32 }; /* Host event mask */ -#define EC_HOST_EVENT_MASK(event_code) (1UL << ((event_code) - 1)) +#define EC_HOST_EVENT_MASK(event_code) BIT_ULL((event_code) - 1) /** * struct ec_lpc_host_args - Arguments at EC_LPC_ADDR_HOST_ARGS @@ -734,7 +734,7 @@ struct ec_response_get_cmd_versions { /* Avoid using ec_status which is for return values */ enum ec_comms_status { - EC_COMMS_STATUS_PROCESSING = 1 << 0, /* Processing cmd */ + EC_COMMS_STATUS_PROCESSING = BIT(0), /* Processing cmd */ }; /** @@ -766,7 +766,7 @@ struct ec_response_test_protocol { /* Flags for ec_response_get_protocol_info.flags */ /* EC_RES_IN_PROGRESS may be returned if a command is slow */ -#define EC_PROTOCOL_INFO_IN_PROGRESS_SUPPORTED (1 << 0) +#define EC_PROTOCOL_INFO_IN_PROGRESS_SUPPORTED BIT(0) /** * struct ec_response_get_protocol_info - Response to the get protocol info. @@ -925,8 +925,8 @@ enum ec_feature_code { EC_FEATURE_ISH = 40, }; -#define EC_FEATURE_MASK_0(event_code) (1UL << (event_code % 32)) -#define EC_FEATURE_MASK_1(event_code) (1UL << (event_code - 32)) +#define EC_FEATURE_MASK_0(event_code) BIT(event_code % 32) +#define EC_FEATURE_MASK_1(event_code) BIT(event_code - 32) struct ec_response_get_features { uint32_t flags[2]; @@ -961,7 +961,7 @@ struct ec_response_flash_info { * Flags for version 1+ flash info command * EC flash erases bits to 0 instead of 1. */ -#define EC_FLASH_INFO_ERASE_TO_0 (1 << 0) +#define EC_FLASH_INFO_ERASE_TO_0 BIT(0) /** * struct ec_response_flash_info_1 - Response to the flash info v1 command. @@ -1061,26 +1061,26 @@ struct ec_params_flash_erase { /* Flags for flash protection */ /* RO flash code protected when the EC boots */ -#define EC_FLASH_PROTECT_RO_AT_BOOT (1 << 0) +#define EC_FLASH_PROTECT_RO_AT_BOOT BIT(0) /* * RO flash code protected now. If this bit is set, at-boot status cannot * be changed. */ -#define EC_FLASH_PROTECT_RO_NOW (1 << 1) +#define EC_FLASH_PROTECT_RO_NOW BIT(1) /* Entire flash code protected now, until reboot. */ -#define EC_FLASH_PROTECT_ALL_NOW (1 << 2) +#define EC_FLASH_PROTECT_ALL_NOW BIT(2) /* Flash write protect GPIO is asserted now */ -#define EC_FLASH_PROTECT_GPIO_ASSERTED (1 << 3) +#define EC_FLASH_PROTECT_GPIO_ASSERTED BIT(3) /* Error - at least one bank of flash is stuck locked, and cannot be unlocked */ -#define EC_FLASH_PROTECT_ERROR_STUCK (1 << 4) +#define EC_FLASH_PROTECT_ERROR_STUCK BIT(4) /* * Error - flash protection is in inconsistent state. At least one bank of * flash which should be protected is not protected. Usually fixed by * re-requesting the desired flags, or by a hard reset if that fails. */ -#define EC_FLASH_PROTECT_ERROR_INCONSISTENT (1 << 5) +#define EC_FLASH_PROTECT_ERROR_INCONSISTENT BIT(5) /* Entire flash code protected when the EC boots */ -#define EC_FLASH_PROTECT_ALL_AT_BOOT (1 << 6) +#define EC_FLASH_PROTECT_ALL_AT_BOOT BIT(6) /** * struct ec_params_flash_protect - Parameters for the flash protect command. @@ -1451,8 +1451,8 @@ enum ec_led_id { }; /* LED control flags */ -#define EC_LED_FLAGS_QUERY (1 << 0) /* Query LED capability only */ -#define EC_LED_FLAGS_AUTO (1 << 1) /* Switch LED back to automatic control */ +#define EC_LED_FLAGS_QUERY BIT(0) /* Query LED capability only */ +#define EC_LED_FLAGS_AUTO BIT(1) /* Switch LED back to automatic control */ enum ec_led_colors { EC_LED_COLOR_RED = 0, @@ -2116,13 +2116,13 @@ enum mkbp_config_flags { }; enum mkbp_config_valid { - EC_MKBP_VALID_SCAN_PERIOD = 1 << 0, - EC_MKBP_VALID_POLL_TIMEOUT = 1 << 1, - EC_MKBP_VALID_MIN_POST_SCAN_DELAY = 1 << 3, - EC_MKBP_VALID_OUTPUT_SETTLE = 1 << 4, - EC_MKBP_VALID_DEBOUNCE_DOWN = 1 << 5, - EC_MKBP_VALID_DEBOUNCE_UP = 1 << 6, - EC_MKBP_VALID_FIFO_MAX_DEPTH = 1 << 7, + EC_MKBP_VALID_SCAN_PERIOD = BIT(0), + EC_MKBP_VALID_POLL_TIMEOUT = BIT(1), + EC_MKBP_VALID_MIN_POST_SCAN_DELAY = BIT(3), + EC_MKBP_VALID_OUTPUT_SETTLE = BIT(4), + EC_MKBP_VALID_DEBOUNCE_DOWN = BIT(5), + EC_MKBP_VALID_DEBOUNCE_UP = BIT(6), + EC_MKBP_VALID_FIFO_MAX_DEPTH = BIT(7), }; /* @@ -2176,7 +2176,7 @@ enum ec_collect_flags { * Indicates this scan was processed by the EC. Due to timing, some * scans may be skipped. */ - EC_KEYSCAN_SEQ_FLAG_DONE = 1 << 0, + EC_KEYSCAN_SEQ_FLAG_DONE = BIT(0), }; struct ec_collect_item { @@ -2543,7 +2543,7 @@ struct ec_params_console_read_v1 { */ #define EC_CMD_BATTERY_CUT_OFF 0x0099 -#define EC_BATTERY_CUTOFF_FLAG_AT_SHUTDOWN (1 << 0) +#define EC_BATTERY_CUTOFF_FLAG_AT_SHUTDOWN BIT(0) struct ec_params_battery_cutoff { uint8_t flags; @@ -2614,13 +2614,13 @@ struct ec_response_power_info { #define EC_CMD_I2C_PASSTHRU 0x009E /* Read data; if not present, message is a write */ -#define EC_I2C_FLAG_READ (1 << 15) +#define EC_I2C_FLAG_READ BIT(15) /* Mask for address */ #define EC_I2C_ADDR_MASK 0x3ff -#define EC_I2C_STATUS_NAK (1 << 0) /* Transfer was not acknowledged */ -#define EC_I2C_STATUS_TIMEOUT (1 << 1) /* Timeout during transfer */ +#define EC_I2C_STATUS_NAK BIT(0) /* Transfer was not acknowledged */ +#define EC_I2C_STATUS_TIMEOUT BIT(1) /* Timeout during transfer */ /* Any error */ #define EC_I2C_STATUS_ERROR (EC_I2C_STATUS_NAK | EC_I2C_STATUS_TIMEOUT) @@ -2650,27 +2650,27 @@ struct ec_response_i2c_passthru { /* Reasons to start hang detection timer */ /* Power button pressed */ -#define EC_HANG_START_ON_POWER_PRESS (1 << 0) +#define EC_HANG_START_ON_POWER_PRESS BIT(0) /* Lid closed */ -#define EC_HANG_START_ON_LID_CLOSE (1 << 1) +#define EC_HANG_START_ON_LID_CLOSE BIT(1) /* Lid opened */ -#define EC_HANG_START_ON_LID_OPEN (1 << 2) +#define EC_HANG_START_ON_LID_OPEN BIT(2) /* Start of AP S3->S0 transition (booting or resuming from suspend) */ -#define EC_HANG_START_ON_RESUME (1 << 3) +#define EC_HANG_START_ON_RESUME BIT(3) /* Reasons to cancel hang detection */ /* Power button released */ -#define EC_HANG_STOP_ON_POWER_RELEASE (1 << 8) +#define EC_HANG_STOP_ON_POWER_RELEASE BIT(8) /* Any host command from AP received */ -#define EC_HANG_STOP_ON_HOST_COMMAND (1 << 9) +#define EC_HANG_STOP_ON_HOST_COMMAND BIT(9) /* Stop on end of AP S0->S3 transition (suspending or shutting down) */ -#define EC_HANG_STOP_ON_SUSPEND (1 << 10) +#define EC_HANG_STOP_ON_SUSPEND BIT(10) /* * If this flag is set, all the other fields are ignored, and the hang detect @@ -2678,14 +2678,14 @@ struct ec_response_i2c_passthru { * without reconfiguring any of the other hang detect settings. Note that * you must previously have configured the timeouts. */ -#define EC_HANG_START_NOW (1 << 30) +#define EC_HANG_START_NOW BIT(30) /* * If this flag is set, all the other fields are ignored (including * EC_HANG_START_NOW). This provides the AP a way to stop the hang timer * without reconfiguring any of the other hang detect settings. */ -#define EC_HANG_STOP_NOW (1 << 31) +#define EC_HANG_STOP_NOW BIT(31) struct ec_params_hang_detect { /* Flags; see EC_HANG_* */ @@ -3040,8 +3040,8 @@ enum ec_reboot_cmd { }; /* Flags for ec_params_reboot_ec.reboot_flags */ -#define EC_REBOOT_FLAG_RESERVED0 (1 << 0) /* Was recovery request */ -#define EC_REBOOT_FLAG_ON_AP_SHUTDOWN (1 << 1) /* Reboot after AP shutdown */ +#define EC_REBOOT_FLAG_RESERVED0 BIT(0) /* Was recovery request */ +#define EC_REBOOT_FLAG_ON_AP_SHUTDOWN BIT(1) /* Reboot after AP shutdown */ struct ec_params_reboot_ec { uint8_t cmd; /* enum ec_reboot_cmd */ @@ -3343,9 +3343,9 @@ struct ec_params_usb_pd_control { uint8_t swap; } __ec_align1; -#define PD_CTRL_RESP_ENABLED_COMMS (1 << 0) /* Communication enabled */ -#define PD_CTRL_RESP_ENABLED_CONNECTED (1 << 1) /* Device connected */ -#define PD_CTRL_RESP_ENABLED_PD_CAPABLE (1 << 2) /* Partner is PD capable */ +#define PD_CTRL_RESP_ENABLED_COMMS BIT(0) /* Communication enabled */ +#define PD_CTRL_RESP_ENABLED_CONNECTED BIT(1) /* Device connected */ +#define PD_CTRL_RESP_ENABLED_PD_CAPABLE BIT(2) /* Partner is PD capable */ #define PD_CTRL_RESP_ROLE_POWER BIT(0) /* 0=SNK/1=SRC */ #define PD_CTRL_RESP_ROLE_DATA BIT(1) /* 0=UFP/1=DFP */ @@ -3545,10 +3545,10 @@ struct ec_params_usb_pd_mux_info { } __ec_align1; /* Flags representing mux state */ -#define USB_PD_MUX_USB_ENABLED (1 << 0) -#define USB_PD_MUX_DP_ENABLED (1 << 1) -#define USB_PD_MUX_POLARITY_INVERTED (1 << 2) -#define USB_PD_MUX_HPD_IRQ (1 << 3) +#define USB_PD_MUX_USB_ENABLED BIT(0) /* USB connected */ +#define USB_PD_MUX_DP_ENABLED BIT(1) /* DP connected */ +#define USB_PD_MUX_POLARITY_INVERTED BIT(2) /* CC line Polarity inverted */ +#define USB_PD_MUX_HPD_IRQ BIT(3) /* HPD IRQ is asserted */ struct ec_response_usb_pd_mux_info { uint8_t flags; /* USB_PD_MUX_*-encoded USB mux state */ -- cgit v1.2.3 From ce86c87d73512c8ac71b9b5335c8b830c7ac9491 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:38 -0700 Subject: mfd: cros_ec: Update ACPI interface definition Add more fields and improve API when EC presents data through ACPI memory space. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 419 ++++++++++++++++++++++++----------- 1 file changed, 293 insertions(+), 126 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index e97e9e976bd0..575066b90bab 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -83,13 +83,15 @@ /* Unused 0x28 - 0x2f */ #define EC_MEMMAP_SWITCHES 0x30 /* 8 bits */ /* Unused 0x31 - 0x33 */ -#define EC_MEMMAP_HOST_EVENTS 0x34 /* 32 bits */ -/* Reserve 0x38 - 0x3f for additional host event-related stuff */ -/* Battery values are all 32 bits */ +#define EC_MEMMAP_HOST_EVENTS 0x34 /* 64 bits */ +/* Battery values are all 32 bits, unless otherwise noted. */ #define EC_MEMMAP_BATT_VOLT 0x40 /* Battery Present Voltage */ #define EC_MEMMAP_BATT_RATE 0x44 /* Battery Present Rate */ #define EC_MEMMAP_BATT_CAP 0x48 /* Battery Remaining Capacity */ -#define EC_MEMMAP_BATT_FLAG 0x4c /* Battery State, defined below */ +#define EC_MEMMAP_BATT_FLAG 0x4c /* Battery State, see below (8-bit) */ +#define EC_MEMMAP_BATT_COUNT 0x4d /* Battery Count (8-bit) */ +#define EC_MEMMAP_BATT_INDEX 0x4e /* Current Battery Data Index (8-bit) */ +/* Unused 0x4f */ #define EC_MEMMAP_BATT_DCAP 0x50 /* Battery Design Capacity */ #define EC_MEMMAP_BATT_DVLT 0x54 /* Battery Design Voltage */ #define EC_MEMMAP_BATT_LFCC 0x58 /* Battery Last Full Charge Capacity */ @@ -103,10 +105,19 @@ /* Unused 0x84 - 0x8f */ #define EC_MEMMAP_ACC_STATUS 0x90 /* Accelerometer status (8 bits )*/ /* Unused 0x91 */ -#define EC_MEMMAP_ACC_DATA 0x92 /* Accelerometer data 0x92 - 0x9f */ +#define EC_MEMMAP_ACC_DATA 0x92 /* Accelerometers data 0x92 - 0x9f */ +/* 0x92: Lid Angle if available, LID_ANGLE_UNRELIABLE otherwise */ +/* 0x94 - 0x99: 1st Accelerometer */ +/* 0x9a - 0x9f: 2nd Accelerometer */ #define EC_MEMMAP_GYRO_DATA 0xa0 /* Gyroscope data 0xa0 - 0xa5 */ -/* Unused 0xa6 - 0xfe (remember, 0xff is NOT part of the memmap region) */ +/* Unused 0xa6 - 0xdf */ +/* + * ACPI is unable to access memory mapped data at or above this offset due to + * limitations of the ACPI protocol. Do not place data in the range 0xe0 - 0xfe + * which might be needed by ACPI. + */ +#define EC_MEMMAP_NO_ACPI 0xe0 /* Define the format of the accelerometer mapped memory status byte. */ #define EC_MEMMAP_ACC_STATUS_SAMPLE_ID_MASK 0x0f @@ -155,6 +166,8 @@ #define EC_BATT_FLAG_DISCHARGING 0x04 #define EC_BATT_FLAG_CHARGING 0x08 #define EC_BATT_FLAG_LEVEL_CRITICAL 0x10 +/* Set if some of the static/dynamic data is invalid (or outdated). */ +#define EC_BATT_FLAG_INVALID_DATA 0x20 /* Switch flags at EC_MEMMAP_SWITCHES */ #define EC_SWITCH_LID_OPEN 0x01 @@ -180,12 +193,200 @@ #define EC_WIRELESS_SWITCH_WWAN 0x04 /* WWAN power */ #define EC_WIRELESS_SWITCH_WLAN_POWER 0x08 /* WLAN power */ +/*****************************************************************************/ +/* + * ACPI commands + * + * These are valid ONLY on the ACPI command/data port. + */ + +/* + * ACPI Read Embedded Controller + * + * This reads from ACPI memory space on the EC (EC_ACPI_MEM_*). + * + * Use the following sequence: + * + * - Write EC_CMD_ACPI_READ to EC_LPC_ADDR_ACPI_CMD + * - Wait for EC_LPC_CMDR_PENDING bit to clear + * - Write address to EC_LPC_ADDR_ACPI_DATA + * - Wait for EC_LPC_CMDR_DATA bit to set + * - Read value from EC_LPC_ADDR_ACPI_DATA + */ +#define EC_CMD_ACPI_READ 0x0080 + +/* + * ACPI Write Embedded Controller + * + * This reads from ACPI memory space on the EC (EC_ACPI_MEM_*). + * + * Use the following sequence: + * + * - Write EC_CMD_ACPI_WRITE to EC_LPC_ADDR_ACPI_CMD + * - Wait for EC_LPC_CMDR_PENDING bit to clear + * - Write address to EC_LPC_ADDR_ACPI_DATA + * - Wait for EC_LPC_CMDR_PENDING bit to clear + * - Write value to EC_LPC_ADDR_ACPI_DATA + */ +#define EC_CMD_ACPI_WRITE 0x0081 + +/* + * ACPI Burst Enable Embedded Controller + * + * This enables burst mode on the EC to allow the host to issue several + * commands back-to-back. While in this mode, writes to mapped multi-byte + * data are locked out to ensure data consistency. + */ +#define EC_CMD_ACPI_BURST_ENABLE 0x0082 + +/* + * ACPI Burst Disable Embedded Controller + * + * This disables burst mode on the EC and stops preventing EC writes to mapped + * multi-byte data. + */ +#define EC_CMD_ACPI_BURST_DISABLE 0x0083 + +/* + * ACPI Query Embedded Controller + * + * This clears the lowest-order bit in the currently pending host events, and + * sets the result code to the 1-based index of the bit (event 0x00000001 = 1, + * event 0x80000000 = 32), or 0 if no event was pending. + */ +#define EC_CMD_ACPI_QUERY_EVENT 0x0084 + +/* Valid addresses in ACPI memory space, for read/write commands */ + +/* Memory space version; set to EC_ACPI_MEM_VERSION_CURRENT */ +#define EC_ACPI_MEM_VERSION 0x00 +/* + * Test location; writing value here updates test compliment byte to (0xff - + * value). + */ +#define EC_ACPI_MEM_TEST 0x01 +/* Test compliment; writes here are ignored. */ +#define EC_ACPI_MEM_TEST_COMPLIMENT 0x02 + +/* Keyboard backlight brightness percent (0 - 100) */ +#define EC_ACPI_MEM_KEYBOARD_BACKLIGHT 0x03 +/* DPTF Target Fan Duty (0-100, 0xff for auto/none) */ +#define EC_ACPI_MEM_FAN_DUTY 0x04 + +/* + * DPTF temp thresholds. Any of the EC's temp sensors can have up to two + * independent thresholds attached to them. The current value of the ID + * register determines which sensor is affected by the THRESHOLD and COMMIT + * registers. The THRESHOLD register uses the same EC_TEMP_SENSOR_OFFSET scheme + * as the memory-mapped sensors. The COMMIT register applies those settings. + * + * The spec does not mandate any way to read back the threshold settings + * themselves, but when a threshold is crossed the AP needs a way to determine + * which sensor(s) are responsible. Each reading of the ID register clears and + * returns one sensor ID that has crossed one of its threshold (in either + * direction) since the last read. A value of 0xFF means "no new thresholds + * have tripped". Setting or enabling the thresholds for a sensor will clear + * the unread event count for that sensor. + */ +#define EC_ACPI_MEM_TEMP_ID 0x05 +#define EC_ACPI_MEM_TEMP_THRESHOLD 0x06 +#define EC_ACPI_MEM_TEMP_COMMIT 0x07 +/* + * Here are the bits for the COMMIT register: + * bit 0 selects the threshold index for the chosen sensor (0/1) + * bit 1 enables/disables the selected threshold (0 = off, 1 = on) + * Each write to the commit register affects one threshold. + */ +#define EC_ACPI_MEM_TEMP_COMMIT_SELECT_MASK BIT(0) +#define EC_ACPI_MEM_TEMP_COMMIT_ENABLE_MASK BIT(1) +/* + * Example: + * + * Set the thresholds for sensor 2 to 50 C and 60 C: + * write 2 to [0x05] -- select temp sensor 2 + * write 0x7b to [0x06] -- C_TO_K(50) - EC_TEMP_SENSOR_OFFSET + * write 0x2 to [0x07] -- enable threshold 0 with this value + * write 0x85 to [0x06] -- C_TO_K(60) - EC_TEMP_SENSOR_OFFSET + * write 0x3 to [0x07] -- enable threshold 1 with this value + * + * Disable the 60 C threshold, leaving the 50 C threshold unchanged: + * write 2 to [0x05] -- select temp sensor 2 + * write 0x1 to [0x07] -- disable threshold 1 + */ + +/* DPTF battery charging current limit */ +#define EC_ACPI_MEM_CHARGING_LIMIT 0x08 + +/* Charging limit is specified in 64 mA steps */ +#define EC_ACPI_MEM_CHARGING_LIMIT_STEP_MA 64 +/* Value to disable DPTF battery charging limit */ +#define EC_ACPI_MEM_CHARGING_LIMIT_DISABLED 0xff + +/* + * Report device orientation + * Bits Definition + * 3:1 Device DPTF Profile Number (DDPN) + * 0 = Reserved for backward compatibility (indicates no valid + * profile number. Host should fall back to using TBMD). + * 1..7 = DPTF Profile number to indicate to host which table needs + * to be loaded. + * 0 Tablet Mode Device Indicator (TBMD) + */ +#define EC_ACPI_MEM_DEVICE_ORIENTATION 0x09 +#define EC_ACPI_MEM_TBMD_SHIFT 0 +#define EC_ACPI_MEM_TBMD_MASK 0x1 +#define EC_ACPI_MEM_DDPN_SHIFT 1 +#define EC_ACPI_MEM_DDPN_MASK 0x7 + +/* + * Report device features. Uses the same format as the host command, except: + * + * bit 0 (EC_FEATURE_LIMITED) changes meaning from "EC code has a limited set + * of features", which is of limited interest when the system is already + * interpreting ACPI bytecode, to "EC_FEATURES[0-7] is not supported". Since + * these are supported, it defaults to 0. + * This allows detecting the presence of this field since older versions of + * the EC codebase would simply return 0xff to that unknown address. Check + * FEATURES0 != 0xff (or FEATURES0[0] == 0) to make sure that the other bits + * are valid. + */ +#define EC_ACPI_MEM_DEVICE_FEATURES0 0x0a +#define EC_ACPI_MEM_DEVICE_FEATURES1 0x0b +#define EC_ACPI_MEM_DEVICE_FEATURES2 0x0c +#define EC_ACPI_MEM_DEVICE_FEATURES3 0x0d +#define EC_ACPI_MEM_DEVICE_FEATURES4 0x0e +#define EC_ACPI_MEM_DEVICE_FEATURES5 0x0f +#define EC_ACPI_MEM_DEVICE_FEATURES6 0x10 +#define EC_ACPI_MEM_DEVICE_FEATURES7 0x11 + +#define EC_ACPI_MEM_BATTERY_INDEX 0x12 + +/* + * USB Port Power. Each bit indicates whether the corresponding USB ports' power + * is enabled (1) or disabled (0). + * bit 0 USB port ID 0 + * ... + * bit 7 USB port ID 7 + */ +#define EC_ACPI_MEM_USB_PORT_POWER 0x13 + +/* + * ACPI addresses 0x20 - 0xff map to EC_MEMMAP offset 0x00 - 0xdf. This data + * is read-only from the AP. Added in EC_ACPI_MEM_VERSION 2. + */ +#define EC_ACPI_MEM_MAPPED_BEGIN 0x20 +#define EC_ACPI_MEM_MAPPED_SIZE 0xe0 + +/* Current version of ACPI memory address space */ +#define EC_ACPI_MEM_VERSION_CURRENT 2 + + /* * This header file is used in coreboot both in C and ACPI code. The ACPI code * is pre-processed to handle constants but the ASL compiler is unable to * handle actual C code so keep it separate. */ -#ifndef __ACPI__ + /* * Attributes for EC request and response packets. Just defining __packed @@ -238,7 +439,7 @@ #define EC_LPC_STATUS_PROCESSING 0x04 /* Last write to EC was a command, not data */ #define EC_LPC_STATUS_LAST_CMD 0x08 -/* EC is in burst mode. Unsupported by Chrome EC, so this bit is never set */ +/* EC is in burst mode */ #define EC_LPC_STATUS_BURST_MODE 0x10 /* SCI event is pending (requesting SCI query) */ #define EC_LPC_STATUS_SCI_PENDING 0x20 @@ -2323,6 +2524,8 @@ struct ec_response_temp_sensor_get_info { /*****************************************************************************/ /* Host event commands */ + +/* Obsolete. New implementation should use EC_CMD_HOST_EVENT instead */ /* * Host event mask params and response structures, shared by all of the host * event commands below. @@ -2348,6 +2551,86 @@ struct ec_response_host_event_mask { #define EC_CMD_HOST_EVENT_SET_WAKE_MASK 0x008E #define EC_CMD_HOST_EVENT_CLEAR_B 0x008F +/* + * Unified host event programming interface - Should be used by newer versions + * of BIOS/OS to program host events and masks + */ + +struct ec_params_host_event { + + /* Action requested by host - one of enum ec_host_event_action. */ + uint8_t action; + + /* + * Mask type that the host requested the action on - one of + * enum ec_host_event_mask_type. + */ + uint8_t mask_type; + + /* Set to 0, ignore on read */ + uint16_t reserved; + + /* Value to be used in case of set operations. */ + uint64_t value; +} __ec_align4; + +/* + * Response structure returned by EC_CMD_HOST_EVENT. + * Update the value on a GET request. Set to 0 on GET/CLEAR + */ + +struct ec_response_host_event { + + /* Mask value in case of get operation */ + uint64_t value; +} __ec_align4; + +enum ec_host_event_action { + /* + * params.value is ignored. Value of mask_type populated + * in response.value + */ + EC_HOST_EVENT_GET, + + /* Bits in params.value are set */ + EC_HOST_EVENT_SET, + + /* Bits in params.value are cleared */ + EC_HOST_EVENT_CLEAR, +}; + +enum ec_host_event_mask_type { + + /* Main host event copy */ + EC_HOST_EVENT_MAIN, + + /* Copy B of host events */ + EC_HOST_EVENT_B, + + /* SCI Mask */ + EC_HOST_EVENT_SCI_MASK, + + /* SMI Mask */ + EC_HOST_EVENT_SMI_MASK, + + /* Mask of events that should be always reported in hostevents */ + EC_HOST_EVENT_ALWAYS_REPORT_MASK, + + /* Active wake mask */ + EC_HOST_EVENT_ACTIVE_WAKE_MASK, + + /* Lazy wake mask for S0ix */ + EC_HOST_EVENT_LAZY_WAKE_MASK_S0IX, + + /* Lazy wake mask for S3 */ + EC_HOST_EVENT_LAZY_WAKE_MASK_S3, + + /* Lazy wake mask for S5 */ + EC_HOST_EVENT_LAZY_WAKE_MASK_S5, +}; + +#define EC_CMD_HOST_EVENT 0x00A4 + /*****************************************************************************/ /* Switch commands */ @@ -3056,122 +3339,6 @@ struct ec_params_reboot_ec { */ #define EC_CMD_GET_PANIC_INFO 0x00D3 -/*****************************************************************************/ -/* - * ACPI commands - * - * These are valid ONLY on the ACPI command/data port. - */ - -/* - * ACPI Read Embedded Controller - * - * This reads from ACPI memory space on the EC (EC_ACPI_MEM_*). - * - * Use the following sequence: - * - * - Write EC_CMD_ACPI_READ to EC_LPC_ADDR_ACPI_CMD - * - Wait for EC_LPC_CMDR_PENDING bit to clear - * - Write address to EC_LPC_ADDR_ACPI_DATA - * - Wait for EC_LPC_CMDR_DATA bit to set - * - Read value from EC_LPC_ADDR_ACPI_DATA - */ -#define EC_CMD_ACPI_READ 0x80 - -/* - * ACPI Write Embedded Controller - * - * This reads from ACPI memory space on the EC (EC_ACPI_MEM_*). - * - * Use the following sequence: - * - * - Write EC_CMD_ACPI_WRITE to EC_LPC_ADDR_ACPI_CMD - * - Wait for EC_LPC_CMDR_PENDING bit to clear - * - Write address to EC_LPC_ADDR_ACPI_DATA - * - Wait for EC_LPC_CMDR_PENDING bit to clear - * - Write value to EC_LPC_ADDR_ACPI_DATA - */ -#define EC_CMD_ACPI_WRITE 0x81 - -/* - * ACPI Query Embedded Controller - * - * This clears the lowest-order bit in the currently pending host events, and - * sets the result code to the 1-based index of the bit (event 0x00000001 = 1, - * event 0x80000000 = 32), or 0 if no event was pending. - */ -#define EC_CMD_ACPI_QUERY_EVENT 0x84 - -/* Valid addresses in ACPI memory space, for read/write commands */ - -/* Memory space version; set to EC_ACPI_MEM_VERSION_CURRENT */ -#define EC_ACPI_MEM_VERSION 0x00 -/* - * Test location; writing value here updates test compliment byte to (0xff - - * value). - */ -#define EC_ACPI_MEM_TEST 0x01 -/* Test compliment; writes here are ignored. */ -#define EC_ACPI_MEM_TEST_COMPLIMENT 0x02 - -/* Keyboard backlight brightness percent (0 - 100) */ -#define EC_ACPI_MEM_KEYBOARD_BACKLIGHT 0x03 -/* DPTF Target Fan Duty (0-100, 0xff for auto/none) */ -#define EC_ACPI_MEM_FAN_DUTY 0x04 - -/* - * DPTF temp thresholds. Any of the EC's temp sensors can have up to two - * independent thresholds attached to them. The current value of the ID - * register determines which sensor is affected by the THRESHOLD and COMMIT - * registers. The THRESHOLD register uses the same EC_TEMP_SENSOR_OFFSET scheme - * as the memory-mapped sensors. The COMMIT register applies those settings. - * - * The spec does not mandate any way to read back the threshold settings - * themselves, but when a threshold is crossed the AP needs a way to determine - * which sensor(s) are responsible. Each reading of the ID register clears and - * returns one sensor ID that has crossed one of its threshold (in either - * direction) since the last read. A value of 0xFF means "no new thresholds - * have tripped". Setting or enabling the thresholds for a sensor will clear - * the unread event count for that sensor. - */ -#define EC_ACPI_MEM_TEMP_ID 0x05 -#define EC_ACPI_MEM_TEMP_THRESHOLD 0x06 -#define EC_ACPI_MEM_TEMP_COMMIT 0x07 -/* - * Here are the bits for the COMMIT register: - * bit 0 selects the threshold index for the chosen sensor (0/1) - * bit 1 enables/disables the selected threshold (0 = off, 1 = on) - * Each write to the commit register affects one threshold. - */ -#define EC_ACPI_MEM_TEMP_COMMIT_SELECT_MASK (1 << 0) -#define EC_ACPI_MEM_TEMP_COMMIT_ENABLE_MASK (1 << 1) -/* - * Example: - * - * Set the thresholds for sensor 2 to 50 C and 60 C: - * write 2 to [0x05] -- select temp sensor 2 - * write 0x7b to [0x06] -- C_TO_K(50) - EC_TEMP_SENSOR_OFFSET - * write 0x2 to [0x07] -- enable threshold 0 with this value - * write 0x85 to [0x06] -- C_TO_K(60) - EC_TEMP_SENSOR_OFFSET - * write 0x3 to [0x07] -- enable threshold 1 with this value - * - * Disable the 60 C threshold, leaving the 50 C threshold unchanged: - * write 2 to [0x05] -- select temp sensor 2 - * write 0x1 to [0x07] -- disable threshold 1 - */ - -/* DPTF battery charging current limit */ -#define EC_ACPI_MEM_CHARGING_LIMIT 0x08 - -/* Charging limit is specified in 64 mA steps */ -#define EC_ACPI_MEM_CHARGING_LIMIT_STEP_MA 64 -/* Value to disable DPTF battery charging limit */ -#define EC_ACPI_MEM_CHARGING_LIMIT_DISABLED 0xff - -/* Current version of ACPI memory address space */ -#define EC_ACPI_MEM_VERSION_CURRENT 1 - - /*****************************************************************************/ /* * HDMI CEC commands @@ -3285,8 +3452,6 @@ enum mkbp_cec_event { */ #define EC_CMD_VERSION0 0x00DC -#endif /* !__ACPI__ */ - /*****************************************************************************/ /* * PD commands @@ -3627,4 +3792,6 @@ struct ec_response_usb_pd_mux_info { #define EC_LPC_ADDR_OLD_PARAM EC_HOST_CMD_REGION1 #define EC_OLD_PARAM_SIZE EC_HOST_CMD_REGION_SIZE + + #endif /* __CROS_EC_COMMANDS_H */ -- cgit v1.2.3 From e849b87487fb2ee448318d54381608f99ce2d4e0 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:39 -0700 Subject: mfd: cros_ec: move HDMI CEC API definition Move near the end of file. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 148 ++++++++++++++++++----------------- 1 file changed, 75 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 575066b90bab..d8bde2b5e9ce 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -3211,6 +3211,81 @@ struct ec_response_battery_vendor_param { } __ec_align4; /*****************************************************************************/ +/* + * HDMI CEC commands + * + * These commands are for sending and receiving message via HDMI CEC + */ + +#define MAX_CEC_MSG_LEN 16 + +/* CEC message from the AP to be written on the CEC bus */ +#define EC_CMD_CEC_WRITE_MSG 0x00B8 + +/** + * struct ec_params_cec_write - Message to write to the CEC bus + * @msg: message content to write to the CEC bus + */ +struct ec_params_cec_write { + uint8_t msg[MAX_CEC_MSG_LEN]; +} __ec_align1; + +/* Set various CEC parameters */ +#define EC_CMD_CEC_SET 0x00BA + +/** + * struct ec_params_cec_set - CEC parameters set + * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS + * @val: in case cmd is CEC_CMD_ENABLE, this field can be 0 to disable CEC + * or 1 to enable CEC functionality, in case cmd is + * CEC_CMD_LOGICAL_ADDRESS, this field encodes the requested logical + * address between 0 and 15 or 0xff to unregister + */ +struct ec_params_cec_set { + uint8_t cmd; /* enum cec_command */ + uint8_t val; +} __ec_align1; + +/* Read various CEC parameters */ +#define EC_CMD_CEC_GET 0x00BB + +/** + * struct ec_params_cec_get - CEC parameters get + * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS + */ +struct ec_params_cec_get { + uint8_t cmd; /* enum cec_command */ +} __ec_align1; + +/** + * struct ec_response_cec_get - CEC parameters get response + * @val: in case cmd was CEC_CMD_ENABLE, this field will 0 if CEC is + * disabled or 1 if CEC functionality is enabled, + * in case cmd was CEC_CMD_LOGICAL_ADDRESS, this will encode the + * configured logical address between 0 and 15 or 0xff if unregistered + */ +struct ec_response_cec_get { + uint8_t val; +} __ec_align1; + +/* CEC parameters command */ +enum cec_command { + /* CEC reading, writing and events enable */ + CEC_CMD_ENABLE, + /* CEC logical address */ + CEC_CMD_LOGICAL_ADDRESS, +}; + +/* Events from CEC to AP */ +enum mkbp_cec_event { + /* Outgoing message was acknowledged by a follower */ + EC_MKBP_CEC_SEND_OK = BIT(0), + /* Outgoing message was not acknowledged */ + EC_MKBP_CEC_SEND_FAILED = BIT(1), +}; + +/*****************************************************************************/ + /* Commands for I2S recording on audio codec. */ #define EC_CMD_CODEC_I2S 0x00BC @@ -3339,79 +3414,6 @@ struct ec_params_reboot_ec { */ #define EC_CMD_GET_PANIC_INFO 0x00D3 -/*****************************************************************************/ -/* - * HDMI CEC commands - * - * These commands are for sending and receiving message via HDMI CEC - */ -#define EC_MAX_CEC_MSG_LEN 16 - -/* CEC message from the AP to be written on the CEC bus */ -#define EC_CMD_CEC_WRITE_MSG 0x00B8 - -/** - * struct ec_params_cec_write - Message to write to the CEC bus - * @msg: message content to write to the CEC bus - */ -struct ec_params_cec_write { - uint8_t msg[EC_MAX_CEC_MSG_LEN]; -} __ec_align1; - -/* Set various CEC parameters */ -#define EC_CMD_CEC_SET 0x00BA - -/** - * struct ec_params_cec_set - CEC parameters set - * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS - * @val: in case cmd is CEC_CMD_ENABLE, this field can be 0 to disable CEC - * or 1 to enable CEC functionality, in case cmd is CEC_CMD_LOGICAL_ADDRESS, - * this field encodes the requested logical address between 0 and 15 - * or 0xff to unregister - */ -struct ec_params_cec_set { - uint8_t cmd; /* enum cec_command */ - uint8_t val; -} __ec_align1; - -/* Read various CEC parameters */ -#define EC_CMD_CEC_GET 0x00BB - -/** - * struct ec_params_cec_get - CEC parameters get - * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS - */ -struct ec_params_cec_get { - uint8_t cmd; /* enum cec_command */ -} __ec_align1; - -/** - * struct ec_response_cec_get - CEC parameters get response - * @val: in case cmd was CEC_CMD_ENABLE, this field will 0 if CEC is - * disabled or 1 if CEC functionality is enabled, - * in case cmd was CEC_CMD_LOGICAL_ADDRESS, this will encode the - * configured logical address between 0 and 15 or 0xff if unregistered - */ -struct ec_response_cec_get { - uint8_t val; -} __ec_align1; - -/* CEC parameters command */ -enum ec_cec_command { - /* CEC reading, writing and events enable */ - CEC_CMD_ENABLE, - /* CEC logical address */ - CEC_CMD_LOGICAL_ADDRESS, -}; - -/* Events from CEC to AP */ -enum mkbp_cec_event { - /* Outgoing message was acknowledged by a follower */ - EC_MKBP_CEC_SEND_OK = BIT(0), - /* Outgoing message was not acknowledged */ - EC_MKBP_CEC_SEND_FAILED = BIT(1), -}; - /*****************************************************************************/ /* * Special commands -- cgit v1.2.3 From fd3bbf4a47445a47b95d03f501651df0054ce117 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:40 -0700 Subject: mfd: cros_ec: Remove zero-size structs Empty structure size is different between C and C++. To prevent clang warning when compiling this include file in C++ programs, remove empty structures. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index d8bde2b5e9ce..fabf341af97f 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -1540,10 +1540,14 @@ struct lightbar_program { struct ec_params_lightbar { uint8_t cmd; /* Command (see enum lightbar_command) */ union { - struct { - /* no args */ - } dump, off, on, init, get_seq, get_params_v0, get_params_v1, - version, get_brightness, get_demo, suspend, resume; + /* + * The following commands have no args: + * + * dump, off, on, init, get_seq, get_params_v0, get_params_v1, + * version, get_brightness, get_demo, suspend, resume + * + * Don't use an empty struct, because C++ hates that. + */ struct __ec_todo_unpacked { uint8_t num; @@ -1597,11 +1601,13 @@ struct ec_response_lightbar { uint8_t red, green, blue; } get_rgb; - struct { - /* no return params */ - } off, on, init, set_brightness, seq, reg, set_rgb, - demo, set_params_v0, set_params_v1, - set_program, manual_suspend_ctrl, suspend, resume; + /* + * The following commands have no response: + * + * off, on, init, set_brightness, seq, reg, set_rgb, + * set_params_v0, set_params_v1, set_program, + * manual_suspend_ctrl, suspend, resume + */ }; } __ec_todo_packed; @@ -3021,9 +3027,7 @@ enum charge_state_params { struct ec_params_charge_state { uint8_t cmd; /* enum charge_state_command */ union { - struct { - /* no args */ - } get_state; + /* get_state has no args */ struct __ec_todo_unpacked { uint32_t param; /* enum charge_state_param */ @@ -3049,9 +3053,8 @@ struct ec_response_charge_state { struct __ec_align4 { uint32_t value; } get_param; - struct { - /* no return values */ - } set_param; + + /* set_param returns no args */ }; } __ec_align4; -- cgit v1.2.3 From 3c46ae6160aff70e01bd180dd3ddcccf09fcf901 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:41 -0700 Subject: mfd: cros_ec: Add Flash V2 commands API Added for supporting larger embedded controller flash. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 150 ++++++++++++++++++++++++++++++++++- 1 file changed, 147 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index fabf341af97f..3d1d26f62bd3 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -1138,6 +1138,7 @@ struct ec_response_get_features { /* Get flash info */ #define EC_CMD_FLASH_INFO 0x0010 +#define EC_VER_FLASH_INFO 2 /** * struct ec_response_flash_info - Response to the flash info command. @@ -1164,6 +1165,15 @@ struct ec_response_flash_info { */ #define EC_FLASH_INFO_ERASE_TO_0 BIT(0) +/* + * Flash must be selected for read/write/erase operations to succeed. This may + * be necessary on a chip where write/erase can be corrupted by other board + * activity, or where the chip needs to enable some sort of programming voltage, + * or where the read/write/erase operations require cleanly suspending other + * chip functionality. + */ +#define EC_FLASH_INFO_SELECT_REQUIRED BIT(1) + /** * struct ec_response_flash_info_1 - Response to the flash info v1 command. * @flash_size: Usable flash size in bytes. @@ -1186,6 +1196,12 @@ struct ec_response_flash_info { * gcc anonymous structs don't seem to get along with the __packed directive; * if they did we'd define the version 0 structure as a sub-structure of this * one. + * + * Version 2 supports flash banks of different sizes: + * The caller specified the number of banks it has preallocated + * (num_banks_desc) + * The EC returns the number of banks describing the flash memory. + * It adds banks descriptions up to num_banks_desc. */ struct ec_response_flash_info_1 { /* Version 0 fields; see above for description */ @@ -1199,6 +1215,42 @@ struct ec_response_flash_info_1 { uint32_t flags; } __ec_align4; +struct ec_params_flash_info_2 { + /* Number of banks to describe */ + uint16_t num_banks_desc; + /* Reserved; set 0; ignore on read */ + uint8_t reserved[2]; +} __ec_align4; + +struct ec_flash_bank { + /* Number of sector is in this bank. */ + uint16_t count; + /* Size in power of 2 of each sector (8 --> 256 bytes) */ + uint8_t size_exp; + /* Minimal write size for the sectors in this bank */ + uint8_t write_size_exp; + /* Erase size for the sectors in this bank */ + uint8_t erase_size_exp; + /* Size for write protection, usually identical to erase size. */ + uint8_t protect_size_exp; + /* Reserved; set 0; ignore on read */ + uint8_t reserved[2]; +}; + +struct ec_response_flash_info_2 { + /* Total flash in the EC. */ + uint32_t flash_size; + /* Flags; see EC_FLASH_INFO_* */ + uint32_t flags; + /* Maximum size to use to send data to write to the EC. */ + uint32_t write_ideal_size; + /* Number of banks present in the EC. */ + uint16_t num_banks_total; + /* Number of banks described in banks array. */ + uint16_t num_banks_desc; + struct ec_flash_bank banks[0]; +} __ec_align4; + /* * Read flash * @@ -1238,7 +1290,7 @@ struct ec_params_flash_write { #define EC_CMD_FLASH_ERASE 0x0013 /** - * struct ec_params_flash_erase - Parameters for the flash erase command. + * struct ec_params_flash_erase - Parameters for the flash erase command, v0. * @offset: Byte offset to erase. * @size: Size to erase in bytes. */ @@ -1247,6 +1299,43 @@ struct ec_params_flash_erase { uint32_t size; } __ec_align4; +/* + * v1 add async erase: + * subcommands can returns: + * EC_RES_SUCCESS : erased (see ERASE_SECTOR_ASYNC case below). + * EC_RES_INVALID_PARAM : offset/size are not aligned on a erase boundary. + * EC_RES_ERROR : other errors. + * EC_RES_BUSY : an existing erase operation is in progress. + * EC_RES_ACCESS_DENIED: Trying to erase running image. + * + * When ERASE_SECTOR_ASYNC returns EC_RES_SUCCESS, the operation is just + * properly queued. The user must call ERASE_GET_RESULT subcommand to get + * the proper result. + * When ERASE_GET_RESULT returns EC_RES_BUSY, the caller must wait and send + * ERASE_GET_RESULT again to get the result of ERASE_SECTOR_ASYNC. + * ERASE_GET_RESULT command may timeout on EC where flash access is not + * permitted while erasing. (For instance, STM32F4). + */ +enum ec_flash_erase_cmd { + FLASH_ERASE_SECTOR, /* Erase and wait for result */ + FLASH_ERASE_SECTOR_ASYNC, /* Erase and return immediately. */ + FLASH_ERASE_GET_RESULT, /* Ask for last erase result */ +}; + +/** + * struct ec_params_flash_erase_v1 - Parameters for the flash erase command, v1. + * @cmd: One of ec_flash_erase_cmd. + * @reserved: Pad byte; currently always contains 0. + * @flag: No flags defined yet; set to 0. + * @params: Same as v0 parameters. + */ +struct ec_params_flash_erase_v1 { + uint8_t cmd; + uint8_t reserved; + uint16_t flag; + struct ec_params_flash_erase params; +} __ec_align4; + /* * Get/set flash protection. * @@ -1282,6 +1371,15 @@ struct ec_params_flash_erase { #define EC_FLASH_PROTECT_ERROR_INCONSISTENT BIT(5) /* Entire flash code protected when the EC boots */ #define EC_FLASH_PROTECT_ALL_AT_BOOT BIT(6) +/* RW flash code protected when the EC boots */ +#define EC_FLASH_PROTECT_RW_AT_BOOT BIT(7) +/* RW flash code protected now. */ +#define EC_FLASH_PROTECT_RW_NOW BIT(8) +/* Rollback information flash region protected when the EC boots */ +#define EC_FLASH_PROTECT_ROLLBACK_AT_BOOT BIT(9) +/* Rollback information flash region protected now */ +#define EC_FLASH_PROTECT_ROLLBACK_NOW BIT(10) + /** * struct ec_params_flash_protect - Parameters for the flash protect command. @@ -1320,16 +1418,31 @@ struct ec_response_flash_protect { enum ec_flash_region { /* Region which holds read-only EC image */ EC_FLASH_REGION_RO = 0, - /* Region which holds rewritable EC image */ - EC_FLASH_REGION_RW, + /* + * Region which holds active RW image. 'Active' is different from + * 'running'. Active means 'scheduled-to-run'. Since RO image always + * scheduled to run, active/non-active applies only to RW images (for + * the same reason 'update' applies only to RW images. It's a state of + * an image on a flash. Running image can be RO, RW_A, RW_B but active + * image can only be RW_A or RW_B. In recovery mode, an active RW image + * doesn't enter 'running' state but it's still active on a flash. + */ + EC_FLASH_REGION_ACTIVE, /* * Region which should be write-protected in the factory (a superset of * EC_FLASH_REGION_RO) */ EC_FLASH_REGION_WP_RO, + /* Region which holds updatable (non-active) RW image */ + EC_FLASH_REGION_UPDATE, /* Number of regions */ EC_FLASH_REGION_COUNT, }; +/* + * 'RW' is vague if there are multiple RW images; we mean the active one, + * so the old constant is deprecated. + */ +#define EC_FLASH_REGION_RW EC_FLASH_REGION_ACTIVE /** * struct ec_params_flash_region_info - Parameters for the flash region info @@ -1364,6 +1477,37 @@ struct ec_response_vbnvcontext { uint8_t block[EC_VBNV_BLOCK_SIZE]; } __ec_align4; + +/* Get SPI flash information */ +#define EC_CMD_FLASH_SPI_INFO 0x0018 + +struct ec_response_flash_spi_info { + /* JEDEC info from command 0x9F (manufacturer, memory type, size) */ + uint8_t jedec[3]; + + /* Pad byte; currently always contains 0 */ + uint8_t reserved0; + + /* Manufacturer / device ID from command 0x90 */ + uint8_t mfr_dev_id[2]; + + /* Status registers from command 0x05 and 0x35 */ + uint8_t sr1, sr2; +} __ec_align1; + + +/* Select flash during flash operations */ +#define EC_CMD_FLASH_SELECT 0x0019 + +/** + * struct ec_params_flash_select - Parameters for the flash select command. + * @select: 1 to select flash, 0 to deselect flash + */ +struct ec_params_flash_select { + uint8_t select; +} __ec_align4; + + /*****************************************************************************/ /* PWM commands */ -- cgit v1.2.3 From 89193a04fce5b869eaf20a330deebcde0dd6806f Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:42 -0700 Subject: mfd: cros_ec: Add PWM_SET_DUTY API Add API for fan control. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 3d1d26f62bd3..2b3a94a4f0f4 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -1521,11 +1521,19 @@ struct ec_response_pwm_get_fan_rpm { /* Set target fan RPM */ #define EC_CMD_PWM_SET_FAN_TARGET_RPM 0x0021 -struct ec_params_pwm_set_fan_target_rpm { +/* Version 0 of input params */ +struct ec_params_pwm_set_fan_target_rpm_v0 { uint32_t rpm; +} __ec_align4; + +/* Version 1 of input params */ +struct ec_params_pwm_set_fan_target_rpm_v1 { + uint32_t rpm; + uint8_t fan_idx; } __ec_align_size1; /* Get keyboard backlight */ +/* OBSOLETE - Use EC_CMD_PWM_SET_DUTY */ #define EC_CMD_PWM_GET_KEYBOARD_BACKLIGHT 0x0022 struct ec_response_pwm_get_keyboard_backlight { @@ -1534,6 +1542,7 @@ struct ec_response_pwm_get_keyboard_backlight { } __ec_align1; /* Set keyboard backlight */ +/* OBSOLETE - Use EC_CMD_PWM_SET_DUTY */ #define EC_CMD_PWM_SET_KEYBOARD_BACKLIGHT 0x0023 struct ec_params_pwm_set_keyboard_backlight { @@ -1543,10 +1552,17 @@ struct ec_params_pwm_set_keyboard_backlight { /* Set target fan PWM duty cycle */ #define EC_CMD_PWM_SET_FAN_DUTY 0x0024 -struct ec_params_pwm_set_fan_duty { +/* Version 0 of input params */ +struct ec_params_pwm_set_fan_duty_v0 { uint32_t percent; } __ec_align4; +/* Version 1 of input params */ +struct ec_params_pwm_set_fan_duty_v1 { + uint32_t percent; + uint8_t fan_idx; +} __ec_align_size1; + #define EC_CMD_PWM_SET_DUTY 0x0025 /* 16 bit duty cycle, 0xffff = 100% */ #define EC_PWM_MAX_DUTY 0xffff -- cgit v1.2.3 From de83db57d7343a201f2ef0204929015ba268e098 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:43 -0700 Subject: mfd: cros_ec: Add lightbar v2 API New API split commands, improve EC command latency. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 124 +++++++++++++++++++++++++++++++++-- 1 file changed, 120 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 2b3a94a4f0f4..0ff1941288cf 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -1658,7 +1658,10 @@ struct lightbar_params_v1 { int32_t s3_sleep_for; int32_t s3_ramp_up; int32_t s3_ramp_down; + int32_t s5_ramp_up; + int32_t s5_ramp_down; int32_t tap_tick_delay; + int32_t tap_gate_delay; int32_t tap_display_time; /* Tap-for-battery params */ @@ -1686,11 +1689,82 @@ struct lightbar_params_v1 { uint8_t s0_idx[2][LB_BATTERY_LEVELS]; /* AP is running */ uint8_t s3_idx[2][LB_BATTERY_LEVELS]; /* AP is sleeping */ + /* s5: single color pulse on inhibited power-up */ + uint8_t s5_idx; + /* Color palette */ struct rgb_s color[8]; /* 0-3 are Google colors */ } __ec_todo_packed; -/* Lightbar program */ +/* Lightbar command params v2 + * crbug.com/467716 + * + * lightbar_parms_v1 was too big for i2c, therefore in v2, we split them up by + * logical groups to make it more manageable ( < 120 bytes). + * + * NOTE: Each of these groups must be less than 120 bytes. + */ + +struct lightbar_params_v2_timing { + /* Timing */ + int32_t google_ramp_up; + int32_t google_ramp_down; + int32_t s3s0_ramp_up; + int32_t s0_tick_delay[2]; /* AC=0/1 */ + int32_t s0a_tick_delay[2]; /* AC=0/1 */ + int32_t s0s3_ramp_down; + int32_t s3_sleep_for; + int32_t s3_ramp_up; + int32_t s3_ramp_down; + int32_t s5_ramp_up; + int32_t s5_ramp_down; + int32_t tap_tick_delay; + int32_t tap_gate_delay; + int32_t tap_display_time; +} __ec_todo_packed; + +struct lightbar_params_v2_tap { + /* Tap-for-battery params */ + uint8_t tap_pct_red; + uint8_t tap_pct_green; + uint8_t tap_seg_min_on; + uint8_t tap_seg_max_on; + uint8_t tap_seg_osc; + uint8_t tap_idx[3]; +} __ec_todo_packed; + +struct lightbar_params_v2_oscillation { + /* Oscillation */ + uint8_t osc_min[2]; /* AC=0/1 */ + uint8_t osc_max[2]; /* AC=0/1 */ + uint8_t w_ofs[2]; /* AC=0/1 */ +} __ec_todo_packed; + +struct lightbar_params_v2_brightness { + /* Brightness limits based on the backlight and AC. */ + uint8_t bright_bl_off_fixed[2]; /* AC=0/1 */ + uint8_t bright_bl_on_min[2]; /* AC=0/1 */ + uint8_t bright_bl_on_max[2]; /* AC=0/1 */ +} __ec_todo_packed; + +struct lightbar_params_v2_thresholds { + /* Battery level thresholds */ + uint8_t battery_threshold[LB_BATTERY_LEVELS - 1]; +} __ec_todo_packed; + +struct lightbar_params_v2_colors { + /* Map [AC][battery_level] to color index */ + uint8_t s0_idx[2][LB_BATTERY_LEVELS]; /* AP is running */ + uint8_t s3_idx[2][LB_BATTERY_LEVELS]; /* AP is sleeping */ + + /* s5: single color pulse on inhibited power-up */ + uint8_t s5_idx; + + /* Color palette */ + struct rgb_s color[8]; /* 0-3 are Google colors */ +} __ec_todo_packed; + +/* Lightbar program. */ #define EC_LB_PROG_LEN 192 struct lightbar_program { uint8_t size; @@ -1704,7 +1778,10 @@ struct ec_params_lightbar { * The following commands have no args: * * dump, off, on, init, get_seq, get_params_v0, get_params_v1, - * version, get_brightness, get_demo, suspend, resume + * version, get_brightness, get_demo, suspend, resume, + * get_params_v2_timing, get_params_v2_tap, get_params_v2_osc, + * get_params_v2_bright, get_params_v2_thlds, + * get_params_v2_colors * * Don't use an empty struct, because C++ hates that. */ @@ -1731,6 +1808,14 @@ struct ec_params_lightbar { struct lightbar_params_v0 set_params_v0; struct lightbar_params_v1 set_params_v1; + + struct lightbar_params_v2_timing set_v2par_timing; + struct lightbar_params_v2_tap set_v2par_tap; + struct lightbar_params_v2_oscillation set_v2par_osc; + struct lightbar_params_v2_brightness set_v2par_bright; + struct lightbar_params_v2_thresholds set_v2par_thlds; + struct lightbar_params_v2_colors set_v2par_colors; + struct lightbar_program set_program; }; } __ec_todo_packed; @@ -1752,6 +1837,14 @@ struct ec_response_lightbar { struct lightbar_params_v0 get_params_v0; struct lightbar_params_v1 get_params_v1; + + struct lightbar_params_v2_timing get_params_v2_timing; + struct lightbar_params_v2_tap get_params_v2_tap; + struct lightbar_params_v2_oscillation get_params_v2_osc; + struct lightbar_params_v2_brightness get_params_v2_bright; + struct lightbar_params_v2_thresholds get_params_v2_thlds; + struct lightbar_params_v2_colors get_params_v2_colors; + struct __ec_todo_unpacked { uint32_t num; uint32_t flags; @@ -1764,9 +1857,11 @@ struct ec_response_lightbar { /* * The following commands have no response: * - * off, on, init, set_brightness, seq, reg, set_rgb, + * off, on, init, set_brightness, seq, reg, set_rgb, demo, * set_params_v0, set_params_v1, set_program, - * manual_suspend_ctrl, suspend, resume + * manual_suspend_ctrl, suspend, resume, set_v2par_timing, + * set_v2par_tap, set_v2par_osc, set_v2par_bright, + * set_v2par_thlds, set_v2par_colors */ }; } __ec_todo_packed; @@ -1795,6 +1890,18 @@ enum lightbar_command { LIGHTBAR_CMD_MANUAL_SUSPEND_CTRL = 19, LIGHTBAR_CMD_SUSPEND = 20, LIGHTBAR_CMD_RESUME = 21, + LIGHTBAR_CMD_GET_PARAMS_V2_TIMING = 22, + LIGHTBAR_CMD_SET_PARAMS_V2_TIMING = 23, + LIGHTBAR_CMD_GET_PARAMS_V2_TAP = 24, + LIGHTBAR_CMD_SET_PARAMS_V2_TAP = 25, + LIGHTBAR_CMD_GET_PARAMS_V2_OSCILLATION = 26, + LIGHTBAR_CMD_SET_PARAMS_V2_OSCILLATION = 27, + LIGHTBAR_CMD_GET_PARAMS_V2_BRIGHTNESS = 28, + LIGHTBAR_CMD_SET_PARAMS_V2_BRIGHTNESS = 29, + LIGHTBAR_CMD_GET_PARAMS_V2_THRESHOLDS = 30, + LIGHTBAR_CMD_SET_PARAMS_V2_THRESHOLDS = 31, + LIGHTBAR_CMD_GET_PARAMS_V2_COLORS = 32, + LIGHTBAR_CMD_SET_PARAMS_V2_COLORS = 33, LIGHTBAR_NUM_CMDS }; @@ -1813,6 +1920,14 @@ enum ec_led_id { EC_LED_ID_POWER_LED, /* LED on power adapter or its plug */ EC_LED_ID_ADAPTER_LED, + /* LED to indicate left side */ + EC_LED_ID_LEFT_LED, + /* LED to indicate right side */ + EC_LED_ID_RIGHT_LED, + /* LED to indicate recovery mode with HW_REINIT */ + EC_LED_ID_RECOVERY_HW_REINIT_LED, + /* LED to indicate sysrq debug mode. */ + EC_LED_ID_SYSRQ_DEBUG_LED, EC_LED_ID_COUNT }; @@ -1827,6 +1942,7 @@ enum ec_led_colors { EC_LED_COLOR_BLUE, EC_LED_COLOR_YELLOW, EC_LED_COLOR_WHITE, + EC_LED_COLOR_AMBER, EC_LED_COLOR_COUNT }; -- cgit v1.2.3 From 03f6896aeb5bd03b95d29a0f22b3820773d97b9d Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:44 -0700 Subject: mfd: cros_ec: Expand hash API Improve API to verify EC image signature. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 0ff1941288cf..76943e64998a 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -2018,8 +2018,15 @@ enum ec_vboot_hash_status { * If one of these is specified, the EC will automatically update offset and * size to the correct values for the specified image (RO or RW). */ -#define EC_VBOOT_HASH_OFFSET_RO 0xfffffffe -#define EC_VBOOT_HASH_OFFSET_RW 0xfffffffd +#define EC_VBOOT_HASH_OFFSET_RO 0xfffffffe +#define EC_VBOOT_HASH_OFFSET_ACTIVE 0xfffffffd +#define EC_VBOOT_HASH_OFFSET_UPDATE 0xfffffffc + +/* + * 'RW' is vague if there are multiple RW images; we mean the active one, + * so the old constant is deprecated. + */ +#define EC_VBOOT_HASH_OFFSET_RW EC_VBOOT_HASH_OFFSET_ACTIVE /*****************************************************************************/ /* -- cgit v1.2.3 From 2908c4ed296ee2107c03503328eb951d5bc58211 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:45 -0700 Subject: mfd: cros_ec: Add EC transport protocol v4 Introduce a new transport procotol between EC and host. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 143 ++++++++++++++++++++++++++++++++++- 1 file changed, 141 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 76943e64998a..40a8069a58e8 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -455,7 +455,10 @@ #define EC_LPC_STATUS_BUSY_MASK \ (EC_LPC_STATUS_FROM_HOST | EC_LPC_STATUS_PROCESSING) -/* Host command response codes */ +/* + * Host command response codes (16-bit). Note that response codes should be + * stored in a uint16_t rather than directly in a value of this type. + */ enum ec_status { EC_RES_SUCCESS = 0, EC_RES_INVALID_COMMAND = 1, @@ -471,7 +474,13 @@ enum ec_status { EC_RES_OVERFLOW = 11, /* Table / data overflow */ EC_RES_INVALID_HEADER = 12, /* Header contains invalid data */ EC_RES_REQUEST_TRUNCATED = 13, /* Didn't get the entire request */ - EC_RES_RESPONSE_TOO_BIG = 14 /* Response was too big to handle */ + EC_RES_RESPONSE_TOO_BIG = 14, /* Response was too big to handle */ + EC_RES_BUS_ERROR = 15, /* Communications bus error */ + EC_RES_BUSY = 16, /* Up but too busy. Should retry */ + EC_RES_INVALID_HEADER_VERSION = 17, /* Header version invalid */ + EC_RES_INVALID_HEADER_CRC = 18, /* Header CRC invalid */ + EC_RES_INVALID_DATA_CRC = 19, /* Data CRC invalid */ + EC_RES_DUP_UNAVAILABLE = 20, /* Can't resend response */ }; /* @@ -744,6 +753,136 @@ struct ec_host_response { uint16_t reserved; } __ec_align4; +/*****************************************************************************/ + +/* + * Host command protocol V4. + * + * Packets always start with a request or response header. They are followed + * by data_len bytes of data. If the data_crc_present flag is set, the data + * bytes are followed by a CRC-8 of that data, using using x^8 + x^2 + x + 1 + * polynomial. + * + * Host algorithm when sending a request q: + * + * 101) tries_left=(some value, e.g. 3); + * 102) q.seq_num++ + * 103) q.seq_dup=0 + * 104) Calculate q.header_crc. + * 105) Send request q to EC. + * 106) Wait for response r. Go to 201 if received or 301 if timeout. + * + * 201) If r.struct_version != 4, go to 301. + * 202) If r.header_crc mismatches calculated CRC for r header, go to 301. + * 203) If r.data_crc_present and r.data_crc mismatches, go to 301. + * 204) If r.seq_num != q.seq_num, go to 301. + * 205) If r.seq_dup == q.seq_dup, return success. + * 207) If r.seq_dup == 1, go to 301. + * 208) Return error. + * + * 301) If --tries_left <= 0, return error. + * 302) If q.seq_dup == 1, go to 105. + * 303) q.seq_dup = 1 + * 304) Go to 104. + * + * EC algorithm when receiving a request q. + * EC has response buffer r, error buffer e. + * + * 101) If q.struct_version != 4, set e.result = EC_RES_INVALID_HEADER_VERSION + * and go to 301 + * 102) If q.header_crc mismatches calculated CRC, set e.result = + * EC_RES_INVALID_HEADER_CRC and go to 301 + * 103) If q.data_crc_present, calculate data CRC. If that mismatches the CRC + * byte at the end of the packet, set e.result = EC_RES_INVALID_DATA_CRC + * and go to 301. + * 104) If q.seq_dup == 0, go to 201. + * 105) If q.seq_num != r.seq_num, go to 201. + * 106) If q.seq_dup == r.seq_dup, go to 205, else go to 203. + * + * 201) Process request q into response r. + * 202) r.seq_num = q.seq_num + * 203) r.seq_dup = q.seq_dup + * 204) Calculate r.header_crc + * 205) If r.data_len > 0 and data is no longer available, set e.result = + * EC_RES_DUP_UNAVAILABLE and go to 301. + * 206) Send response r. + * + * 301) e.seq_num = q.seq_num + * 302) e.seq_dup = q.seq_dup + * 303) Calculate e.header_crc. + * 304) Send error response e. + */ + +/* Version 4 request from host */ +struct ec_host_request4 { + /* + * bits 0-3: struct_version: Structure version (=4) + * bit 4: is_response: Is response (=0) + * bits 5-6: seq_num: Sequence number + * bit 7: seq_dup: Sequence duplicate flag + */ + uint8_t fields0; + + /* + * bits 0-4: command_version: Command version + * bits 5-6: Reserved (set 0, ignore on read) + * bit 7: data_crc_present: Is data CRC present after data + */ + uint8_t fields1; + + /* Command code (EC_CMD_*) */ + uint16_t command; + + /* Length of data which follows this header (not including data CRC) */ + uint16_t data_len; + + /* Reserved (set 0, ignore on read) */ + uint8_t reserved; + + /* CRC-8 of above fields, using x^8 + x^2 + x + 1 polynomial */ + uint8_t header_crc; +} __ec_align4; + +/* Version 4 response from EC */ +struct ec_host_response4 { + /* + * bits 0-3: struct_version: Structure version (=4) + * bit 4: is_response: Is response (=1) + * bits 5-6: seq_num: Sequence number + * bit 7: seq_dup: Sequence duplicate flag + */ + uint8_t fields0; + + /* + * bits 0-6: Reserved (set 0, ignore on read) + * bit 7: data_crc_present: Is data CRC present after data + */ + uint8_t fields1; + + /* Result code (EC_RES_*) */ + uint16_t result; + + /* Length of data which follows this header (not including data CRC) */ + uint16_t data_len; + + /* Reserved (set 0, ignore on read) */ + uint8_t reserved; + + /* CRC-8 of above fields, using x^8 + x^2 + x + 1 polynomial */ + uint8_t header_crc; +} __ec_align4; + +/* Fields in fields0 byte */ +#define EC_PACKET4_0_STRUCT_VERSION_MASK 0x0f +#define EC_PACKET4_0_IS_RESPONSE_MASK 0x10 +#define EC_PACKET4_0_SEQ_NUM_SHIFT 5 +#define EC_PACKET4_0_SEQ_NUM_MASK 0x60 +#define EC_PACKET4_0_SEQ_DUP_MASK 0x80 + +/* Fields in fields1 byte */ +#define EC_PACKET4_1_COMMAND_VERSION_MASK 0x1f /* (request only) */ +#define EC_PACKET4_1_DATA_CRC_PRESENT_MASK 0x80 + /*****************************************************************************/ /* * Notes on commands: -- cgit v1.2.3 From a517bb4bb8c15e6f427496b9bb7eba89f0b96bbb Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:46 -0700 Subject: mfd: cros_ec: Complete MEMS sensor API Add new command for batched mode, add support for more sensors. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 464 ++++++++++++++++++++++++++++++----- 1 file changed, 406 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 40a8069a58e8..701b03cfa445 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -2191,7 +2191,13 @@ enum motionsense_command { /* * EC Rate command is a setter/getter command for the EC sampling rate - * of all motion sensors in milliseconds. + * in milliseconds. + * It is per sensor, the EC run sample task at the minimum of all + * sensors EC_RATE. + * For sensors without hardware FIFO, EC_RATE should be equals to 1/ODR + * to collect all the sensor samples. + * For sensor with hardware FIFO, EC_RATE is used as the maximal delay + * to process of all motion sensors in milliseconds. */ MOTIONSENSE_CMD_EC_RATE = 2, @@ -2222,32 +2228,76 @@ enum motionsense_command { MOTIONSENSE_CMD_DATA = 6, /* - * Perform low level calibration.. On sensors that support it, ask to - * do offset calibration. + * Return sensor fifo info. + */ + MOTIONSENSE_CMD_FIFO_INFO = 7, + + /* + * Insert a flush element in the fifo and return sensor fifo info. + * The host can use that element to synchronize its operation. + */ + MOTIONSENSE_CMD_FIFO_FLUSH = 8, + + /* + * Return a portion of the fifo. + */ + MOTIONSENSE_CMD_FIFO_READ = 9, + + /* + * Perform low level calibration. + * On sensors that support it, ask to do offset calibration. */ MOTIONSENSE_CMD_PERFORM_CALIB = 10, /* - * Sensor Offset command is a setter/getter command for the offset used - * for calibration. The offsets can be calculated by the host, or via + * Sensor Offset command is a setter/getter command for the offset + * used for calibration. + * The offsets can be calculated by the host, or via * PERFORM_CALIB command. */ MOTIONSENSE_CMD_SENSOR_OFFSET = 11, - /* Number of motionsense sub-commands. */ - MOTIONSENSE_NUM_CMDS -}; + /* + * List available activities for a MOTION sensor. + * Indicates if they are enabled or disabled. + */ + MOTIONSENSE_CMD_LIST_ACTIVITIES = 12, + + /* + * Activity management + * Enable/Disable activity recognition. + */ + MOTIONSENSE_CMD_SET_ACTIVITY = 13, + + /* + * Lid Angle + */ + MOTIONSENSE_CMD_LID_ANGLE = 14, + + /* + * Allow the FIFO to trigger interrupt via MKBP events. + * By default the FIFO does not send interrupt to process the FIFO + * until the AP is ready or it is coming from a wakeup sensor. + */ + MOTIONSENSE_CMD_FIFO_INT_ENABLE = 15, + + /* + * Spoof the readings of the sensors. The spoofed readings can be set + * to arbitrary values, or will lock to the last read actual values. + */ + MOTIONSENSE_CMD_SPOOF = 16, -enum motionsensor_id { - EC_MOTION_SENSOR_ACCEL_BASE = 0, - EC_MOTION_SENSOR_ACCEL_LID = 1, - EC_MOTION_SENSOR_GYRO = 2, + /* Set lid angle for tablet mode detection. */ + MOTIONSENSE_CMD_TABLET_MODE_LID_ANGLE = 17, /* - * Note, if more sensors are added and this count changes, the padding - * in ec_response_motion_sense dump command must be modified. + * Sensor Scale command is a setter/getter command for the calibration + * scale. */ - EC_MOTION_SENSOR_COUNT = 3 + MOTIONSENSE_CMD_SENSOR_SCALE = 18, + + /* Number of motionsense sub-commands. */ + MOTIONSENSE_NUM_CMDS }; /* List of motion sensor types. */ @@ -2259,6 +2309,7 @@ enum motionsensor_type { MOTIONSENSE_TYPE_LIGHT = 4, MOTIONSENSE_TYPE_ACTIVITY = 5, MOTIONSENSE_TYPE_BARO = 6, + MOTIONSENSE_TYPE_SYNC = 7, MOTIONSENSE_TYPE_MAX, }; @@ -2266,36 +2317,48 @@ enum motionsensor_type { enum motionsensor_location { MOTIONSENSE_LOC_BASE = 0, MOTIONSENSE_LOC_LID = 1, + MOTIONSENSE_LOC_CAMERA = 2, MOTIONSENSE_LOC_MAX, }; /* List of motion sensor chips. */ enum motionsensor_chip { MOTIONSENSE_CHIP_KXCJ9 = 0, + MOTIONSENSE_CHIP_LSM6DS0 = 1, + MOTIONSENSE_CHIP_BMI160 = 2, + MOTIONSENSE_CHIP_SI1141 = 3, + MOTIONSENSE_CHIP_SI1142 = 4, + MOTIONSENSE_CHIP_SI1143 = 5, + MOTIONSENSE_CHIP_KX022 = 6, + MOTIONSENSE_CHIP_L3GD20H = 7, + MOTIONSENSE_CHIP_BMA255 = 8, + MOTIONSENSE_CHIP_BMP280 = 9, + MOTIONSENSE_CHIP_OPT3001 = 10, + MOTIONSENSE_CHIP_BH1730 = 11, + MOTIONSENSE_CHIP_GPIO = 12, + MOTIONSENSE_CHIP_LIS2DH = 13, + MOTIONSENSE_CHIP_LSM6DSM = 14, + MOTIONSENSE_CHIP_LIS2DE = 15, + MOTIONSENSE_CHIP_LIS2MDL = 16, + MOTIONSENSE_CHIP_LSM6DS3 = 17, + MOTIONSENSE_CHIP_LSM6DSO = 18, + MOTIONSENSE_CHIP_LNG2DM = 19, + MOTIONSENSE_CHIP_MAX, }; -/* Module flag masks used for the dump sub-command. */ -#define MOTIONSENSE_MODULE_FLAG_ACTIVE (1<<0) - -/* Sensor flag masks used for the dump sub-command. */ -#define MOTIONSENSE_SENSOR_FLAG_PRESENT (1<<0) - -/* - * Send this value for the data element to only perform a read. If you - * send any other value, the EC will interpret it as data to set and will - * return the actual value set. - */ -#define EC_MOTION_SENSE_NO_VALUE -1 - -#define EC_MOTION_SENSE_INVALID_CALIB_TEMP 0x8000 - -/* Set Calibration information */ -#define MOTION_SENSE_SET_OFFSET 1 +/* List of orientation positions */ +enum motionsensor_orientation { + MOTIONSENSE_ORIENTATION_LANDSCAPE = 0, + MOTIONSENSE_ORIENTATION_PORTRAIT = 1, + MOTIONSENSE_ORIENTATION_UPSIDE_DOWN_PORTRAIT = 2, + MOTIONSENSE_ORIENTATION_UPSIDE_DOWN_LANDSCAPE = 3, + MOTIONSENSE_ORIENTATION_UNKNOWN = 4, +}; struct ec_response_motion_sensor_data { /* Flags for each sensor. */ uint8_t flags; - /* Sensor number the data comes from */ + /* Sensor number the data comes from. */ uint8_t sensor_num; /* Each sensor is up to 3-axis. */ union { @@ -2312,22 +2375,138 @@ struct ec_response_motion_sensor_data { }; } __ec_todo_packed; +/* Note: used in ec_response_get_next_data */ +struct ec_response_motion_sense_fifo_info { + /* Size of the fifo */ + uint16_t size; + /* Amount of space used in the fifo */ + uint16_t count; + /* Timestamp recorded in us. + * aka accurate timestamp when host event was triggered. + */ + uint32_t timestamp; + /* Total amount of vector lost */ + uint16_t total_lost; + /* Lost events since the last fifo_info, per sensors */ + uint16_t lost[0]; +} __ec_todo_packed; + +struct ec_response_motion_sense_fifo_data { + uint32_t number_data; + struct ec_response_motion_sensor_data data[0]; +} __ec_todo_packed; + +/* List supported activity recognition */ +enum motionsensor_activity { + MOTIONSENSE_ACTIVITY_RESERVED = 0, + MOTIONSENSE_ACTIVITY_SIG_MOTION = 1, + MOTIONSENSE_ACTIVITY_DOUBLE_TAP = 2, + MOTIONSENSE_ACTIVITY_ORIENTATION = 3, +}; + +struct ec_motion_sense_activity { + uint8_t sensor_num; + uint8_t activity; /* one of enum motionsensor_activity */ + uint8_t enable; /* 1: enable, 0: disable */ + uint8_t reserved; + uint16_t parameters[3]; /* activity dependent parameters */ +} __ec_todo_unpacked; + +/* Module flag masks used for the dump sub-command. */ +#define MOTIONSENSE_MODULE_FLAG_ACTIVE BIT(0) + +/* Sensor flag masks used for the dump sub-command. */ +#define MOTIONSENSE_SENSOR_FLAG_PRESENT BIT(0) + +/* + * Flush entry for synchronization. + * data contains time stamp + */ +#define MOTIONSENSE_SENSOR_FLAG_FLUSH BIT(0) +#define MOTIONSENSE_SENSOR_FLAG_TIMESTAMP BIT(1) +#define MOTIONSENSE_SENSOR_FLAG_WAKEUP BIT(2) +#define MOTIONSENSE_SENSOR_FLAG_TABLET_MODE BIT(3) +#define MOTIONSENSE_SENSOR_FLAG_ODR BIT(4) + +/* + * Send this value for the data element to only perform a read. If you + * send any other value, the EC will interpret it as data to set and will + * return the actual value set. + */ +#define EC_MOTION_SENSE_NO_VALUE -1 + +#define EC_MOTION_SENSE_INVALID_CALIB_TEMP 0x8000 + +/* MOTIONSENSE_CMD_SENSOR_OFFSET subcommand flag */ +/* Set Calibration information */ +#define MOTION_SENSE_SET_OFFSET BIT(0) + +/* Default Scale value, factor 1. */ +#define MOTION_SENSE_DEFAULT_SCALE BIT(15) + +#define LID_ANGLE_UNRELIABLE 500 + +enum motionsense_spoof_mode { + /* Disable spoof mode. */ + MOTIONSENSE_SPOOF_MODE_DISABLE = 0, + + /* Enable spoof mode, but use provided component values. */ + MOTIONSENSE_SPOOF_MODE_CUSTOM, + + /* Enable spoof mode, but use the current sensor values. */ + MOTIONSENSE_SPOOF_MODE_LOCK_CURRENT, + + /* Query the current spoof mode status for the sensor. */ + MOTIONSENSE_SPOOF_MODE_QUERY, +}; + struct ec_params_motion_sense { uint8_t cmd; union { /* Used for MOTIONSENSE_CMD_DUMP. */ struct __ec_todo_unpacked { - /* no args */ + /* + * Maximal number of sensor the host is expecting. + * 0 means the host is only interested in the number + * of sensors controlled by the EC. + */ + uint8_t max_sensor_count; } dump; /* - * Used for MOTIONSENSE_CMD_EC_RATE and - * MOTIONSENSE_CMD_KB_WAKE_ANGLE. + * Used for MOTIONSENSE_CMD_KB_WAKE_ANGLE. */ struct __ec_todo_unpacked { - /* Data to set or EC_MOTION_SENSE_NO_VALUE to read. */ + /* Data to set or EC_MOTION_SENSE_NO_VALUE to read. + * kb_wake_angle: angle to wakup AP. + */ int16_t data; - } ec_rate, kb_wake_angle; + } kb_wake_angle; + + /* + * Used for MOTIONSENSE_CMD_INFO, MOTIONSENSE_CMD_DATA + * and MOTIONSENSE_CMD_PERFORM_CALIB. + */ + struct __ec_todo_unpacked { + uint8_t sensor_num; + } info, info_3, data, fifo_flush, perform_calib, + list_activities; + + /* + * Used for MOTIONSENSE_CMD_EC_RATE, MOTIONSENSE_CMD_SENSOR_ODR + * and MOTIONSENSE_CMD_SENSOR_RANGE. + */ + struct __ec_todo_unpacked { + uint8_t sensor_num; + + /* Rounding flag, true for round-up, false for down. */ + uint8_t roundup; + + uint16_t reserved; + + /* Data to set or EC_MOTION_SENSE_NO_VALUE to read. */ + int32_t data; + } ec_rate, sensor_odr, sensor_range; /* Used for MOTIONSENSE_CMD_SENSOR_OFFSET */ struct __ec_todo_packed { @@ -2358,33 +2537,99 @@ struct ec_params_motion_sense { int16_t offset[3]; } sensor_offset; - /* Used for MOTIONSENSE_CMD_INFO. */ + /* Used for MOTIONSENSE_CMD_SENSOR_SCALE */ struct __ec_todo_packed { uint8_t sensor_num; - } info; - /* - * Used for MOTIONSENSE_CMD_SENSOR_ODR and - * MOTIONSENSE_CMD_SENSOR_RANGE. - */ - struct { - /* Should be element of enum motionsensor_id. */ - uint8_t sensor_num; + /* + * bit 0: If set (MOTION_SENSE_SET_OFFSET), set + * the calibration information in the EC. + * If unset, just retrieve calibration information. + */ + uint16_t flags; - /* Rounding flag, true for round-up, false for down. */ - uint8_t roundup; + /* + * Temperature at calibration, in units of 0.01 C + * 0x8000: invalid / unknown. + * 0x0: 0C + * 0x7fff: +327.67C + */ + int16_t temp; - uint16_t reserved; + /* + * Scale for calibration: + * By default scale is 1, it is encoded on 16bits: + * 1 = BIT(15) + * ~2 = 0xFFFF + * ~0 = 0. + */ + uint16_t scale[3]; + } sensor_scale; - /* Data to set or EC_MOTION_SENSE_NO_VALUE to read. */ - int32_t data; - } sensor_odr, sensor_range; + + /* Used for MOTIONSENSE_CMD_FIFO_INFO */ + /* (no params) */ + + /* Used for MOTIONSENSE_CMD_FIFO_READ */ + struct __ec_todo_unpacked { + /* + * Number of expected vector to return. + * EC may return less or 0 if none available. + */ + uint32_t max_data_vector; + } fifo_read; + + struct ec_motion_sense_activity set_activity; + + /* Used for MOTIONSENSE_CMD_LID_ANGLE */ + /* (no params) */ + + /* Used for MOTIONSENSE_CMD_FIFO_INT_ENABLE */ + struct __ec_todo_unpacked { + /* + * 1: enable, 0 disable fifo, + * EC_MOTION_SENSE_NO_VALUE return value. + */ + int8_t enable; + } fifo_int_enable; + + /* Used for MOTIONSENSE_CMD_SPOOF */ + struct __ec_todo_packed { + uint8_t sensor_id; + + /* See enum motionsense_spoof_mode. */ + uint8_t spoof_enable; + + /* Ignored, used for alignment. */ + uint8_t reserved; + + /* Individual component values to spoof. */ + int16_t components[3]; + } spoof; + + /* Used for MOTIONSENSE_CMD_TABLET_MODE_LID_ANGLE. */ + struct __ec_todo_unpacked { + /* + * Lid angle threshold for switching between tablet and + * clamshell mode. + */ + int16_t lid_angle; + + /* + * Hysteresis degree to prevent fluctuations between + * clamshell and tablet mode if lid angle keeps + * changing around the threshold. Lid motion driver will + * use lid_angle + hys_degree to trigger tablet mode and + * lid_angle - hys_degree to trigger clamshell mode. + */ + int16_t hys_degree; + } tablet_mode_threshold; }; } __ec_todo_packed; struct ec_response_motion_sense { union { - /* Used for MOTIONSENSE_CMD_DUMP. */ + /* Used for MOTIONSENSE_CMD_DUMP */ struct __ec_todo_unpacked { /* Flags representing the motion sensor module. */ uint8_t module_flags; @@ -2411,27 +2656,118 @@ struct ec_response_motion_sense { uint8_t chip; } info; + /* Used for MOTIONSENSE_CMD_INFO version 3 */ + struct __ec_todo_unpacked { + /* Should be element of enum motionsensor_type. */ + uint8_t type; + + /* Should be element of enum motionsensor_location. */ + uint8_t location; + + /* Should be element of enum motionsensor_chip. */ + uint8_t chip; + + /* Minimum sensor sampling frequency */ + uint32_t min_frequency; + + /* Maximum sensor sampling frequency */ + uint32_t max_frequency; + + /* Max number of sensor events that could be in fifo */ + uint32_t fifo_max_event_count; + } info_3; + /* Used for MOTIONSENSE_CMD_DATA */ struct ec_response_motion_sensor_data data; /* * Used for MOTIONSENSE_CMD_EC_RATE, MOTIONSENSE_CMD_SENSOR_ODR, - * MOTIONSENSE_CMD_SENSOR_RANGE, and - * MOTIONSENSE_CMD_KB_WAKE_ANGLE. + * MOTIONSENSE_CMD_SENSOR_RANGE, + * MOTIONSENSE_CMD_KB_WAKE_ANGLE, + * MOTIONSENSE_CMD_FIFO_INT_ENABLE and + * MOTIONSENSE_CMD_SPOOF. */ struct __ec_todo_unpacked { /* Current value of the parameter queried. */ int32_t ret; - } ec_rate, sensor_odr, sensor_range, kb_wake_angle; + } ec_rate, sensor_odr, sensor_range, kb_wake_angle, + fifo_int_enable, spoof; - /* Used for MOTIONSENSE_CMD_SENSOR_OFFSET */ + /* + * Used for MOTIONSENSE_CMD_SENSOR_OFFSET, + * PERFORM_CALIB. + */ struct __ec_todo_unpacked { int16_t temp; int16_t offset[3]; } sensor_offset, perform_calib; + + /* Used for MOTIONSENSE_CMD_SENSOR_SCALE */ + struct __ec_todo_unpacked { + int16_t temp; + uint16_t scale[3]; + } sensor_scale; + + struct ec_response_motion_sense_fifo_info fifo_info, fifo_flush; + + struct ec_response_motion_sense_fifo_data fifo_read; + + struct __ec_todo_packed { + uint16_t reserved; + uint32_t enabled; + uint32_t disabled; + } list_activities; + + /* No params for set activity */ + + /* Used for MOTIONSENSE_CMD_LID_ANGLE */ + struct __ec_todo_unpacked { + /* + * Angle between 0 and 360 degree if available, + * LID_ANGLE_UNRELIABLE otherwise. + */ + uint16_t value; + } lid_angle; + + /* Used for MOTIONSENSE_CMD_TABLET_MODE_LID_ANGLE. */ + struct __ec_todo_unpacked { + /* + * Lid angle threshold for switching between tablet and + * clamshell mode. + */ + uint16_t lid_angle; + + /* Hysteresis degree. */ + uint16_t hys_degree; + } tablet_mode_threshold; + }; } __ec_todo_packed; +/*****************************************************************************/ +/* Force lid open command */ + +/* Make lid event always open */ +#define EC_CMD_FORCE_LID_OPEN 0x002C + +struct ec_params_force_lid_open { + uint8_t enabled; +} __ec_align1; + +/*****************************************************************************/ +/* Configure the behavior of the power button */ +#define EC_CMD_CONFIG_POWER_BUTTON 0x002D + +enum ec_config_power_button_flags { + /* Enable/Disable power button pulses for x86 devices */ + EC_POWER_BUTTON_ENABLE_PULSE = BIT(0), +}; + +struct ec_params_config_power_button { + /* See enum ec_config_power_button_flags */ + uint8_t flags; +} __ec_align1; + /*****************************************************************************/ /* USB charging control commands */ @@ -2888,6 +3224,12 @@ union __ec_align_offset1 ec_response_get_next_data { /* Unaligned */ uint32_t host_event; + struct __ec_todo_unpacked { + /* For aligning the fifo_info */ + uint8_t reserved[3]; + struct ec_response_motion_sense_fifo_info info; + } sensor_fifo; + uint32_t buttons; uint32_t switches; uint32_t sysrq; @@ -2896,6 +3238,12 @@ union __ec_align_offset1 ec_response_get_next_data { union __ec_align_offset1 ec_response_get_next_data_v1 { uint8_t key_matrix[16]; uint32_t host_event; + struct __ec_todo_unpacked { + /* For aligning the fifo_info */ + uint8_t reserved[3]; + struct ec_response_motion_sense_fifo_info info; + } sensor_fifo; + uint32_t buttons; uint32_t switches; uint32_t sysrq; -- cgit v1.2.3 From 784dd15c930fd65576200bee225a2796e363c342 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:47 -0700 Subject: mfd: cros_ec: Fix event processing API Improve API between EC and Host to report events. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 87 +++++++++++++++++++++++++++++++----- 1 file changed, 76 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 701b03cfa445..51fe65170ce6 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -500,7 +500,8 @@ enum host_event_code { EC_HOST_EVENT_BATTERY_CRITICAL = 7, EC_HOST_EVENT_BATTERY = 8, EC_HOST_EVENT_THERMAL_THRESHOLD = 9, - EC_HOST_EVENT_THERMAL_OVERLOAD = 10, + /* Event generated by a device attached to the EC */ + EC_HOST_EVENT_DEVICE = 10, EC_HOST_EVENT_THERMAL = 11, EC_HOST_EVENT_USB_CHARGER = 12, EC_HOST_EVENT_KEY_PRESSED = 13, @@ -527,15 +528,34 @@ enum host_event_code { EC_HOST_EVENT_HANG_DETECT = 20, /* Hang detect logic detected a hang and warm rebooted the AP */ EC_HOST_EVENT_HANG_REBOOT = 21, + /* PD MCU triggering host event */ EC_HOST_EVENT_PD_MCU = 22, - /* EC desires to change state of host-controlled USB mux */ - EC_HOST_EVENT_USB_MUX = 28, + /* Battery Status flags have changed */ + EC_HOST_EVENT_BATTERY_STATUS = 23, + + /* EC encountered a panic, triggering a reset */ + EC_HOST_EVENT_PANIC = 24, + + /* Keyboard fastboot combo has been pressed */ + EC_HOST_EVENT_KEYBOARD_FASTBOOT = 25, /* EC RTC event occurred */ EC_HOST_EVENT_RTC = 26, + /* Emulate MKBP event */ + EC_HOST_EVENT_MKBP = 27, + + /* EC desires to change state of host-controlled USB mux */ + EC_HOST_EVENT_USB_MUX = 28, + + /* TABLET/LAPTOP mode or detachable base attach/detach event */ + EC_HOST_EVENT_MODE_CHANGE = 29, + + /* Keyboard recovery combo with hardware reinitialization */ + EC_HOST_EVENT_KEYBOARD_RECOVERY_HW_REINIT = 30, + /* * The high bit of the event mask is not used as a host event code. If * it reads back as set, then the entire event mask should be @@ -1259,7 +1279,7 @@ enum ec_feature_code { EC_FEATURE_REFINED_TABLET_MODE_HYSTERESIS = 37, /* EC supports audio codec. */ EC_FEATURE_AUDIO_CODEC = 38, - /* EC Supports SCP. */ + /* The MCU is a System Companion Processor (SCP). */ EC_FEATURE_SCP = 39, /* The MCU is an Integrated Sensor Hub */ EC_FEATURE_ISH = 40, @@ -3183,12 +3203,23 @@ struct ec_result_keyscan_seq_ctrl { } __ec_todo_packed; /* - * Command for retrieving the next pending MKBP event from the EC device + * Get the next pending MKBP event. * - * The device replies with UNAVAILABLE if there aren't any pending events. + * Returns EC_RES_UNAVAILABLE if there is no event pending. */ #define EC_CMD_GET_NEXT_EVENT 0x0067 +#define EC_MKBP_HAS_MORE_EVENTS_SHIFT 7 + +/* + * We use the most significant bit of the event type to indicate to the host + * that the EC has more MKBP events available to provide. + */ +#define EC_MKBP_HAS_MORE_EVENTS BIT(EC_MKBP_HAS_MORE_EVENTS_SHIFT) + +/* The mask to apply to get the raw event type */ +#define EC_MKBP_EVENT_TYPE_MASK (BIT(EC_MKBP_HAS_MORE_EVENTS_SHIFT) - 1) + enum ec_mkbp_event { /* Keyboard matrix changed. The event data is the new matrix state. */ EC_MKBP_EVENT_KEY_MATRIX = 0, @@ -3205,9 +3236,21 @@ enum ec_mkbp_event { /* The state of the switches have changed. */ EC_MKBP_EVENT_SWITCH = 4, - /* EC sent a sysrq command */ + /* New Fingerprint sensor event, the event data is fp_events bitmap. */ + EC_MKBP_EVENT_FINGERPRINT = 5, + + /* + * Sysrq event: send emulated sysrq. The event data is sysrq, + * corresponding to the key to be pressed. + */ EC_MKBP_EVENT_SYSRQ = 6, + /* + * New 64-bit host event. + * The event data is 8 bytes of host event flags. + */ + EC_MKBP_EVENT_HOST_EVENT64 = 7, + /* Notify the AP that something happened on CEC */ EC_MKBP_EVENT_CEC_EVENT = 8, @@ -3217,12 +3260,14 @@ enum ec_mkbp_event { /* Number of MKBP events */ EC_MKBP_EVENT_COUNT, }; +BUILD_ASSERT(EC_MKBP_EVENT_COUNT <= EC_MKBP_EVENT_TYPE_MASK); union __ec_align_offset1 ec_response_get_next_data { uint8_t key_matrix[13]; /* Unaligned */ - uint32_t host_event; + uint32_t host_event; + uint64_t host_event64; struct __ec_todo_unpacked { /* For aligning the fifo_info */ @@ -3230,14 +3275,25 @@ union __ec_align_offset1 ec_response_get_next_data { struct ec_response_motion_sense_fifo_info info; } sensor_fifo; - uint32_t buttons; - uint32_t switches; - uint32_t sysrq; + uint32_t buttons; + + uint32_t switches; + + uint32_t fp_events; + + uint32_t sysrq; + + /* CEC events from enum mkbp_cec_event */ + uint32_t cec_events; }; union __ec_align_offset1 ec_response_get_next_data_v1 { uint8_t key_matrix[16]; + + /* Unaligned */ uint32_t host_event; + uint64_t host_event64; + struct __ec_todo_unpacked { /* For aligning the fifo_info */ uint8_t reserved[3]; @@ -3245,11 +3301,19 @@ union __ec_align_offset1 ec_response_get_next_data_v1 { } sensor_fifo; uint32_t buttons; + uint32_t switches; + + uint32_t fp_events; + uint32_t sysrq; + + /* CEC events from enum mkbp_cec_event */ uint32_t cec_events; + uint8_t cec_message[16]; }; +BUILD_ASSERT(sizeof(union ec_response_get_next_data_v1) == 16); struct ec_response_get_next_event { uint8_t event_type; @@ -3268,6 +3332,7 @@ struct ec_response_get_next_event_v1 { #define EC_MKBP_POWER_BUTTON 0 #define EC_MKBP_VOL_UP 1 #define EC_MKBP_VOL_DOWN 2 +#define EC_MKBP_RECOVERY 3 /* Switches */ #define EC_MKBP_LID_OPEN 0 -- cgit v1.2.3 From 716bf50ea8b19c397e9d86238c6e7b307cbac3f5 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:48 -0700 Subject: mfd: cros_ec: Add fingerprint API Add support for fingerprint sensors managed by embedded controller. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 51fe65170ce6..d5d07a9957ec 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -3339,6 +3339,40 @@ struct ec_response_get_next_event_v1 { #define EC_MKBP_TABLET_MODE 1 #define EC_MKBP_BASE_ATTACHED 2 +/* Fingerprint events in 'fp_events' for EC_MKBP_EVENT_FINGERPRINT */ +#define EC_MKBP_FP_RAW_EVENT(fp_events) ((fp_events) & 0x00FFFFFF) +#define EC_MKBP_FP_ERRCODE(fp_events) ((fp_events) & 0x0000000F) +#define EC_MKBP_FP_ENROLL_PROGRESS_OFFSET 4 +#define EC_MKBP_FP_ENROLL_PROGRESS(fpe) (((fpe) & 0x00000FF0) \ + >> EC_MKBP_FP_ENROLL_PROGRESS_OFFSET) +#define EC_MKBP_FP_MATCH_IDX_OFFSET 12 +#define EC_MKBP_FP_MATCH_IDX_MASK 0x0000F000 +#define EC_MKBP_FP_MATCH_IDX(fpe) (((fpe) & EC_MKBP_FP_MATCH_IDX_MASK) \ + >> EC_MKBP_FP_MATCH_IDX_OFFSET) +#define EC_MKBP_FP_ENROLL BIT(27) +#define EC_MKBP_FP_MATCH BIT(28) +#define EC_MKBP_FP_FINGER_DOWN BIT(29) +#define EC_MKBP_FP_FINGER_UP BIT(30) +#define EC_MKBP_FP_IMAGE_READY BIT(31) +/* code given by EC_MKBP_FP_ERRCODE() when EC_MKBP_FP_ENROLL is set */ +#define EC_MKBP_FP_ERR_ENROLL_OK 0 +#define EC_MKBP_FP_ERR_ENROLL_LOW_QUALITY 1 +#define EC_MKBP_FP_ERR_ENROLL_IMMOBILE 2 +#define EC_MKBP_FP_ERR_ENROLL_LOW_COVERAGE 3 +#define EC_MKBP_FP_ERR_ENROLL_INTERNAL 5 +/* Can be used to detect if image was usable for enrollment or not. */ +#define EC_MKBP_FP_ERR_ENROLL_PROBLEM_MASK 1 +/* code given by EC_MKBP_FP_ERRCODE() when EC_MKBP_FP_MATCH is set */ +#define EC_MKBP_FP_ERR_MATCH_NO 0 +#define EC_MKBP_FP_ERR_MATCH_NO_INTERNAL 6 +#define EC_MKBP_FP_ERR_MATCH_NO_TEMPLATES 7 +#define EC_MKBP_FP_ERR_MATCH_NO_LOW_QUALITY 2 +#define EC_MKBP_FP_ERR_MATCH_NO_LOW_COVERAGE 4 +#define EC_MKBP_FP_ERR_MATCH_YES 1 +#define EC_MKBP_FP_ERR_MATCH_YES_UPDATED 3 +#define EC_MKBP_FP_ERR_MATCH_YES_UPDATE_FAILED 5 + + /*****************************************************************************/ /* Temperature sensor commands */ -- cgit v1.2.3 From 170309b438a3826b09401a84f3de911db5b627a7 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:49 -0700 Subject: mfd: cros_ec: Fix temperature API Improve API to retrieve temperature information. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 64 ++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index d5d07a9957ec..9a84aad7475a 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -2945,9 +2945,28 @@ enum ec_temp_thresholds { /* * Thermal configuration for one temperature sensor. Temps are in degrees K. * Zero values will be silently ignored by the thermal task. + * + * Set 'temp_host' value allows thermal task to trigger some event with 1 degree + * hysteresis. + * For example, + * temp_host[EC_TEMP_THRESH_HIGH] = 300 K + * temp_host_release[EC_TEMP_THRESH_HIGH] = 0 K + * EC will throttle ap when temperature >= 301 K, and release throttling when + * temperature <= 299 K. + * + * Set 'temp_host_release' value allows thermal task has a custom hysteresis. + * For example, + * temp_host[EC_TEMP_THRESH_HIGH] = 300 K + * temp_host_release[EC_TEMP_THRESH_HIGH] = 295 K + * EC will throttle ap when temperature >= 301 K, and release throttling when + * temperature <= 294 K. + * + * Note that this structure is a sub-structure of + * ec_params_thermal_set_threshold_v1, but maintains its alignment there. */ struct ec_thermal_config { uint32_t temp_host[EC_TEMP_THRESH_COUNT]; /* levels of hotness */ + uint32_t temp_host_release[EC_TEMP_THRESH_COUNT]; /* release levels */ uint32_t temp_fan_off; /* no active cooling needed */ uint32_t temp_fan_max; /* max active cooling needed */ } __ec_align4; @@ -2973,32 +2992,63 @@ struct ec_params_thermal_set_threshold_v1 { /* Toggle automatic fan control */ #define EC_CMD_THERMAL_AUTO_FAN_CTRL 0x0052 -/* Get TMP006 calibration data */ +/* Version 1 of input params */ +struct ec_params_auto_fan_ctrl_v1 { + uint8_t fan_idx; +} __ec_align1; + +/* Get/Set TMP006 calibration data */ #define EC_CMD_TMP006_GET_CALIBRATION 0x0053 +#define EC_CMD_TMP006_SET_CALIBRATION 0x0054 + +/* + * The original TMP006 calibration only needed four params, but now we need + * more. Since the algorithm is nothing but magic numbers anyway, we'll leave + * the params opaque. The v1 "get" response will include the algorithm number + * and how many params it requires. That way we can change the EC code without + * needing to update this file. We can also use a different algorithm on each + * sensor. + */ +/* This is the same struct for both v0 and v1. */ struct ec_params_tmp006_get_calibration { uint8_t index; } __ec_align1; -struct ec_response_tmp006_get_calibration { +/* Version 0 */ +struct ec_response_tmp006_get_calibration_v0 { float s0; float b0; float b1; float b2; } __ec_align4; -/* Set TMP006 calibration data */ -#define EC_CMD_TMP006_SET_CALIBRATION 0x0054 - -struct ec_params_tmp006_set_calibration { +struct ec_params_tmp006_set_calibration_v0 { uint8_t index; - uint8_t reserved[3]; /* Reserved; set 0 */ + uint8_t reserved[3]; float s0; float b0; float b1; float b2; } __ec_align4; +/* Version 1 */ +struct ec_response_tmp006_get_calibration_v1 { + uint8_t algorithm; + uint8_t num_params; + uint8_t reserved[2]; + float val[0]; +} __ec_align4; + +struct ec_params_tmp006_set_calibration_v1 { + uint8_t index; + uint8_t algorithm; + uint8_t num_params; + uint8_t reserved; + float val[0]; +} __ec_align4; + + /* Read raw TMP006 data */ #define EC_CMD_TMP006_GET_RAW 0x0055 -- cgit v1.2.3 From e16efdf12105d921b44b78a0012acf2487f3245b Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:50 -0700 Subject: mfd: cros_ec: Complete Power and USB PD API Improve API for USB Powe delivery and power management. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 236 +++++++++++++++++++++++++++++++++-- 1 file changed, 228 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 9a84aad7475a..e05cdcb12481 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -2796,7 +2796,8 @@ struct ec_params_config_power_button { struct ec_params_usb_charge_set_mode { uint8_t usb_port_id; - uint8_t mode; + uint8_t mode:7; + uint8_t inhibit_charge:1; } __ec_align1; /*****************************************************************************/ @@ -3933,6 +3934,11 @@ enum charge_state_params { CS_PARAM_CHG_INPUT_CURRENT, /* charger input current limit */ CS_PARAM_CHG_STATUS, /* charger-specific status */ CS_PARAM_CHG_OPTION, /* charger-specific options */ + CS_PARAM_LIMIT_POWER, /* + * Check if power is limited due to + * low battery and / or a weak external + * charger. READ ONLY. + */ /* How many so far? */ CS_NUM_BASE_PARAMS, @@ -3940,6 +3946,17 @@ enum charge_state_params { CS_PARAM_CUSTOM_PROFILE_MIN = 0x10000, CS_PARAM_CUSTOM_PROFILE_MAX = 0x1ffff, + /* Range for CONFIG_CHARGE_STATE_DEBUG params */ + CS_PARAM_DEBUG_MIN = 0x20000, + CS_PARAM_DEBUG_CTL_MODE = 0x20000, + CS_PARAM_DEBUG_MANUAL_MODE, + CS_PARAM_DEBUG_SEEMS_DEAD, + CS_PARAM_DEBUG_SEEMS_DISCONNECTED, + CS_PARAM_DEBUG_BATT_REMOVED, + CS_PARAM_DEBUG_MANUAL_CURRENT, + CS_PARAM_DEBUG_MANUAL_VOLTAGE, + CS_PARAM_DEBUG_MAX = 0x2ffff, + /* Other custom param ranges go here... */ }; @@ -4000,6 +4017,16 @@ struct ec_params_external_power_limit_v1 { #define EC_POWER_LIMIT_NONE 0xffff +/* + * Set maximum voltage & current of a dedicated charge port + */ +#define EC_CMD_OVERRIDE_DEDICATED_CHARGER_LIMIT 0x00A3 + +struct ec_params_dedicated_charger_limit { + uint16_t current_lim; /* in mA */ + uint16_t voltage_lim; /* in mV */ +} __ec_align2; + /* Inform the EC when entering a sleep state */ #define EC_CMD_HOST_SLEEP_EVENT 0x00A9 @@ -4385,18 +4412,53 @@ struct ec_params_reboot_ec { /* EC to PD MCU exchange status command */ #define EC_CMD_PD_EXCHANGE_STATUS 0x0100 +#define EC_VER_PD_EXCHANGE_STATUS 2 + +enum pd_charge_state { + PD_CHARGE_NO_CHANGE = 0, /* Don't change charge state */ + PD_CHARGE_NONE, /* No charging allowed */ + PD_CHARGE_5V, /* 5V charging only */ + PD_CHARGE_MAX /* Charge at max voltage */ +}; /* Status of EC being sent to PD */ +#define EC_STATUS_HIBERNATING BIT(0) + struct ec_params_pd_status { - int8_t batt_soc; /* battery state of charge */ + uint8_t status; /* EC status */ + int8_t batt_soc; /* battery state of charge */ + uint8_t charge_state; /* charging state (from enum pd_charge_state) */ } __ec_align1; /* Status of PD being sent back to EC */ +#define PD_STATUS_HOST_EVENT BIT(0) /* Forward host event to AP */ +#define PD_STATUS_IN_RW BIT(1) /* Running RW image */ +#define PD_STATUS_JUMPED_TO_IMAGE BIT(2) /* Current image was jumped to */ +#define PD_STATUS_TCPC_ALERT_0 BIT(3) /* Alert active in port 0 TCPC */ +#define PD_STATUS_TCPC_ALERT_1 BIT(4) /* Alert active in port 1 TCPC */ +#define PD_STATUS_TCPC_ALERT_2 BIT(5) /* Alert active in port 2 TCPC */ +#define PD_STATUS_TCPC_ALERT_3 BIT(6) /* Alert active in port 3 TCPC */ +#define PD_STATUS_EC_INT_ACTIVE (PD_STATUS_TCPC_ALERT_0 | \ + PD_STATUS_TCPC_ALERT_1 | \ + PD_STATUS_HOST_EVENT) struct ec_response_pd_status { - int8_t status; /* PD MCU status */ - uint32_t curr_lim_ma; /* input current limit */ + uint32_t curr_lim_ma; /* input current limit */ + uint16_t status; /* PD MCU status */ + int8_t active_charge_port; /* active charging port */ } __ec_align_size1; +/* AP to PD MCU host event status command, cleared on read */ +#define EC_CMD_PD_HOST_EVENT_STATUS 0x0104 + +/* PD MCU host event status bits */ +#define PD_EVENT_UPDATE_DEVICE BIT(0) +#define PD_EVENT_POWER_CHANGE BIT(1) +#define PD_EVENT_IDENTITY_RECEIVED BIT(2) +#define PD_EVENT_DATA_SWAP BIT(3) +struct ec_response_host_event_status { + uint32_t status; /* PD MCU host event status */ +} __ec_align4; + /* Set USB type-C port role and muxes */ #define EC_CMD_USB_PD_CONTROL 0x0101 @@ -4406,6 +4468,8 @@ enum usb_pd_control_role { USB_PD_CTRL_ROLE_TOGGLE_OFF = 2, USB_PD_CTRL_ROLE_FORCE_SINK = 3, USB_PD_CTRL_ROLE_FORCE_SOURCE = 4, + USB_PD_CTRL_ROLE_FREEZE = 5, + USB_PD_CTRL_ROLE_COUNT }; enum usb_pd_control_mux { @@ -4415,6 +4479,7 @@ enum usb_pd_control_mux { USB_PD_CTRL_MUX_DP = 3, USB_PD_CTRL_MUX_DOCK = 4, USB_PD_CTRL_MUX_AUTO = 5, + USB_PD_CTRL_MUX_COUNT }; enum usb_pd_control_swap { @@ -4444,6 +4509,13 @@ struct ec_params_usb_pd_control { #define PD_CTRL_RESP_ROLE_USB_COMM BIT(5) /* Partner USB comm capable */ #define PD_CTRL_RESP_ROLE_EXT_POWERED BIT(6) /* Partner externally powerd */ +struct ec_response_usb_pd_control { + uint8_t enabled; + uint8_t role; + uint8_t polarity; + uint8_t state; +} __ec_align1; + struct ec_response_usb_pd_control_v1 { uint8_t enabled; uint8_t role; @@ -4451,6 +4523,25 @@ struct ec_response_usb_pd_control_v1 { char state[32]; } __ec_align1; +/* Values representing usbc PD CC state */ +#define USBC_PD_CC_NONE 0 /* No accessory connected */ +#define USBC_PD_CC_NO_UFP 1 /* No UFP accessory connected */ +#define USBC_PD_CC_AUDIO_ACC 2 /* Audio accessory connected */ +#define USBC_PD_CC_DEBUG_ACC 3 /* Debug accessory connected */ +#define USBC_PD_CC_UFP_ATTACHED 4 /* UFP attached to usbc */ +#define USBC_PD_CC_DFP_ATTACHED 5 /* DPF attached to usbc */ + +struct ec_response_usb_pd_control_v2 { + uint8_t enabled; + uint8_t role; + uint8_t polarity; + char state[32]; + uint8_t cc_state; /* USBC_PD_CC_*Encoded cc state */ + uint8_t dp_mode; /* Current DP pin mode (MODE_DP_PIN_[A-E]) */ + /* CL:1500994 Current cable type */ + uint8_t reserved_cable_type; +} __ec_align1; + #define EC_CMD_USB_PD_PORTS 0x0102 /* Maximum number of PD ports on a device, num_ports will be <= this */ @@ -4478,6 +4569,7 @@ enum usb_chg_type { USB_CHG_TYPE_OTHER, USB_CHG_TYPE_VBUS, USB_CHG_TYPE_UNKNOWN, + USB_CHG_TYPE_DEDICATED, }; enum usb_power_roles { USB_PD_PORT_POWER_DISCONNECTED, @@ -4502,9 +4594,6 @@ struct ec_response_usb_pd_power_info { uint32_t max_power; } __ec_align4; -struct ec_params_usb_pd_info_request { - uint8_t port; -} __ec_align1; /* * This command will return the number of USB PD charge port + the number @@ -4516,6 +4605,46 @@ struct ec_response_charge_port_count { uint8_t port_count; } __ec_align1; +/* Write USB-PD device FW */ +#define EC_CMD_USB_PD_FW_UPDATE 0x0110 + +enum usb_pd_fw_update_cmds { + USB_PD_FW_REBOOT, + USB_PD_FW_FLASH_ERASE, + USB_PD_FW_FLASH_WRITE, + USB_PD_FW_ERASE_SIG, +}; + +struct ec_params_usb_pd_fw_update { + uint16_t dev_id; + uint8_t cmd; + uint8_t port; + uint32_t size; /* Size to write in bytes */ + /* Followed by data to write */ +} __ec_align4; + +/* Write USB-PD Accessory RW_HASH table entry */ +#define EC_CMD_USB_PD_RW_HASH_ENTRY 0x0111 +/* RW hash is first 20 bytes of SHA-256 of RW section */ +#define PD_RW_HASH_SIZE 20 +struct ec_params_usb_pd_rw_hash_entry { + uint16_t dev_id; + uint8_t dev_rw_hash[PD_RW_HASH_SIZE]; + uint8_t reserved; /* + * For alignment of current_image + * TODO(rspangler) but it's not aligned! + * Should have been reserved[2]. + */ + uint32_t current_image; /* One of ec_current_image */ +} __ec_align1; + +/* Read USB-PD Accessory info */ +#define EC_CMD_USB_PD_DEV_INFO 0x0112 + +struct ec_params_usb_pd_info_request { + uint8_t port; +} __ec_align1; + /* Read USB-PD Device discovery info */ #define EC_CMD_USB_PD_DISCOVERY 0x0113 struct ec_params_usb_pd_discovery_entry { @@ -4538,7 +4667,11 @@ struct ec_params_charge_port_override { int16_t override_port; /* Override port# */ } __ec_align2; -/* Read (and delete) one entry of PD event log */ +/* + * Read (and delete) one entry of PD event log. + * TODO(crbug.com/751742): Make this host command more generic to accommodate + * future non-PD logs that use the same internal EC event_log. + */ #define EC_CMD_PD_GET_LOG_ENTRY 0x0115 struct ec_response_pd_log { @@ -4626,6 +4759,60 @@ struct mcdp_info { #define MCDP_CHIPID(chipid) ((chipid[0] << 8) | chipid[1]) #define MCDP_FAMILY(family) ((family[0] << 8) | family[1]) +/* Get/Set USB-PD Alternate mode info */ +#define EC_CMD_USB_PD_GET_AMODE 0x0116 +struct ec_params_usb_pd_get_mode_request { + uint16_t svid_idx; /* SVID index to get */ + uint8_t port; /* port */ +} __ec_align_size1; + +struct ec_params_usb_pd_get_mode_response { + uint16_t svid; /* SVID */ + uint16_t opos; /* Object Position */ + uint32_t vdo[6]; /* Mode VDOs */ +} __ec_align4; + +#define EC_CMD_USB_PD_SET_AMODE 0x0117 + +enum pd_mode_cmd { + PD_EXIT_MODE = 0, + PD_ENTER_MODE = 1, + /* Not a command. Do NOT remove. */ + PD_MODE_CMD_COUNT, +}; + +struct ec_params_usb_pd_set_mode_request { + uint32_t cmd; /* enum pd_mode_cmd */ + uint16_t svid; /* SVID to set */ + uint8_t opos; /* Object Position */ + uint8_t port; /* port */ +} __ec_align4; + +/* Ask the PD MCU to record a log of a requested type */ +#define EC_CMD_PD_WRITE_LOG_ENTRY 0x0118 + +struct ec_params_pd_write_log_entry { + uint8_t type; /* event type : see PD_EVENT_xx above */ + uint8_t port; /* port#, or 0 for events unrelated to a given port */ +} __ec_align1; + + +/* Control USB-PD chip */ +#define EC_CMD_PD_CONTROL 0x0119 + +enum ec_pd_control_cmd { + PD_SUSPEND = 0, /* Suspend the PD chip (EC: stop talking to PD) */ + PD_RESUME, /* Resume the PD chip (EC: start talking to PD) */ + PD_RESET, /* Force reset the PD chip */ + PD_CONTROL_DISABLE, /* Disable further calls to this command */ + PD_CHIP_ON, /* Power on the PD chip */ +}; + +struct ec_params_pd_control { + uint8_t chip; /* chip id */ + uint8_t subcmd; +} __ec_align1; + /* Get info about USB-C SS muxes */ #define EC_CMD_USB_PD_MUX_INFO 0x011A @@ -4638,10 +4825,43 @@ struct ec_params_usb_pd_mux_info { #define USB_PD_MUX_DP_ENABLED BIT(1) /* DP connected */ #define USB_PD_MUX_POLARITY_INVERTED BIT(2) /* CC line Polarity inverted */ #define USB_PD_MUX_HPD_IRQ BIT(3) /* HPD IRQ is asserted */ +#define USB_PD_MUX_HPD_LVL BIT(4) /* HPD level is asserted */ struct ec_response_usb_pd_mux_info { uint8_t flags; /* USB_PD_MUX_*-encoded USB mux state */ } __ec_align1; + +#define EC_CMD_PD_CHIP_INFO 0x011B + +struct ec_params_pd_chip_info { + uint8_t port; /* USB-C port number */ + uint8_t renew; /* Force renewal */ +} __ec_align1; + +struct ec_response_pd_chip_info { + uint16_t vendor_id; + uint16_t product_id; + uint16_t device_id; + union { + uint8_t fw_version_string[8]; + uint64_t fw_version_number; + }; +} __ec_align2; + +struct ec_response_pd_chip_info_v1 { + uint16_t vendor_id; + uint16_t product_id; + uint16_t device_id; + union { + uint8_t fw_version_string[8]; + uint64_t fw_version_number; + }; + union { + uint8_t min_req_fw_version_string[8]; + uint64_t min_req_fw_version_number; + }; +} __ec_align2; + /*****************************************************************************/ /* * Reserve a range of host commands for board-specific, experimental, or -- cgit v1.2.3 From fd5372848a6fab0f4ec80c554f384cb1a8c19bb7 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:51 -0700 Subject: mfd: cros_ec: Add API for keyboard testing Add command to allow keyboard testing in factory. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index e05cdcb12481..cc054a0a4c4c 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -3142,6 +3142,17 @@ struct ec_params_mkbp_simulate_key { uint8_t pressed; } __ec_align1; +#define EC_CMD_GET_KEYBOARD_ID 0x0063 + +struct ec_response_keyboard_id { + uint32_t keyboard_id; +} __ec_align4; + +enum keyboard_id { + KEYBOARD_ID_UNSUPPORTED = 0, + KEYBOARD_ID_UNREADABLE = 0xffffffff, +}; + /* Configure keyboard scanning */ #define EC_CMD_MKBP_SET_CONFIG 0x0064 #define EC_CMD_MKBP_GET_CONFIG 0x0065 @@ -3390,6 +3401,13 @@ struct ec_response_get_next_event_v1 { #define EC_MKBP_TABLET_MODE 1 #define EC_MKBP_BASE_ATTACHED 2 +/* Run keyboard factory test scanning */ +#define EC_CMD_KEYBOARD_FACTORY_TEST 0x0068 + +struct ec_response_keyboard_factory_test { + uint16_t shorted; /* Keyboard pins are shorted */ +} __ec_align2; + /* Fingerprint events in 'fp_events' for EC_MKBP_EVENT_FINGERPRINT */ #define EC_MKBP_FP_RAW_EVENT(fp_events) ((fp_events) & 0x00FFFFFF) #define EC_MKBP_FP_ERRCODE(fp_events) ((fp_events) & 0x0000000F) -- cgit v1.2.3 From b92be99f37427fbc5deb6cb1a246f096e302f92d Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:52 -0700 Subject: mfd: cros_ec: Add Hibernate API Add support for controlling hibernation of the Embedded Controller. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 72 +++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index cc054a0a4c4c..7f98c6e63ad1 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -4045,6 +4045,40 @@ struct ec_params_dedicated_charger_limit { uint16_t voltage_lim; /* in mV */ } __ec_align2; +/*****************************************************************************/ +/* Hibernate/Deep Sleep Commands */ + +/* Set the delay before going into hibernation. */ +#define EC_CMD_HIBERNATION_DELAY 0x00A8 + +struct ec_params_hibernation_delay { + /* + * Seconds to wait in G3 before hibernate. Pass in 0 to read the + * current settings without changing them. + */ + uint32_t seconds; +} __ec_align4; + +struct ec_response_hibernation_delay { + /* + * The current time in seconds in which the system has been in the G3 + * state. This value is reset if the EC transitions out of G3. + */ + uint32_t time_g3; + + /* + * The current time remaining in seconds until the EC should hibernate. + * This value is also reset if the EC transitions out of G3. + */ + uint32_t time_remaining; + + /* + * The current time in seconds that the EC should wait in G3 before + * hibernating. + */ + uint32_t hibernate_delay; +} __ec_align4; + /* Inform the EC when entering a sleep state */ #define EC_CMD_HOST_SLEEP_EVENT 0x00A9 @@ -4052,7 +4086,9 @@ enum host_sleep_event { HOST_SLEEP_EVENT_S3_SUSPEND = 1, HOST_SLEEP_EVENT_S3_RESUME = 2, HOST_SLEEP_EVENT_S0IX_SUSPEND = 3, - HOST_SLEEP_EVENT_S0IX_RESUME = 4 + HOST_SLEEP_EVENT_S0IX_RESUME = 4, + /* S3 suspend with additional enabled wake sources */ + HOST_SLEEP_EVENT_S3_WAKEABLE_SUSPEND = 5, }; struct ec_params_host_sleep_event { @@ -4116,6 +4152,36 @@ struct ec_response_host_sleep_event_v1 { }; } __ec_align4; +/*****************************************************************************/ +/* Device events */ +#define EC_CMD_DEVICE_EVENT 0x00AA + +enum ec_device_event { + EC_DEVICE_EVENT_TRACKPAD, + EC_DEVICE_EVENT_DSP, + EC_DEVICE_EVENT_WIFI, +}; + +enum ec_device_event_param { + /* Get and clear pending device events */ + EC_DEVICE_EVENT_PARAM_GET_CURRENT_EVENTS, + /* Get device event mask */ + EC_DEVICE_EVENT_PARAM_GET_ENABLED_EVENTS, + /* Set device event mask */ + EC_DEVICE_EVENT_PARAM_SET_ENABLED_EVENTS, +}; + +#define EC_DEVICE_EVENT_MASK(event_code) BIT(event_code % 32) + +struct ec_params_device_event { + uint32_t event_mask; + uint8_t param; +} __ec_align_size1; + +struct ec_response_device_event { + uint32_t event_mask; +} __ec_align4; + /*****************************************************************************/ /* Smart battery pass-through */ @@ -4361,12 +4427,14 @@ enum ec_reboot_cmd { /* (command 3 was jump to RW-B) */ EC_REBOOT_COLD = 4, /* Cold-reboot */ EC_REBOOT_DISABLE_JUMP = 5, /* Disable jump until next reboot */ - EC_REBOOT_HIBERNATE = 6 /* Hibernate EC */ + EC_REBOOT_HIBERNATE = 6, /* Hibernate EC */ + EC_REBOOT_HIBERNATE_CLEAR_AP_OFF = 7, /* and clears AP_OFF flag */ }; /* Flags for ec_params_reboot_ec.reboot_flags */ #define EC_REBOOT_FLAG_RESERVED0 BIT(0) /* Was recovery request */ #define EC_REBOOT_FLAG_ON_AP_SHUTDOWN BIT(1) /* Reboot after AP shutdown */ +#define EC_REBOOT_FLAG_SWITCH_RW_SLOT BIT(2) /* Switch RW slot */ struct ec_params_reboot_ec { uint8_t cmd; /* enum ec_reboot_cmd */ -- cgit v1.2.3 From 77c48c76ad91cf774df26c6e5c74c76842943802 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:53 -0700 Subject: mfd: cros_ec: Add Smart Battery Firmware update API Add API to update battery firmware. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 73 ++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 7f98c6e63ad1..49ea905cfd18 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -4243,6 +4243,79 @@ struct ec_response_battery_vendor_param { uint32_t value; } __ec_align4; +/*****************************************************************************/ +/* + * Smart Battery Firmware Update Commands + */ +#define EC_CMD_SB_FW_UPDATE 0x00B5 + +enum ec_sb_fw_update_subcmd { + EC_SB_FW_UPDATE_PREPARE = 0x0, + EC_SB_FW_UPDATE_INFO = 0x1, /*query sb info */ + EC_SB_FW_UPDATE_BEGIN = 0x2, /*check if protected */ + EC_SB_FW_UPDATE_WRITE = 0x3, /*check if protected */ + EC_SB_FW_UPDATE_END = 0x4, + EC_SB_FW_UPDATE_STATUS = 0x5, + EC_SB_FW_UPDATE_PROTECT = 0x6, + EC_SB_FW_UPDATE_MAX = 0x7, +}; + +#define SB_FW_UPDATE_CMD_WRITE_BLOCK_SIZE 32 +#define SB_FW_UPDATE_CMD_STATUS_SIZE 2 +#define SB_FW_UPDATE_CMD_INFO_SIZE 8 + +struct ec_sb_fw_update_header { + uint16_t subcmd; /* enum ec_sb_fw_update_subcmd */ + uint16_t fw_id; /* firmware id */ +} __ec_align4; + +struct ec_params_sb_fw_update { + struct ec_sb_fw_update_header hdr; + union { + /* EC_SB_FW_UPDATE_PREPARE = 0x0 */ + /* EC_SB_FW_UPDATE_INFO = 0x1 */ + /* EC_SB_FW_UPDATE_BEGIN = 0x2 */ + /* EC_SB_FW_UPDATE_END = 0x4 */ + /* EC_SB_FW_UPDATE_STATUS = 0x5 */ + /* EC_SB_FW_UPDATE_PROTECT = 0x6 */ + /* Those have no args */ + + /* EC_SB_FW_UPDATE_WRITE = 0x3 */ + struct __ec_align4 { + uint8_t data[SB_FW_UPDATE_CMD_WRITE_BLOCK_SIZE]; + } write; + }; +} __ec_align4; + +struct ec_response_sb_fw_update { + union { + /* EC_SB_FW_UPDATE_INFO = 0x1 */ + struct __ec_align1 { + uint8_t data[SB_FW_UPDATE_CMD_INFO_SIZE]; + } info; + + /* EC_SB_FW_UPDATE_STATUS = 0x5 */ + struct __ec_align1 { + uint8_t data[SB_FW_UPDATE_CMD_STATUS_SIZE]; + } status; + }; +} __ec_align1; + +/* + * Entering Verified Boot Mode Command + * Default mode is VBOOT_MODE_NORMAL if EC did not receive this command. + * Valid Modes are: normal, developer, and recovery. + */ +#define EC_CMD_ENTERING_MODE 0x00B6 + +struct ec_params_entering_mode { + int vboot_mode; +} __ec_align4; + +#define VBOOT_MODE_NORMAL 0 +#define VBOOT_MODE_DEVELOPER 1 +#define VBOOT_MODE_RECOVERY 2 + /*****************************************************************************/ /* * HDMI CEC commands -- cgit v1.2.3 From a47bc8a4e88b35d9ea97ecd773cd7e0688b3322a Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:54 -0700 Subject: mfd: cros_ec: Add I2C passthru protection API Prevent direct i2c access to device behind EC when not in development mode. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 49ea905cfd18..59ad6bae3f9b 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -4316,6 +4316,28 @@ struct ec_params_entering_mode { #define VBOOT_MODE_DEVELOPER 1 #define VBOOT_MODE_RECOVERY 2 +/*****************************************************************************/ +/* + * I2C passthru protection command: Protects I2C tunnels against access on + * certain addresses (board-specific). + */ +#define EC_CMD_I2C_PASSTHRU_PROTECT 0x00B7 + +enum ec_i2c_passthru_protect_subcmd { + EC_CMD_I2C_PASSTHRU_PROTECT_STATUS = 0x0, + EC_CMD_I2C_PASSTHRU_PROTECT_ENABLE = 0x1, +}; + +struct ec_params_i2c_passthru_protect { + uint8_t subcmd; + uint8_t port; /* I2C port number */ +} __ec_align1; + +struct ec_response_i2c_passthru_protect { + uint8_t status; /* Status flags (0: unlocked, 1: locked) */ +} __ec_align1; + + /*****************************************************************************/ /* * HDMI CEC commands -- cgit v1.2.3 From d90a4121bf98d959a01306599b370d8f883a0737 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:55 -0700 Subject: mfd: cros_ec: Add API for EC-EC communication Allow EC to talk to other ECs that are not presented to the host. Neeed when EC are present in detachable keyboard. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 95 ++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 59ad6bae3f9b..52fd9bfafc7f 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -5043,6 +5043,101 @@ struct ec_response_pd_chip_info_v1 { }; } __ec_align2; +/*****************************************************************************/ +/* EC-EC communication commands: range 0x0600-0x06FF */ + +#define EC_COMM_TEXT_MAX 8 + +/* + * Get battery static information, i.e. information that never changes, or + * very infrequently. + */ +#define EC_CMD_BATTERY_GET_STATIC 0x0600 + +/** + * struct ec_params_battery_static_info - Battery static info parameters + * @index: Battery index. + */ +struct ec_params_battery_static_info { + uint8_t index; +} __ec_align_size1; + +/** + * struct ec_response_battery_static_info - Battery static info response + * @design_capacity: Battery Design Capacity (mAh) + * @design_voltage: Battery Design Voltage (mV) + * @manufacturer: Battery Manufacturer String + * @model: Battery Model Number String + * @serial: Battery Serial Number String + * @type: Battery Type String + * @cycle_count: Battery Cycle Count + */ +struct ec_response_battery_static_info { + uint16_t design_capacity; + uint16_t design_voltage; + char manufacturer[EC_COMM_TEXT_MAX]; + char model[EC_COMM_TEXT_MAX]; + char serial[EC_COMM_TEXT_MAX]; + char type[EC_COMM_TEXT_MAX]; + /* TODO(crbug.com/795991): Consider moving to dynamic structure. */ + uint32_t cycle_count; +} __ec_align4; + +/* + * Get battery dynamic information, i.e. information that is likely to change + * every time it is read. + */ +#define EC_CMD_BATTERY_GET_DYNAMIC 0x0601 + +/** + * struct ec_params_battery_dynamic_info - Battery dynamic info parameters + * @index: Battery index. + */ +struct ec_params_battery_dynamic_info { + uint8_t index; +} __ec_align_size1; + +/** + * struct ec_response_battery_dynamic_info - Battery dynamic info response + * @actual_voltage: Battery voltage (mV) + * @actual_current: Battery current (mA); negative=discharging + * @remaining_capacity: Remaining capacity (mAh) + * @full_capacity: Capacity (mAh, might change occasionally) + * @flags: Flags, see EC_BATT_FLAG_* + * @desired_voltage: Charging voltage desired by battery (mV) + * @desired_current: Charging current desired by battery (mA) + */ +struct ec_response_battery_dynamic_info { + int16_t actual_voltage; + int16_t actual_current; + int16_t remaining_capacity; + int16_t full_capacity; + int16_t flags; + int16_t desired_voltage; + int16_t desired_current; +} __ec_align2; + +/* + * Control charger chip. Used to control charger chip on the slave. + */ +#define EC_CMD_CHARGER_CONTROL 0x0602 + +/** + * struct ec_params_charger_control - Charger control parameters + * @max_current: Charger current (mA). Positive to allow base to draw up to + * max_current and (possibly) charge battery, negative to request current + * from base (OTG). + * @otg_voltage: Voltage (mV) to use in OTG mode, ignored if max_current is + * >= 0. + * @allow_charging: Allow base battery charging (only makes sense if + * max_current > 0). + */ +struct ec_params_charger_control { + int16_t max_current; + uint16_t otg_voltage; + uint8_t allow_charging; +} __ec_align_size1; + /*****************************************************************************/ /* * Reserve a range of host commands for board-specific, experimental, or -- cgit v1.2.3 From 6f9d485ca4c5d3ac223a1e49f604192be12e0676 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:56 -0700 Subject: mfd: cros_ec: Add API for Touchpad support Add API to control touchpad presented by Embedded Controller. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 52fd9bfafc7f..1d0311df44d3 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -5043,6 +5043,32 @@ struct ec_response_pd_chip_info_v1 { }; } __ec_align2; +/*****************************************************************************/ +/* Touchpad MCU commands: range 0x0500-0x05FF */ + +/* Perform touchpad self test */ +#define EC_CMD_TP_SELF_TEST 0x0500 + +/* Get number of frame types, and the size of each type */ +#define EC_CMD_TP_FRAME_INFO 0x0501 + +struct ec_response_tp_frame_info { + uint32_t n_frames; + uint32_t frame_sizes[0]; +} __ec_align4; + +/* Create a snapshot of current frame readings */ +#define EC_CMD_TP_FRAME_SNAPSHOT 0x0502 + +/* Read the frame */ +#define EC_CMD_TP_FRAME_GET 0x0503 + +struct ec_params_tp_frame_get { + uint32_t frame_index; + uint32_t offset; + uint32_t size; +} __ec_align4; + /*****************************************************************************/ /* EC-EC communication commands: range 0x0600-0x06FF */ -- cgit v1.2.3 From da038d6ee7a4e791eba6b1954a6c49f4f2856786 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:57 -0700 Subject: mfd: cros_ec: Add API for Fingerprint support Add API for fingerprint sensor presented by embedded controller. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 228 +++++++++++++++++++++++++++++++++++ 1 file changed, 228 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 1d0311df44d3..4a9ac3861bdd 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -5043,6 +5043,234 @@ struct ec_response_pd_chip_info_v1 { }; } __ec_align2; +/*****************************************************************************/ +/* Fingerprint MCU commands: range 0x0400-0x040x */ + +/* Fingerprint SPI sensor passthru command: prototyping ONLY */ +#define EC_CMD_FP_PASSTHRU 0x0400 + +#define EC_FP_FLAG_NOT_COMPLETE 0x1 + +struct ec_params_fp_passthru { + uint16_t len; /* Number of bytes to write then read */ + uint16_t flags; /* EC_FP_FLAG_xxx */ + uint8_t data[]; /* Data to send */ +} __ec_align2; + +/* Configure the Fingerprint MCU behavior */ +#define EC_CMD_FP_MODE 0x0402 + +/* Put the sensor in its lowest power mode */ +#define FP_MODE_DEEPSLEEP BIT(0) +/* Wait to see a finger on the sensor */ +#define FP_MODE_FINGER_DOWN BIT(1) +/* Poll until the finger has left the sensor */ +#define FP_MODE_FINGER_UP BIT(2) +/* Capture the current finger image */ +#define FP_MODE_CAPTURE BIT(3) +/* Finger enrollment session on-going */ +#define FP_MODE_ENROLL_SESSION BIT(4) +/* Enroll the current finger image */ +#define FP_MODE_ENROLL_IMAGE BIT(5) +/* Try to match the current finger image */ +#define FP_MODE_MATCH BIT(6) +/* Reset and re-initialize the sensor. */ +#define FP_MODE_RESET_SENSOR BIT(7) +/* special value: don't change anything just read back current mode */ +#define FP_MODE_DONT_CHANGE BIT(31) + +#define FP_VALID_MODES (FP_MODE_DEEPSLEEP | \ + FP_MODE_FINGER_DOWN | \ + FP_MODE_FINGER_UP | \ + FP_MODE_CAPTURE | \ + FP_MODE_ENROLL_SESSION | \ + FP_MODE_ENROLL_IMAGE | \ + FP_MODE_MATCH | \ + FP_MODE_RESET_SENSOR | \ + FP_MODE_DONT_CHANGE) + +/* Capture types defined in bits [30..28] */ +#define FP_MODE_CAPTURE_TYPE_SHIFT 28 +#define FP_MODE_CAPTURE_TYPE_MASK (0x7 << FP_MODE_CAPTURE_TYPE_SHIFT) +/* + * This enum must remain ordered, if you add new values you must ensure that + * FP_CAPTURE_TYPE_MAX is still the last one. + */ +enum fp_capture_type { + /* Full blown vendor-defined capture (produces 'frame_size' bytes) */ + FP_CAPTURE_VENDOR_FORMAT = 0, + /* Simple raw image capture (produces width x height x bpp bits) */ + FP_CAPTURE_SIMPLE_IMAGE = 1, + /* Self test pattern (e.g. checkerboard) */ + FP_CAPTURE_PATTERN0 = 2, + /* Self test pattern (e.g. inverted checkerboard) */ + FP_CAPTURE_PATTERN1 = 3, + /* Capture for Quality test with fixed contrast */ + FP_CAPTURE_QUALITY_TEST = 4, + /* Capture for pixel reset value test */ + FP_CAPTURE_RESET_TEST = 5, + FP_CAPTURE_TYPE_MAX, +}; +/* Extracts the capture type from the sensor 'mode' word */ +#define FP_CAPTURE_TYPE(mode) (((mode) & FP_MODE_CAPTURE_TYPE_MASK) \ + >> FP_MODE_CAPTURE_TYPE_SHIFT) + +struct ec_params_fp_mode { + uint32_t mode; /* as defined by FP_MODE_ constants */ +} __ec_align4; + +struct ec_response_fp_mode { + uint32_t mode; /* as defined by FP_MODE_ constants */ +} __ec_align4; + +/* Retrieve Fingerprint sensor information */ +#define EC_CMD_FP_INFO 0x0403 + +/* Number of dead pixels detected on the last maintenance */ +#define FP_ERROR_DEAD_PIXELS(errors) ((errors) & 0x3FF) +/* Unknown number of dead pixels detected on the last maintenance */ +#define FP_ERROR_DEAD_PIXELS_UNKNOWN (0x3FF) +/* No interrupt from the sensor */ +#define FP_ERROR_NO_IRQ BIT(12) +/* SPI communication error */ +#define FP_ERROR_SPI_COMM BIT(13) +/* Invalid sensor Hardware ID */ +#define FP_ERROR_BAD_HWID BIT(14) +/* Sensor initialization failed */ +#define FP_ERROR_INIT_FAIL BIT(15) + +struct ec_response_fp_info_v0 { + /* Sensor identification */ + uint32_t vendor_id; + uint32_t product_id; + uint32_t model_id; + uint32_t version; + /* Image frame characteristics */ + uint32_t frame_size; + uint32_t pixel_format; /* using V4L2_PIX_FMT_ */ + uint16_t width; + uint16_t height; + uint16_t bpp; + uint16_t errors; /* see FP_ERROR_ flags above */ +} __ec_align4; + +struct ec_response_fp_info { + /* Sensor identification */ + uint32_t vendor_id; + uint32_t product_id; + uint32_t model_id; + uint32_t version; + /* Image frame characteristics */ + uint32_t frame_size; + uint32_t pixel_format; /* using V4L2_PIX_FMT_ */ + uint16_t width; + uint16_t height; + uint16_t bpp; + uint16_t errors; /* see FP_ERROR_ flags above */ + /* Template/finger current information */ + uint32_t template_size; /* max template size in bytes */ + uint16_t template_max; /* maximum number of fingers/templates */ + uint16_t template_valid; /* number of valid fingers/templates */ + uint32_t template_dirty; /* bitmap of templates with MCU side changes */ + uint32_t template_version; /* version of the template format */ +} __ec_align4; + +/* Get the last captured finger frame or a template content */ +#define EC_CMD_FP_FRAME 0x0404 + +/* constants defining the 'offset' field which also contains the frame index */ +#define FP_FRAME_INDEX_SHIFT 28 +/* Frame buffer where the captured image is stored */ +#define FP_FRAME_INDEX_RAW_IMAGE 0 +/* First frame buffer holding a template */ +#define FP_FRAME_INDEX_TEMPLATE 1 +#define FP_FRAME_GET_BUFFER_INDEX(offset) ((offset) >> FP_FRAME_INDEX_SHIFT) +#define FP_FRAME_OFFSET_MASK 0x0FFFFFFF + +/* Version of the format of the encrypted templates. */ +#define FP_TEMPLATE_FORMAT_VERSION 3 + +/* Constants for encryption parameters */ +#define FP_CONTEXT_NONCE_BYTES 12 +#define FP_CONTEXT_USERID_WORDS (32 / sizeof(uint32_t)) +#define FP_CONTEXT_TAG_BYTES 16 +#define FP_CONTEXT_SALT_BYTES 16 +#define FP_CONTEXT_TPM_BYTES 32 + +struct ec_fp_template_encryption_metadata { + /* + * Version of the structure format (N=3). + */ + uint16_t struct_version; + /* Reserved bytes, set to 0. */ + uint16_t reserved; + /* + * The salt is *only* ever used for key derivation. The nonce is unique, + * a different one is used for every message. + */ + uint8_t nonce[FP_CONTEXT_NONCE_BYTES]; + uint8_t salt[FP_CONTEXT_SALT_BYTES]; + uint8_t tag[FP_CONTEXT_TAG_BYTES]; +}; + +struct ec_params_fp_frame { + /* + * The offset contains the template index or FP_FRAME_INDEX_RAW_IMAGE + * in the high nibble, and the real offset within the frame in + * FP_FRAME_OFFSET_MASK. + */ + uint32_t offset; + uint32_t size; +} __ec_align4; + +/* Load a template into the MCU */ +#define EC_CMD_FP_TEMPLATE 0x0405 + +/* Flag in the 'size' field indicating that the full template has been sent */ +#define FP_TEMPLATE_COMMIT 0x80000000 + +struct ec_params_fp_template { + uint32_t offset; + uint32_t size; + uint8_t data[]; +} __ec_align4; + +/* Clear the current fingerprint user context and set a new one */ +#define EC_CMD_FP_CONTEXT 0x0406 + +struct ec_params_fp_context { + uint32_t userid[FP_CONTEXT_USERID_WORDS]; +} __ec_align4; + +#define EC_CMD_FP_STATS 0x0407 + +#define FPSTATS_CAPTURE_INV BIT(0) +#define FPSTATS_MATCHING_INV BIT(1) + +struct ec_response_fp_stats { + uint32_t capture_time_us; + uint32_t matching_time_us; + uint32_t overall_time_us; + struct { + uint32_t lo; + uint32_t hi; + } overall_t0; + uint8_t timestamps_invalid; + int8_t template_matched; +} __ec_align2; + +#define EC_CMD_FP_SEED 0x0408 +struct ec_params_fp_seed { + /* + * Version of the structure format (N=3). + */ + uint16_t struct_version; + /* Reserved bytes, set to 0. */ + uint16_t reserved; + /* Seed from the TPM. */ + uint8_t seed[FP_CONTEXT_TPM_BYTES]; +} __ec_align4; + /*****************************************************************************/ /* Touchpad MCU commands: range 0x0500-0x05FF */ -- cgit v1.2.3 From a0d50b31cee948de1be0ad14b78127a00530f43e Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:58 -0700 Subject: mfd: cros_ec: Add API for rwsig Add command to retrieve signature of image stored in the RW memory slot(s). Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 4a9ac3861bdd..3d3a37b11002 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -5043,6 +5043,32 @@ struct ec_response_pd_chip_info_v1 { }; } __ec_align2; +/* Run RW signature verification and get status */ +#define EC_CMD_RWSIG_CHECK_STATUS 0x011C + +struct ec_response_rwsig_check_status { + uint32_t status; +} __ec_align4; + +/* For controlling RWSIG task */ +#define EC_CMD_RWSIG_ACTION 0x011D + +enum rwsig_action { + RWSIG_ACTION_ABORT = 0, /* Abort RWSIG and prevent jumping */ + RWSIG_ACTION_CONTINUE = 1, /* Jump to RW immediately */ +}; + +struct ec_params_rwsig_action { + uint32_t action; +} __ec_align4; + +/* Run verification on a slot */ +#define EC_CMD_EFS_VERIFY 0x011E + +struct ec_params_efs_verify { + uint8_t region; /* enum ec_flash_region */ +} __ec_align1; + /*****************************************************************************/ /* Fingerprint MCU commands: range 0x0400-0x040x */ -- cgit v1.2.3 From cc3a032fd7128d03715c655ad66a263b6d518071 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:33:59 -0700 Subject: mfd: cros_ec: Add SKU ID and Secure storage API Add API to store SKU, Cros board information in EC flash memory. Add API to store security data in EC. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 107 +++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 3d3a37b11002..860a76274334 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -1292,6 +1292,17 @@ struct ec_response_get_features { uint32_t flags[2]; } __ec_align4; +/*****************************************************************************/ +/* Get the board's SKU ID from EC */ +#define EC_CMD_GET_SKU_ID 0x000E + +/* Set SKU ID from AP */ +#define EC_CMD_SET_SKU_ID 0x000F + +struct ec_sku_id_info { + uint32_t sku_id; +} __ec_align4; + /*****************************************************************************/ /* Flash commands */ @@ -2902,6 +2913,49 @@ struct ec_response_port80_last_boot { uint16_t code; } __ec_align2; +/*****************************************************************************/ +/* Temporary secure storage for host verified boot use */ + +/* Number of bytes in a vstore slot */ +#define EC_VSTORE_SLOT_SIZE 64 + +/* Maximum number of vstore slots */ +#define EC_VSTORE_SLOT_MAX 32 + +/* Get persistent storage info */ +#define EC_CMD_VSTORE_INFO 0x0049 +struct ec_response_vstore_info { + /* Indicates which slots are locked */ + uint32_t slot_locked; + /* Total number of slots available */ + uint8_t slot_count; +} __ec_align_size1; + +/* + * Read temporary secure storage + * + * Response is EC_VSTORE_SLOT_SIZE bytes of data. + */ +#define EC_CMD_VSTORE_READ 0x004A + +struct ec_params_vstore_read { + uint8_t slot; /* Slot to read from */ +} __ec_align1; + +struct ec_response_vstore_read { + uint8_t data[EC_VSTORE_SLOT_SIZE]; +} __ec_align1; + +/* + * Write temporary secure storage and lock it. + */ +#define EC_CMD_VSTORE_WRITE 0x004B + +struct ec_params_vstore_write { + uint8_t slot; /* Slot to write to */ + uint8_t data[EC_VSTORE_SLOT_SIZE]; +} __ec_align1; + /*****************************************************************************/ /* Thermal engine commands. Note that there are two implementations. We'll * reuse the command number, but the data and behavior is incompatible. @@ -5069,6 +5123,59 @@ struct ec_params_efs_verify { uint8_t region; /* enum ec_flash_region */ } __ec_align1; +/* + * Retrieve info from Cros Board Info store. Response is based on the data + * type. Integers return a uint32. Strings return a string, using the response + * size to determine how big it is. + */ +#define EC_CMD_GET_CROS_BOARD_INFO 0x011F +/* + * Write info into Cros Board Info on EEPROM. Write fails if the board has + * hardware write-protect enabled. + */ +#define EC_CMD_SET_CROS_BOARD_INFO 0x0120 + +enum cbi_data_tag { + CBI_TAG_BOARD_VERSION = 0, /* uint32_t or smaller */ + CBI_TAG_OEM_ID = 1, /* uint32_t or smaller */ + CBI_TAG_SKU_ID = 2, /* uint32_t or smaller */ + CBI_TAG_DRAM_PART_NUM = 3, /* variable length ascii, nul terminated. */ + CBI_TAG_OEM_NAME = 4, /* variable length ascii, nul terminated. */ + CBI_TAG_MODEL_ID = 5, /* uint32_t or smaller */ + CBI_TAG_COUNT, +}; + +/* + * Flags to control read operation + * + * RELOAD: Invalidate cache and read data from EEPROM. Useful to verify + * write was successful without reboot. + */ +#define CBI_GET_RELOAD BIT(0) + +struct ec_params_get_cbi { + uint32_t tag; /* enum cbi_data_tag */ + uint32_t flag; /* CBI_GET_* */ +} __ec_align4; + +/* + * Flags to control write behavior. + * + * NO_SYNC: Makes EC update data in RAM but skip writing to EEPROM. It's + * useful when writing multiple fields in a row. + * INIT: Need to be set when creating a new CBI from scratch. All fields + * will be initialized to zero first. + */ +#define CBI_SET_NO_SYNC BIT(0) +#define CBI_SET_INIT BIT(1) + +struct ec_params_set_cbi { + uint32_t tag; /* enum cbi_data_tag */ + uint32_t flag; /* CBI_SET_* */ + uint32_t size; /* Data size */ + uint8_t data[]; /* For string and raw data */ +} __ec_align1; + /*****************************************************************************/ /* Fingerprint MCU commands: range 0x0400-0x040x */ -- cgit v1.2.3 From 2f2e6d14866e34b0982460760f770265215f08c5 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:34:00 -0700 Subject: mfd: cros_ec: Add Management API entry points Add commands for test and management. Add command space for future development. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 113 +++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 860a76274334..fc8babce1576 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -5176,6 +5176,119 @@ struct ec_params_set_cbi { uint8_t data[]; /* For string and raw data */ } __ec_align1; +/* + * Information about resets of the AP by the EC and the EC's own uptime. + */ +#define EC_CMD_GET_UPTIME_INFO 0x0121 + +struct ec_response_uptime_info { + /* + * Number of milliseconds since the last EC boot. Sysjump resets + * typically do not restart the EC's time_since_boot epoch. + * + * WARNING: The EC's sense of time is much less accurate than the AP's + * sense of time, in both phase and frequency. This timebase is similar + * to CLOCK_MONOTONIC_RAW, but with 1% or more frequency error. + */ + uint32_t time_since_ec_boot_ms; + + /* + * Number of times the AP was reset by the EC since the last EC boot. + * Note that the AP may be held in reset by the EC during the initial + * boot sequence, such that the very first AP boot may count as more + * than one here. + */ + uint32_t ap_resets_since_ec_boot; + + /* + * The set of flags which describe the EC's most recent reset. See + * include/system.h RESET_FLAG_* for details. + */ + uint32_t ec_reset_flags; + + /* Empty log entries have both the cause and timestamp set to zero. */ + struct ap_reset_log_entry { + /* + * See include/chipset.h: enum chipset_{reset,shutdown}_reason + * for details. + */ + uint16_t reset_cause; + + /* Reserved for protocol growth. */ + uint16_t reserved; + + /* + * The time of the reset's assertion, in milliseconds since the + * last EC boot, in the same epoch as time_since_ec_boot_ms. + * Set to zero if the log entry is empty. + */ + uint32_t reset_time_ms; + } recent_ap_reset[4]; +} __ec_align4; + +/* + * Add entropy to the device secret (stored in the rollback region). + * + * Depending on the chip, the operation may take a long time (e.g. to erase + * flash), so the commands are asynchronous. + */ +#define EC_CMD_ADD_ENTROPY 0x0122 + +enum add_entropy_action { + /* Add entropy to the current secret. */ + ADD_ENTROPY_ASYNC = 0, + /* + * Add entropy, and also make sure that the previous secret is erased. + * (this can be implemented by adding entropy multiple times until + * all rolback blocks have been overwritten). + */ + ADD_ENTROPY_RESET_ASYNC = 1, + /* Read back result from the previous operation. */ + ADD_ENTROPY_GET_RESULT = 2, +}; + +struct ec_params_rollback_add_entropy { + uint8_t action; +} __ec_align1; + +/* + * Perform a single read of a given ADC channel. + */ +#define EC_CMD_ADC_READ 0x0123 + +struct ec_params_adc_read { + uint8_t adc_channel; +} __ec_align1; + +struct ec_response_adc_read { + int32_t adc_value; +} __ec_align4; + +/* + * Read back rollback info + */ +#define EC_CMD_ROLLBACK_INFO 0x0124 + +struct ec_response_rollback_info { + int32_t id; /* Incrementing number to indicate which region to use. */ + int32_t rollback_min_version; + int32_t rw_rollback_version; +} __ec_align4; + + +/* Issue AP reset */ +#define EC_CMD_AP_RESET 0x0125 + +/*****************************************************************************/ +/* The command range 0x200-0x2FF is reserved for Rotor. */ + +/*****************************************************************************/ +/* + * Reserve a range of host commands for the CR51 firmware. + */ +#define EC_CMD_CR51_BASE 0x0300 +#define EC_CMD_CR51_LAST 0x03FF + /*****************************************************************************/ /* Fingerprint MCU commands: range 0x0400-0x040x */ -- cgit v1.2.3 From 3aa6be30da899619c44aa654313ba66eb44e7291 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 3 Jun 2019 11:34:01 -0700 Subject: mfd: cros_ec: Update I2S API Improve I2S API. Rename ec_response_codec_gain into ec_codec_i2s_gain, update caller accordlingly. Signed-off-by: Gwendal Grignou Acked-by: Enric Balletbo i Serra Acked-by: Benson Leung Reviewed-by: Fabien Lahoudere Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 44 ++++++++++++++++-------------------- sound/soc/codecs/cros_ec_codec.c | 8 +++---- 2 files changed, 24 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index fc8babce1576..fa397722f17e 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -4471,6 +4471,7 @@ enum mkbp_cec_event { /* Commands for I2S recording on audio codec. */ #define EC_CMD_CODEC_I2S 0x00BC +#define EC_WOV_I2S_SAMPLE_RATE 48000 enum ec_codec_i2s_subcmd { EC_CODEC_SET_SAMPLE_DEPTH = 0x0, @@ -4480,6 +4481,7 @@ enum ec_codec_i2s_subcmd { EC_CODEC_I2S_SET_CONFIG = 0x4, EC_CODEC_I2S_SET_TDM_CONFIG = 0x5, EC_CODEC_I2S_SET_BCLK = 0x6, + EC_CODEC_I2S_SUBCMD_COUNT = 0x7, }; enum ec_sample_depth_value { @@ -4496,6 +4498,21 @@ enum ec_i2s_config { EC_DAI_FMT_PCM_TDM = 5, }; +/* + * For subcommand EC_CODEC_GET_GAIN. + */ +struct __ec_align1 ec_codec_i2s_gain { + uint8_t left; + uint8_t right; +}; + +struct __ec_todo_unpacked ec_param_codec_i2s_tdm { + int16_t ch0_delay; /* 0 to 496 */ + int16_t ch1_delay; /* -1 to 496 */ + uint8_t adjacent_to_ch0; + uint8_t adjacent_to_ch1; +}; + struct __ec_todo_packed ec_param_codec_i2s { /* enum ec_codec_i2s_subcmd */ uint8_t cmd; @@ -4510,10 +4527,7 @@ struct __ec_todo_packed ec_param_codec_i2s { * EC_CODEC_SET_GAIN * Value should be 0~43 for both channels. */ - struct __ec_align1 ec_param_codec_i2s_set_gain { - uint8_t left; - uint8_t right; - } gain; + struct ec_codec_i2s_gain gain; /* * EC_CODEC_I2S_ENABLE @@ -4522,7 +4536,7 @@ struct __ec_todo_packed ec_param_codec_i2s { uint8_t i2s_enable; /* - * EC_CODEC_I2S_SET_COFNIG + * EC_CODEC_I2S_SET_CONFIG * Value should be one of ec_i2s_config. */ uint8_t i2s_config; @@ -4531,18 +4545,7 @@ struct __ec_todo_packed ec_param_codec_i2s { * EC_CODEC_I2S_SET_TDM_CONFIG * Value should be one of ec_i2s_config. */ - struct __ec_todo_unpacked ec_param_codec_i2s_tdm { - /* - * 0 to 496 - */ - int16_t ch0_delay; - /* - * -1 to 496 - */ - int16_t ch1_delay; - uint8_t adjacent_to_ch0; - uint8_t adjacent_to_ch1; - } tdm_param; + struct ec_param_codec_i2s_tdm tdm_param; /* * EC_CODEC_I2S_SET_BCLK @@ -4551,13 +4554,6 @@ struct __ec_todo_packed ec_param_codec_i2s { }; }; -/* - * For subcommand EC_CODEC_GET_GAIN. - */ -struct ec_response_codec_gain { - uint8_t left; - uint8_t right; -} __ec_align1; /*****************************************************************************/ /* System commands */ diff --git a/sound/soc/codecs/cros_ec_codec.c b/sound/soc/codecs/cros_ec_codec.c index 99a3af8a15ff..87830ed5ebf4 100644 --- a/sound/soc/codecs/cros_ec_codec.c +++ b/sound/soc/codecs/cros_ec_codec.c @@ -38,21 +38,21 @@ static const DECLARE_TLV_DB_SCALE(ec_mic_gain_tlv, 0, 100, 0); static int ec_command_get_gain(struct snd_soc_component *component, struct ec_param_codec_i2s *param, - struct ec_response_codec_gain *resp) + struct ec_codec_i2s_gain *resp) { struct cros_ec_codec_data *codec_data = snd_soc_component_get_drvdata(component); struct cros_ec_device *ec_device = codec_data->ec_device; u8 buffer[sizeof(struct cros_ec_command) + max(sizeof(struct ec_param_codec_i2s), - sizeof(struct ec_response_codec_gain))]; + sizeof(struct ec_codec_i2s_gain))]; struct cros_ec_command *msg = (struct cros_ec_command *)&buffer; int ret; msg->version = 0; msg->command = EC_CMD_CODEC_I2S; msg->outsize = sizeof(struct ec_param_codec_i2s); - msg->insize = sizeof(struct ec_response_codec_gain); + msg->insize = sizeof(struct ec_codec_i2s_gain); memcpy(msg->data, param, msg->outsize); @@ -226,7 +226,7 @@ static int get_ec_mic_gain(struct snd_soc_component *component, u8 *left, u8 *right) { struct ec_param_codec_i2s param; - struct ec_response_codec_gain resp; + struct ec_codec_i2s_gain resp; int ret; param.cmd = EC_CODEC_GET_GAIN; -- cgit v1.2.3 From fe03d4745675cbd678cb8c50d951df0abafdcaee Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 10 Jun 2019 13:00:24 +0200 Subject: Update my email address It's better to use my kadlec@netfilter.org email address in the source code. I might not be able to use kadlec@blackhole.kfki.hu in the future. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Jozsef Kadlecsik --- CREDITS | 2 +- MAINTAINERS | 2 +- include/linux/jhash.h | 2 +- include/linux/netfilter/ipset/ip_set.h | 2 +- include/linux/netfilter/ipset/ip_set_counter.h | 2 +- include/linux/netfilter/ipset/ip_set_skbinfo.h | 2 +- include/linux/netfilter/ipset/ip_set_timeout.h | 2 +- include/uapi/linux/netfilter/ipset/ip_set.h | 2 +- net/ipv4/netfilter/iptable_raw.c | 2 +- net/ipv4/netfilter/nf_nat_h323.c | 2 +- net/ipv6/netfilter/ip6table_raw.c | 2 +- net/netfilter/ipset/ip_set_bitmap_gen.h | 2 +- net/netfilter/ipset/ip_set_bitmap_ip.c | 4 ++-- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 4 ++-- net/netfilter/ipset/ip_set_bitmap_port.c | 4 ++-- net/netfilter/ipset/ip_set_core.c | 4 ++-- net/netfilter/ipset/ip_set_getport.c | 2 +- net/netfilter/ipset/ip_set_hash_gen.h | 2 +- net/netfilter/ipset/ip_set_hash_ip.c | 4 ++-- net/netfilter/ipset/ip_set_hash_ipmark.c | 2 +- net/netfilter/ipset/ip_set_hash_ipport.c | 4 ++-- net/netfilter/ipset/ip_set_hash_ipportip.c | 4 ++-- net/netfilter/ipset/ip_set_hash_ipportnet.c | 4 ++-- net/netfilter/ipset/ip_set_hash_mac.c | 4 ++-- net/netfilter/ipset/ip_set_hash_net.c | 4 ++-- net/netfilter/ipset/ip_set_hash_netiface.c | 4 ++-- net/netfilter/ipset/ip_set_hash_netnet.c | 2 +- net/netfilter/ipset/ip_set_hash_netport.c | 4 ++-- net/netfilter/ipset/ip_set_hash_netportnet.c | 2 +- net/netfilter/ipset/ip_set_list_set.c | 4 ++-- net/netfilter/nf_conntrack_h323_main.c | 2 +- net/netfilter/nf_conntrack_proto_tcp.c | 2 +- net/netfilter/xt_iprange.c | 4 ++-- net/netfilter/xt_set.c | 4 ++-- 34 files changed, 49 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/CREDITS b/CREDITS index 8e0342620a06..4200f4f91a16 100644 --- a/CREDITS +++ b/CREDITS @@ -1800,7 +1800,7 @@ S: 2300 Copenhagen S. S: Denmark N: Jozsef Kadlecsik -E: kadlec@blackhole.kfki.hu +E: kadlec@netfilter.org P: 1024D/470DB964 4CB3 1A05 713E 9BF7 FAC5 5809 DD8C B7B1 470D B964 D: netfilter: TCP window tracking code D: netfilter: raw table diff --git a/MAINTAINERS b/MAINTAINERS index fcbd648b960e..4c65ce86fc9e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10858,7 +10858,7 @@ F: drivers/net/ethernet/neterion/ NETFILTER M: Pablo Neira Ayuso -M: Jozsef Kadlecsik +M: Jozsef Kadlecsik M: Florian Westphal L: netfilter-devel@vger.kernel.org L: coreteam@netfilter.org diff --git a/include/linux/jhash.h b/include/linux/jhash.h index 8037850f3104..ba2f6a9776b6 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -17,7 +17,7 @@ * if SELF_TEST is defined. You can use this free for any purpose. It's in * the public domain. It has no warranty. * - * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu) + * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@netfilter.org) * * I've modified Bob's hash to be useful in the Linux kernel, and * any bugs present are my fault. diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index e499d170f12d..f5c6e7cd6469 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -1,7 +1,7 @@ /* Copyright (C) 2000-2002 Joakim Axelsson * Patrick Schaaf * Martin Josefsson - * Copyright (C) 2003-2013 Jozsef Kadlecsik + * Copyright (C) 2003-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/include/linux/netfilter/ipset/ip_set_counter.h b/include/linux/netfilter/ipset/ip_set_counter.h index 3d33a2c3f39f..305aeda2a899 100644 --- a/include/linux/netfilter/ipset/ip_set_counter.h +++ b/include/linux/netfilter/ipset/ip_set_counter.h @@ -1,7 +1,7 @@ #ifndef _IP_SET_COUNTER_H #define _IP_SET_COUNTER_H -/* Copyright (C) 2015 Jozsef Kadlecsik +/* Copyright (C) 2015 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/include/linux/netfilter/ipset/ip_set_skbinfo.h b/include/linux/netfilter/ipset/ip_set_skbinfo.h index 29d7ef2bc3fa..fac57ef854c2 100644 --- a/include/linux/netfilter/ipset/ip_set_skbinfo.h +++ b/include/linux/netfilter/ipset/ip_set_skbinfo.h @@ -1,7 +1,7 @@ #ifndef _IP_SET_SKBINFO_H #define _IP_SET_SKBINFO_H -/* Copyright (C) 2015 Jozsef Kadlecsik +/* Copyright (C) 2015 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index 8ce271e187b6..dc74150f3432 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -1,7 +1,7 @@ #ifndef _IP_SET_TIMEOUT_H #define _IP_SET_TIMEOUT_H -/* Copyright (C) 2003-2013 Jozsef Kadlecsik +/* Copyright (C) 2003-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index ea69ca21ff23..eea166c52c36 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -2,7 +2,7 @@ /* Copyright (C) 2000-2002 Joakim Axelsson * Patrick Schaaf * Martin Josefsson - * Copyright (C) 2003-2011 Jozsef Kadlecsik + * Copyright (C) 2003-2011 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 6eefde5bc468..69697eb4bfc6 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -2,7 +2,7 @@ /* * 'raw' table, which is the very first hooked in at PRE_ROUTING and LOCAL_OUT . * - * Copyright (C) 2003 Jozsef Kadlecsik + * Copyright (C) 2003 Jozsef Kadlecsik */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 15f2b2604890..076b6b29d66d 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -7,7 +7,7 @@ * This source code is licensed under General Public License version 2. * * Based on the 'brute force' H.323 NAT module by - * Jozsef Kadlecsik + * Jozsef Kadlecsik */ #include diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 3f7d4691c423..a22100b1cf2c 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -2,7 +2,7 @@ /* * IPv6 raw table, a port of the IPv4 raw table to IPv6 * - * Copyright (C) 2003 Jozsef Kadlecsik + * Copyright (C) 2003 Jozsef Kadlecsik */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 38ef2ea838cb..29c1e9a50601 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2013 Jozsef Kadlecsik +/* Copyright (C) 2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 488d6d05c65c..5a66c5499700 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -1,6 +1,6 @@ /* Copyright (C) 2000-2002 Joakim Axelsson * Patrick Schaaf - * Copyright (C) 2003-2013 Jozsef Kadlecsik + * Copyright (C) 2003-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -31,7 +31,7 @@ #define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */ MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_AUTHOR("Jozsef Kadlecsik "); IP_SET_MODULE_DESC("bitmap:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:ip"); diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 980000fc3b50..ec7a8b12642c 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -1,7 +1,7 @@ /* Copyright (C) 2000-2002 Joakim Axelsson * Patrick Schaaf * Martin Josefsson - * Copyright (C) 2003-2013 Jozsef Kadlecsik + * Copyright (C) 2003-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -31,7 +31,7 @@ #define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */ MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_AUTHOR("Jozsef Kadlecsik "); IP_SET_MODULE_DESC("bitmap:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:ip,mac"); diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index b561ca8b3659..18275ec4924c 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2013 Jozsef Kadlecsik +/* Copyright (C) 2003-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -26,7 +26,7 @@ #define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */ MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_AUTHOR("Jozsef Kadlecsik "); IP_SET_MODULE_DESC("bitmap:port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:port"); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 039892cd2b7d..18430ad2fdf2 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1,6 +1,6 @@ /* Copyright (C) 2000-2002 Joakim Axelsson * Patrick Schaaf - * Copyright (C) 2003-2013 Jozsef Kadlecsik + * Copyright (C) 2003-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -51,7 +51,7 @@ static unsigned int max_sets; module_param(max_sets, int, 0600); MODULE_PARM_DESC(max_sets, "maximal number of sets"); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_AUTHOR("Jozsef Kadlecsik "); MODULE_DESCRIPTION("core IP set support"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index 3f09cdb42562..dc7b46b41354 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2011 Jozsef Kadlecsik +/* Copyright (C) 2003-2011 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 623e0d675725..07ef941130a6 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2013 Jozsef Kadlecsik +/* Copyright (C) 2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 613eb212cb48..7b82bf1104ce 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2013 Jozsef Kadlecsik +/* Copyright (C) 2003-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -30,7 +30,7 @@ #define IPSET_TYPE_REV_MAX 4 /* skbinfo support */ MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_AUTHOR("Jozsef Kadlecsik "); IP_SET_MODULE_DESC("hash:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip"); diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index f3ba8348cf9d..7d468f98a252 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2013 Jozsef Kadlecsik +/* Copyright (C) 2003-2013 Jozsef Kadlecsik * Copyright (C) 2013 Smoothwall Ltd. * * This program is free software; you can redistribute it and/or modify diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index ddb8039ec1d2..d358ee69d04b 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2013 Jozsef Kadlecsik +/* Copyright (C) 2003-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -32,7 +32,7 @@ #define IPSET_TYPE_REV_MAX 5 /* skbinfo support added */ MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_AUTHOR("Jozsef Kadlecsik "); IP_SET_MODULE_DESC("hash:ip,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port"); diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index a7f4d7a85420..0a304785f912 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2013 Jozsef Kadlecsik +/* Copyright (C) 2003-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -32,7 +32,7 @@ #define IPSET_TYPE_REV_MAX 5 /* skbinfo support added */ MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_AUTHOR("Jozsef Kadlecsik "); IP_SET_MODULE_DESC("hash:ip,port,ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port,ip"); diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 88b83d6d3084..245f7d714870 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2013 Jozsef Kadlecsik +/* Copyright (C) 2003-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -34,7 +34,7 @@ #define IPSET_TYPE_REV_MAX 7 /* skbinfo support added */ MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_AUTHOR("Jozsef Kadlecsik "); IP_SET_MODULE_DESC("hash:ip,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port,net"); diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c index 4fe5f243d0a3..3d1fc71dac38 100644 --- a/net/netfilter/ipset/ip_set_hash_mac.c +++ b/net/netfilter/ipset/ip_set_hash_mac.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2014 Jozsef Kadlecsik +/* Copyright (C) 2014 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -23,7 +23,7 @@ #define IPSET_TYPE_REV_MAX 0 MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_AUTHOR("Jozsef Kadlecsik "); IP_SET_MODULE_DESC("hash:mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:mac"); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 5449e23af13a..470701fda231 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2013 Jozsef Kadlecsik +/* Copyright (C) 2003-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -31,7 +31,7 @@ #define IPSET_TYPE_REV_MAX 6 /* skbinfo mapping support added */ MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_AUTHOR("Jozsef Kadlecsik "); IP_SET_MODULE_DESC("hash:net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net"); diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index f5164c1efce2..1df8656ad84d 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2013 Jozsef Kadlecsik +/* Copyright (C) 2011-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -32,7 +32,7 @@ #define IPSET_TYPE_REV_MAX 6 /* skbinfo support added */ MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_AUTHOR("Jozsef Kadlecsik "); IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net,iface"); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 5a2b923bd81f..e0553be89600 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2013 Jozsef Kadlecsik +/* Copyright (C) 2003-2013 Jozsef Kadlecsik * Copyright (C) 2013 Oliver Smith * * This program is free software; you can redistribute it and/or modify diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 1a187be9ebc8..943d55d76fcf 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2013 Jozsef Kadlecsik +/* Copyright (C) 2003-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -33,7 +33,7 @@ #define IPSET_TYPE_REV_MAX 7 /* skbinfo support added */ MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_AUTHOR("Jozsef Kadlecsik "); IP_SET_MODULE_DESC("hash:net,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net,port"); diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 613e18e720a4..afaff99e578c 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2013 Jozsef Kadlecsik +/* Copyright (C) 2003-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 4f894165cdcd..ed4360072f64 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2008-2013 Jozsef Kadlecsik +/* Copyright (C) 2008-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -22,7 +22,7 @@ #define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */ MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_AUTHOR("Jozsef Kadlecsik "); IP_SET_MODULE_DESC("list:set", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_list:set"); diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 12de40390e97..1ff66e070cb2 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -7,7 +7,7 @@ * This source code is licensed under General Public License version 2. * * Based on the 'brute force' H.323 connection tracking module by - * Jozsef Kadlecsik + * Jozsef Kadlecsik * * For more information, please see http://nath323.sourceforge.net/ */ diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 7ba01d8ee165..60b68400435d 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1,6 +1,6 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team - * (C) 2002-2013 Jozsef Kadlecsik + * (C) 2002-2013 Jozsef Kadlecsik * (C) 2006-2012 Patrick McHardy * * This program is free software; you can redistribute it and/or modify diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index b46626cddd93..4ab4155706d7 100644 --- a/net/netfilter/xt_iprange.c +++ b/net/netfilter/xt_iprange.c @@ -1,7 +1,7 @@ /* * xt_iprange - Netfilter module to match IP address ranges * - * (C) 2003 Jozsef Kadlecsik + * (C) 2003 Jozsef Kadlecsik * (C) CC Computer Consultants GmbH, 2008 * * This program is free software; you can redistribute it and/or modify @@ -133,7 +133,7 @@ static void __exit iprange_mt_exit(void) module_init(iprange_mt_init); module_exit(iprange_mt_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_AUTHOR("Jozsef Kadlecsik "); MODULE_AUTHOR("Jan Engelhardt "); MODULE_DESCRIPTION("Xtables: arbitrary IPv4 range matching"); MODULE_ALIAS("ipt_iprange"); diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index cf67bbe07dc2..f025c51ba375 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -1,7 +1,7 @@ /* Copyright (C) 2000-2002 Joakim Axelsson * Patrick Schaaf * Martin Josefsson - * Copyright (C) 2003-2013 Jozsef Kadlecsik + * Copyright (C) 2003-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -21,7 +21,7 @@ #include MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_AUTHOR("Jozsef Kadlecsik "); MODULE_DESCRIPTION("Xtables: IP set match and target module"); MODULE_ALIAS("xt_SET"); MODULE_ALIAS("ipt_set"); -- cgit v1.2.3 From 4e23be473e3063a9d3bc06bb0aee89885fffab0e Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 10 Jun 2019 04:48:05 -0700 Subject: bus: ti-sysc: Add support for module specific reset quirks Some older interconnect target modules need module internal clock toggling quirks to reset properly. We've been doing this in the platform code earlier, but need to be able to it directly in the ti-sysc driver when we no longer rely on on the platform code. Let's add reset handling for 1-wire, i2c and watchdog. Later on we can add more modules like msdi and dss as they get tested. For dra7 pcie, we should be able to just use the rstctrl reset driver when available. Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 129 ++++++++++++++++++++++++++++++++-- include/linux/platform_data/ti-sysc.h | 3 + 2 files changed, 127 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index a366ae548ec9..e6deabd8305d 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -71,6 +71,9 @@ static const char * const clock_names[SYSC_MAX_CLOCKS] = { * @name: name if available * @revision: interconnect target module revision * @needs_resume: runtime resume needed on resume from suspend + * @clk_enable_quirk: module specific clock enable quirk + * @clk_disable_quirk: module specific clock disable quirk + * @reset_done_quirk: module specific reset done quirk */ struct sysc { struct device *dev; @@ -94,6 +97,9 @@ struct sysc { unsigned int child_needs_resume:1; unsigned int disable_on_idle:1; struct delayed_work idle_work; + void (*clk_enable_quirk)(struct sysc *sysc); + void (*clk_disable_quirk)(struct sysc *sysc); + void (*reset_done_quirk)(struct sysc *sysc); }; static void sysc_parse_dts_quirks(struct sysc *ddata, struct device_node *np, @@ -760,8 +766,11 @@ static int sysc_ioremap(struct sysc *ddata) ddata->offsets[SYSC_SYSCONFIG], ddata->offsets[SYSC_SYSSTATUS]); + if (size < SZ_1K) + size = SZ_1K; + if ((size + sizeof(u32)) > ddata->module_size) - return -EINVAL; + size = ddata->module_size; } ddata->module_va = devm_ioremap(ddata->dev, @@ -1234,6 +1243,22 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = { SYSC_QUIRK_EXT_OPT_CLOCK | SYSC_QUIRK_NO_RESET_ON_INIT | SYSC_QUIRK_SWSUP_SIDLE), + /* Quirks that need to be set based on detected module */ + SYSC_QUIRK("hdq1w", 0, 0, 0x14, 0x18, 0x00000006, 0xffffffff, + SYSC_MODULE_QUIRK_HDQ1W), + SYSC_QUIRK("hdq1w", 0, 0, 0x14, 0x18, 0x0000000a, 0xffffffff, + SYSC_MODULE_QUIRK_HDQ1W), + SYSC_QUIRK("i2c", 0, 0, 0x20, 0x10, 0x00000036, 0x000000ff, + SYSC_MODULE_QUIRK_I2C), + SYSC_QUIRK("i2c", 0, 0, 0x20, 0x10, 0x0000003c, 0x000000ff, + SYSC_MODULE_QUIRK_I2C), + SYSC_QUIRK("i2c", 0, 0, 0x20, 0x10, 0x00000040, 0x000000ff, + SYSC_MODULE_QUIRK_I2C), + SYSC_QUIRK("i2c", 0, 0, 0x10, 0x90, 0x5040000a, 0xfffff0f0, + SYSC_MODULE_QUIRK_I2C), + SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0500, 0xfffff0f0, + SYSC_MODULE_QUIRK_WDT), + #ifdef DEBUG SYSC_QUIRK("adc", 0, 0, 0x10, -1, 0x47300001, 0xffffffff, 0), SYSC_QUIRK("atl", 0, 0, -1, -1, 0x0a070100, 0xffffffff, 0), @@ -1247,11 +1272,8 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = { SYSC_QUIRK("dwc3", 0, 0, 0x10, -1, 0x500a0200, 0xffffffff, 0), SYSC_QUIRK("epwmss", 0, 0, 0x4, -1, 0x47400001, 0xffffffff, 0), SYSC_QUIRK("gpu", 0, 0x1fc00, 0x1fc10, -1, 0, 0, 0), - SYSC_QUIRK("hdq1w", 0, 0, 0x14, 0x18, 0x00000006, 0xffffffff, 0), - SYSC_QUIRK("hdq1w", 0, 0, 0x14, 0x18, 0x0000000a, 0xffffffff, 0), SYSC_QUIRK("hsi", 0, 0, 0x10, 0x14, 0x50043101, 0xffffffff, 0), SYSC_QUIRK("iss", 0, 0, 0x10, -1, 0x40000101, 0xffffffff, 0), - SYSC_QUIRK("i2c", 0, 0, 0x10, 0x90, 0x5040000a, 0xfffff0f0, 0), SYSC_QUIRK("lcdc", 0, 0, 0x54, -1, 0x4f201000, 0xffffffff, 0), SYSC_QUIRK("mcasp", 0, 0, 0x4, -1, 0x44306302, 0xffffffff, 0), SYSC_QUIRK("mcasp", 0, 0, 0x4, -1, 0x44307b02, 0xffffffff, 0), @@ -1287,7 +1309,6 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = { SYSC_QUIRK("usb_host_hs", 0, 0, 0x10, -1, 0x50700101, 0xffffffff, 0), SYSC_QUIRK("usb_otg_hs", 0, 0x400, 0x404, 0x408, 0x00000050, 0xffffffff, 0), - SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0500, 0xfffff0f0, 0), SYSC_QUIRK("vfpe", 0, 0, 0x104, -1, 0x4d001200, 0xffffffff, 0), #endif }; @@ -1360,6 +1381,94 @@ static void sysc_init_revision_quirks(struct sysc *ddata) } } +/* 1-wire needs module's internal clocks enabled for reset */ +static void sysc_clk_enable_quirk_hdq1w(struct sysc *ddata) +{ + int offset = 0x0c; /* HDQ_CTRL_STATUS */ + u16 val; + + val = sysc_read(ddata, offset); + val |= BIT(5); + sysc_write(ddata, offset, val); +} + +/* I2C needs extra enable bit toggling for reset */ +static void sysc_clk_quirk_i2c(struct sysc *ddata, bool enable) +{ + int offset; + u16 val; + + /* I2C_CON, omap2/3 is different from omap4 and later */ + if ((ddata->revision & 0xffffff00) == 0x001f0000) + offset = 0x24; + else + offset = 0xa4; + + /* I2C_EN */ + val = sysc_read(ddata, offset); + if (enable) + val |= BIT(15); + else + val &= ~BIT(15); + sysc_write(ddata, offset, val); +} + +static void sysc_clk_enable_quirk_i2c(struct sysc *ddata) +{ + sysc_clk_quirk_i2c(ddata, true); +} + +static void sysc_clk_disable_quirk_i2c(struct sysc *ddata) +{ + sysc_clk_quirk_i2c(ddata, false); +} + +/* Watchdog timer needs a disable sequence after reset */ +static void sysc_reset_done_quirk_wdt(struct sysc *ddata) +{ + int wps, spr, error; + u32 val; + + wps = 0x34; + spr = 0x48; + + sysc_write(ddata, spr, 0xaaaa); + error = readl_poll_timeout(ddata->module_va + wps, val, + !(val & 0x10), 100, + MAX_MODULE_SOFTRESET_WAIT); + if (error) + dev_warn(ddata->dev, "wdt disable spr failed\n"); + + sysc_write(ddata, wps, 0x5555); + error = readl_poll_timeout(ddata->module_va + wps, val, + !(val & 0x10), 100, + MAX_MODULE_SOFTRESET_WAIT); + if (error) + dev_warn(ddata->dev, "wdt disable wps failed\n"); +} + +static void sysc_init_module_quirks(struct sysc *ddata) +{ + if (ddata->legacy_mode || !ddata->name) + return; + + if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_HDQ1W) { + ddata->clk_enable_quirk = sysc_clk_enable_quirk_hdq1w; + + return; + } + + if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_I2C) { + ddata->clk_enable_quirk = sysc_clk_enable_quirk_i2c; + ddata->clk_disable_quirk = sysc_clk_disable_quirk_i2c; + + return; + } + + if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_WDT) + ddata->reset_done_quirk = sysc_reset_done_quirk_wdt; +} + static int sysc_clockdomain_init(struct sysc *ddata) { struct ti_sysc_platform_data *pdata = dev_get_platdata(ddata->dev); @@ -1468,10 +1577,16 @@ static int sysc_reset(struct sysc *ddata) else syss_done = ddata->cfg.syss_mask; + if (ddata->clk_disable_quirk) + ddata->clk_disable_quirk(ddata); + sysc_val = sysc_read_sysconfig(ddata); sysc_val |= sysc_mask; sysc_write(ddata, sysc_offset, sysc_val); + if (ddata->clk_enable_quirk) + ddata->clk_enable_quirk(ddata); + /* Poll on reset status */ if (syss_offset >= 0) { error = readx_poll_timeout(sysc_read_sysstatus, ddata, rstval, @@ -1485,6 +1600,9 @@ static int sysc_reset(struct sysc *ddata) 100, MAX_MODULE_SOFTRESET_WAIT); } + if (ddata->reset_done_quirk) + ddata->reset_done_quirk(ddata); + return error; } @@ -1531,6 +1649,7 @@ static int sysc_init_module(struct sysc *ddata) ddata->revision = sysc_read_revision(ddata); sysc_init_revision_quirks(ddata); + sysc_init_module_quirks(ddata); if (ddata->legacy_mode) { error = sysc_legacy_init(ddata); diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index 8822e99ff813..0c587d4fc718 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -47,6 +47,9 @@ struct sysc_regbits { s8 emufree_shift; }; +#define SYSC_MODULE_QUIRK_HDQ1W BIT(17) +#define SYSC_MODULE_QUIRK_I2C BIT(16) +#define SYSC_MODULE_QUIRK_WDT BIT(15) #define SYSS_QUIRK_RESETDONE_INVERTED BIT(14) #define SYSC_QUIRK_SWSUP_MSTANDBY BIT(13) #define SYSC_QUIRK_SWSUP_SIDLE_ACT BIT(12) -- cgit v1.2.3 From e36acfe6c86d13eec62321e1e86a1ce287e52e7d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 23 May 2019 09:41:19 -0300 Subject: mm/hmm: Use hmm_mirror not mm as an argument for hmm_range_register Ralph observes that hmm_range_register() can only be called by a driver while a mirror is registered. Make this clear in the API by passing in the mirror structure as a parameter. This also simplifies understanding the lifetime model for struct hmm, as the hmm pointer must be valid as part of a registered mirror so all we need in hmm_register_range() is a simple kref_get. Suggested-by: Ralph Campbell Signed-off-by: Jason Gunthorpe Reviewed-by: John Hubbard Reviewed-by: Ralph Campbell Reviewed-by: Ira Weiny Reviewed-by: Christoph Hellwig Tested-by: Philip Yang --- drivers/gpu/drm/nouveau/nouveau_svm.c | 2 +- include/linux/hmm.h | 7 ++++--- mm/hmm.c | 13 ++++--------- 3 files changed, 9 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 93ed43c413f0..8c92374afcf2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -649,7 +649,7 @@ nouveau_svm_fault(struct nvif_notify *notify) range.values = nouveau_svm_pfn_values; range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT; again: - ret = hmm_vma_fault(&range, true); + ret = hmm_vma_fault(&svmm->mirror, &range, true); if (ret == 0) { mutex_lock(&svmm->mutex); if (!hmm_vma_range_done(&range)) { diff --git a/include/linux/hmm.h b/include/linux/hmm.h index cb01cf1fa3c0..1fba6979adf4 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -496,7 +496,7 @@ static inline bool hmm_mirror_mm_is_alive(struct hmm_mirror *mirror) * Please see Documentation/vm/hmm.rst for how to use the range API. */ int hmm_range_register(struct hmm_range *range, - struct mm_struct *mm, + struct hmm_mirror *mirror, unsigned long start, unsigned long end, unsigned page_shift); @@ -532,7 +532,8 @@ static inline bool hmm_vma_range_done(struct hmm_range *range) } /* This is a temporary helper to avoid merge conflict between trees. */ -static inline int hmm_vma_fault(struct hmm_range *range, bool block) +static inline int hmm_vma_fault(struct hmm_mirror *mirror, + struct hmm_range *range, bool block) { long ret; @@ -545,7 +546,7 @@ static inline int hmm_vma_fault(struct hmm_range *range, bool block) range->default_flags = 0; range->pfn_flags_mask = -1UL; - ret = hmm_range_register(range, range->vma->vm_mm, + ret = hmm_range_register(range, mirror, range->start, range->end, PAGE_SHIFT); if (ret) diff --git a/mm/hmm.c b/mm/hmm.c index f6956d78e3cb..22a97ada108b 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -914,13 +914,13 @@ static void hmm_pfns_clear(struct hmm_range *range, * Track updates to the CPU page table see include/linux/hmm.h */ int hmm_range_register(struct hmm_range *range, - struct mm_struct *mm, + struct hmm_mirror *mirror, unsigned long start, unsigned long end, unsigned page_shift) { unsigned long mask = ((1UL << page_shift) - 1UL); - struct hmm *hmm; + struct hmm *hmm = mirror->hmm; range->valid = false; range->hmm = NULL; @@ -934,20 +934,15 @@ int hmm_range_register(struct hmm_range *range, range->start = start; range->end = end; - hmm = hmm_get_or_create(mm); - if (!hmm) - return -EFAULT; - /* Check if hmm_mm_destroy() was call. */ - if (hmm->mm == NULL || hmm->dead) { - hmm_put(hmm); + if (hmm->mm == NULL || hmm->dead) return -EFAULT; - } /* Initialize range to track CPU page table updates. */ mutex_lock(&hmm->lock); range->hmm = hmm; + kref_get(&hmm->kref); list_add_rcu(&range->list, &hmm->ranges); /* -- cgit v1.2.3 From c8a53b2db0aec40d8b217936e1b7f3d840c50390 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 23 May 2019 10:36:46 -0300 Subject: mm/hmm: Hold a mmgrab from hmm to mm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So long as a struct hmm pointer exists, so should the struct mm it is linked too. Hold the mmgrab() as soon as a hmm is created, and mmdrop() it once the hmm refcount goes to zero. Since mmdrop() (ie a 0 kref on struct mm) is now impossible with a !NULL mm->hmm delete the hmm_hmm_destroy(). Signed-off-by: Jason Gunthorpe Reviewed-by: Jérôme Glisse Reviewed-by: John Hubbard Reviewed-by: Ralph Campbell Reviewed-by: Ira Weiny Reviewed-by: Christoph Hellwig Tested-by: Philip Yang --- include/linux/hmm.h | 3 --- kernel/fork.c | 1 - mm/hmm.c | 22 ++++------------------ 3 files changed, 4 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 1fba6979adf4..1d97b6d62c5b 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -577,14 +577,11 @@ static inline int hmm_vma_fault(struct hmm_mirror *mirror, } /* Below are for HMM internal use only! Not to be used by device driver! */ -void hmm_mm_destroy(struct mm_struct *mm); - static inline void hmm_mm_init(struct mm_struct *mm) { mm->hmm = NULL; } #else /* IS_ENABLED(CONFIG_HMM_MIRROR) */ -static inline void hmm_mm_destroy(struct mm_struct *mm) {} static inline void hmm_mm_init(struct mm_struct *mm) {} #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ diff --git a/kernel/fork.c b/kernel/fork.c index 75675b9bf6df..c704c3cedee7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -673,7 +673,6 @@ void __mmdrop(struct mm_struct *mm) WARN_ON_ONCE(mm == current->active_mm); mm_free_pgd(mm); destroy_context(mm); - hmm_mm_destroy(mm); mmu_notifier_mm_destroy(mm); check_mm(mm); put_user_ns(mm->user_ns); diff --git a/mm/hmm.c b/mm/hmm.c index 22a97ada108b..080b17a2e87e 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -73,6 +74,7 @@ static struct hmm *hmm_get_or_create(struct mm_struct *mm) hmm->notifiers = 0; hmm->dead = false; hmm->mm = mm; + mmgrab(hmm->mm); spin_lock(&mm->page_table_lock); if (!mm->hmm) @@ -100,6 +102,7 @@ error_mm: mm->hmm = NULL; spin_unlock(&mm->page_table_lock); error: + mmdrop(hmm->mm); kfree(hmm); return NULL; } @@ -121,6 +124,7 @@ static void hmm_free(struct kref *kref) mm->hmm = NULL; spin_unlock(&mm->page_table_lock); + mmdrop(hmm->mm); mmu_notifier_call_srcu(&hmm->rcu, hmm_free_rcu); } @@ -129,24 +133,6 @@ static inline void hmm_put(struct hmm *hmm) kref_put(&hmm->kref, hmm_free); } -void hmm_mm_destroy(struct mm_struct *mm) -{ - struct hmm *hmm; - - spin_lock(&mm->page_table_lock); - hmm = mm_get_hmm(mm); - mm->hmm = NULL; - if (hmm) { - hmm->mm = NULL; - hmm->dead = true; - spin_unlock(&mm->page_table_lock); - hmm_put(hmm); - return; - } - - spin_unlock(&mm->page_table_lock); -} - static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier); -- cgit v1.2.3 From 7fae8a9ced742f364604a88a53084f471cc9c6e5 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 9 Jun 2019 01:11:38 +0200 Subject: fmc: Decouple from Linux GPIO subsystem FMC has its own GPIO handling, the inclusion of is only to reuse some flags that we can just as well provide using local defines. Cc: Federico Vaga Cc: Pat Riehecky Acked-by: Alessandro Rubini Signed-off-by: Linus Walleij --- drivers/fmc/fmc-trivial.c | 1 - include/linux/fmc.h | 18 ++++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/fmc/fmc-trivial.c b/drivers/fmc/fmc-trivial.c index 8defdee3e3a3..b99dbc7ee203 100644 --- a/drivers/fmc/fmc-trivial.c +++ b/drivers/fmc/fmc-trivial.c @@ -15,7 +15,6 @@ #include #include #include -#include #include static struct fmc_driver t_drv; /* initialized later */ diff --git a/include/linux/fmc.h b/include/linux/fmc.h index b355f3806f3f..8661a46a676f 100644 --- a/include/linux/fmc.h +++ b/include/linux/fmc.h @@ -100,7 +100,7 @@ struct fmc_gpio { char *carrier_name; /* name or NULL for virtual pins */ int gpio; int _gpio; /* internal use by the carrier */ - int mode; /* GPIOF_DIR_OUT etc, from */ + int mode; /* GPIOF_DIR_OUT etc */ int irqmode; /* IRQF_TRIGGER_LOW and so on */ }; @@ -114,13 +114,15 @@ struct fmc_gpio { #define FMC_GPIO_USER(x) ((x) + 0x1400) /* 256 of them */ /* We may add SCL and SDA, or other roles if the need arises */ -/* GPIOF_DIR_IN etc are missing before 3.0. copy from */ -#ifndef GPIOF_DIR_IN -# define GPIOF_DIR_OUT (0 << 0) -# define GPIOF_DIR_IN (1 << 0) -# define GPIOF_INIT_LOW (0 << 1) -# define GPIOF_INIT_HIGH (1 << 1) -#endif +/* + * These are similar to the legacy Linux GPIO defines from + * but in fact FMC has its own GPIO handling and is not using the Linux + * GPIO subsystem. + */ +#define GPIOF_DIR_OUT (0 << 0) +#define GPIOF_DIR_IN (1 << 0) +#define GPIOF_INIT_LOW (0 << 1) +#define GPIOF_INIT_HIGH (1 << 1) /* * The operations are offered by each carrier and should make driver -- cgit v1.2.3 From fada7fdc83c0bf8755956bff707c42b609223301 Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Thu, 6 Jun 2019 13:59:40 -0700 Subject: bpf: Allow bpf_map_lookup_elem() on an xskmap Currently, the AF_XDP code uses a separate map in order to determine if an xsk is bound to a queue. Instead of doing this, have bpf_map_lookup_elem() return a xdp_sock. Rearrange some xdp_sock members to eliminate structure holes. Remove selftest - will be added back in later patch. Signed-off-by: Jonathan Lemon Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 8 +++++ include/net/xdp_sock.h | 4 +-- include/uapi/linux/bpf.h | 4 +++ kernel/bpf/verifier.c | 26 ++++++++++++-- kernel/bpf/xskmap.c | 7 ++++ net/core/filter.c | 40 ++++++++++++++++++++++ .../selftests/bpf/verifier/prevent_map_lookup.c | 15 -------- 7 files changed, 85 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e5a309e6a400..1fe137afa898 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -280,6 +280,7 @@ enum bpf_reg_type { PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ + PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */ }; /* The information passed from prog-specific *_is_valid_access @@ -727,6 +728,13 @@ void __cpu_map_insert_ctx(struct bpf_map *map, u32 index); void __cpu_map_flush(struct bpf_map *map); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx); +bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info); +u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size); /* Return map's numa specified by userspace */ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index d074b6d60f8a..ae0f368a62bb 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -58,11 +58,11 @@ struct xdp_sock { struct xdp_umem *umem; struct list_head flush_node; u16 queue_id; - struct xsk_queue *tx ____cacheline_aligned_in_smp; - struct list_head list; bool zc; /* Protects multiple processes in the control path */ struct mutex mutex; + struct xsk_queue *tx ____cacheline_aligned_in_smp; + struct list_head list; /* Mutual exclusion of NAPI TX thread and sendmsg error paths * in the SKB destructor callback. */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7c6aef253173..ae0907d8c03a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3083,6 +3083,10 @@ struct bpf_sock_tuple { }; }; +struct bpf_xdp_sock { + __u32 queue_id; +}; + #define XDP_PACKET_HEADROOM 256 /* User return codes for XDP prog type. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5c2cb5bd84ce..8d1786357a09 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -334,7 +334,8 @@ static bool type_is_sk_pointer(enum bpf_reg_type type) { return type == PTR_TO_SOCKET || type == PTR_TO_SOCK_COMMON || - type == PTR_TO_TCP_SOCK; + type == PTR_TO_TCP_SOCK || + type == PTR_TO_XDP_SOCK; } static bool reg_type_may_be_null(enum bpf_reg_type type) @@ -406,6 +407,7 @@ static const char * const reg_type_str[] = { [PTR_TO_TCP_SOCK] = "tcp_sock", [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", [PTR_TO_TP_BUFFER] = "tp_buffer", + [PTR_TO_XDP_SOCK] = "xdp_sock", }; static char slot_type_char[] = { @@ -1363,6 +1365,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_SOCK_COMMON_OR_NULL: case PTR_TO_TCP_SOCK: case PTR_TO_TCP_SOCK_OR_NULL: + case PTR_TO_XDP_SOCK: return true; default: return false; @@ -1843,6 +1846,9 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, case PTR_TO_TCP_SOCK: valid = bpf_tcp_sock_is_valid_access(off, size, t, &info); break; + case PTR_TO_XDP_SOCK: + valid = bpf_xdp_sock_is_valid_access(off, size, t, &info); + break; default: valid = false; } @@ -2007,6 +2013,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, case PTR_TO_TCP_SOCK: pointer_desc = "tcp_sock "; break; + case PTR_TO_XDP_SOCK: + pointer_desc = "xdp_sock "; + break; default: break; } @@ -2905,10 +2914,14 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, * appear. */ case BPF_MAP_TYPE_CPUMAP: - case BPF_MAP_TYPE_XSKMAP: if (func_id != BPF_FUNC_redirect_map) goto error; break; + case BPF_MAP_TYPE_XSKMAP: + if (func_id != BPF_FUNC_redirect_map && + func_id != BPF_FUNC_map_lookup_elem) + goto error; + break; case BPF_MAP_TYPE_ARRAY_OF_MAPS: case BPF_MAP_TYPE_HASH_OF_MAPS: if (func_id != BPF_FUNC_map_lookup_elem) @@ -3799,6 +3812,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, case PTR_TO_SOCK_COMMON_OR_NULL: case PTR_TO_TCP_SOCK: case PTR_TO_TCP_SOCK_OR_NULL: + case PTR_TO_XDP_SOCK: verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str[ptr_reg->type]); return -EACCES; @@ -5038,6 +5052,9 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, if (reg->map_ptr->inner_map_meta) { reg->type = CONST_PTR_TO_MAP; reg->map_ptr = reg->map_ptr->inner_map_meta; + } else if (reg->map_ptr->map_type == + BPF_MAP_TYPE_XSKMAP) { + reg->type = PTR_TO_XDP_SOCK; } else { reg->type = PTR_TO_MAP_VALUE; } @@ -6299,6 +6316,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, case PTR_TO_SOCK_COMMON_OR_NULL: case PTR_TO_TCP_SOCK: case PTR_TO_TCP_SOCK_OR_NULL: + case PTR_TO_XDP_SOCK: /* Only valid matches are exact, which memcmp() above * would have accepted */ @@ -6693,6 +6711,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type) case PTR_TO_SOCK_COMMON_OR_NULL: case PTR_TO_TCP_SOCK: case PTR_TO_TCP_SOCK_OR_NULL: + case PTR_TO_XDP_SOCK: return false; default: return true; @@ -7826,6 +7845,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) case PTR_TO_TCP_SOCK: convert_ctx_access = bpf_tcp_sock_convert_ctx_access; break; + case PTR_TO_XDP_SOCK: + convert_ctx_access = bpf_xdp_sock_convert_ctx_access; + break; default: continue; } diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index 413d75f4fc72..ef7338cebd18 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c @@ -151,6 +151,12 @@ void __xsk_map_flush(struct bpf_map *map) } static void *xsk_map_lookup_elem(struct bpf_map *map, void *key) +{ + WARN_ON_ONCE(!rcu_read_lock_held()); + return __xsk_map_lookup_elem(map, *(u32 *)key); +} + +static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key) { return ERR_PTR(-EOPNOTSUPP); } @@ -218,6 +224,7 @@ const struct bpf_map_ops xsk_map_ops = { .map_free = xsk_map_free, .map_get_next_key = xsk_map_get_next_key, .map_lookup_elem = xsk_map_lookup_elem, + .map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only, .map_update_elem = xsk_map_update_elem, .map_delete_elem = xsk_map_delete_elem, .map_check_btf = map_check_no_btf, diff --git a/net/core/filter.c b/net/core/filter.c index f2777dc0b624..a5e4ac7fcbe5 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5680,6 +5680,46 @@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb) return INET_ECN_set_ce(skb); } +bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + if (off < 0 || off >= offsetofend(struct bpf_xdp_sock, queue_id)) + return false; + + if (off % size != 0) + return false; + + switch (off) { + default: + return size == sizeof(__u32); + } +} + +u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, u32 *target_size) +{ + struct bpf_insn *insn = insn_buf; + +#define BPF_XDP_SOCK_GET(FIELD) \ + do { \ + BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_sock, FIELD) > \ + FIELD_SIZEOF(struct bpf_xdp_sock, FIELD)); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\ + si->dst_reg, si->src_reg, \ + offsetof(struct xdp_sock, FIELD)); \ + } while (0) + + switch (si->off) { + case offsetof(struct bpf_xdp_sock, queue_id): + BPF_XDP_SOCK_GET(queue_id); + break; + } + + return insn - insn_buf; +} + static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = { .func = bpf_skb_ecn_set_ce, .gpl_only = false, diff --git a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c index bbdba990fefb..da7a4b37cb98 100644 --- a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c +++ b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c @@ -28,21 +28,6 @@ .errstr = "cannot pass map_type 18 into func bpf_map_lookup_elem", .prog_type = BPF_PROG_TYPE_SOCK_OPS, }, -{ - "prevent map lookup in xskmap", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_xskmap = { 3 }, - .result = REJECT, - .errstr = "cannot pass map_type 17 into func bpf_map_lookup_elem", - .prog_type = BPF_PROG_TYPE_XDP, -}, { "prevent map lookup in stack trace", .insns = { -- cgit v1.2.3 From 90b4c55586155cf13bbafbd4e55327f89681859d Mon Sep 17 00:00:00 2001 From: Zeev Zilberman Date: Mon, 10 Jun 2019 13:52:01 +0300 Subject: irqchip/gic-v2m: Add support for Amazon Graviton variant of GICv3+GICv2m Add support for Amazon Graviton custom variant of GICv2m, where the message is encoded using the MSI message address, as opposed to standard GICv2m, where the SPI number is encoded in the MSI message data. In addition, the Graviton flavor of GICv2m is used along GICv3 (and not GICv2). Co-developed-by: Benjamin Herrenschmidt Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Zeev Zilberman Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v2m.c | 85 +++++++++++++++++++++++++++------- drivers/irqchip/irq-gic-v3.c | 3 ++ include/linux/irqchip/arm-gic-common.h | 5 ++ include/linux/irqchip/arm-gic.h | 3 -- 4 files changed, 76 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index 3c77ab676e54..5356739d4799 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -56,6 +56,7 @@ /* List of flags for specific v2m implementation */ #define GICV2M_NEEDS_SPI_OFFSET 0x00000001 +#define GICV2M_GRAVITON_ADDRESS_ONLY 0x00000002 static LIST_HEAD(v2m_nodes); static DEFINE_SPINLOCK(v2m_lock); @@ -98,15 +99,26 @@ static struct msi_domain_info gicv2m_msi_domain_info = { .chip = &gicv2m_msi_irq_chip, }; +static phys_addr_t gicv2m_get_msi_addr(struct v2m_data *v2m, int hwirq) +{ + if (v2m->flags & GICV2M_GRAVITON_ADDRESS_ONLY) + return v2m->res.start | ((hwirq - 32) << 3); + else + return v2m->res.start + V2M_MSI_SETSPI_NS; +} + static void gicv2m_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) { struct v2m_data *v2m = irq_data_get_irq_chip_data(data); - phys_addr_t addr = v2m->res.start + V2M_MSI_SETSPI_NS; + phys_addr_t addr = gicv2m_get_msi_addr(v2m, data->hwirq); msg->address_hi = upper_32_bits(addr); msg->address_lo = lower_32_bits(addr); - msg->data = data->hwirq; + if (v2m->flags & GICV2M_GRAVITON_ADDRESS_ONLY) + msg->data = 0; + else + msg->data = data->hwirq; if (v2m->flags & GICV2M_NEEDS_SPI_OFFSET) msg->data -= v2m->spi_offset; @@ -188,7 +200,7 @@ static int gicv2m_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, hwirq = v2m->spi_start + offset; err = iommu_dma_prepare_msi(info->desc, - v2m->res.start + V2M_MSI_SETSPI_NS); + gicv2m_get_msi_addr(v2m, hwirq)); if (err) return err; @@ -307,7 +319,7 @@ static int gicv2m_allocate_domains(struct irq_domain *parent) static int __init gicv2m_init_one(struct fwnode_handle *fwnode, u32 spi_start, u32 nr_spis, - struct resource *res) + struct resource *res, u32 flags) { int ret; struct v2m_data *v2m; @@ -320,6 +332,7 @@ static int __init gicv2m_init_one(struct fwnode_handle *fwnode, INIT_LIST_HEAD(&v2m->entry); v2m->fwnode = fwnode; + v2m->flags = flags; memcpy(&v2m->res, res, sizeof(struct resource)); @@ -334,7 +347,14 @@ static int __init gicv2m_init_one(struct fwnode_handle *fwnode, v2m->spi_start = spi_start; v2m->nr_spis = nr_spis; } else { - u32 typer = readl_relaxed(v2m->base + V2M_MSI_TYPER); + u32 typer; + + /* Graviton should always have explicit spi_start/nr_spis */ + if (v2m->flags & GICV2M_GRAVITON_ADDRESS_ONLY) { + ret = -EINVAL; + goto err_iounmap; + } + typer = readl_relaxed(v2m->base + V2M_MSI_TYPER); v2m->spi_start = V2M_MSI_TYPER_BASE_SPI(typer); v2m->nr_spis = V2M_MSI_TYPER_NUM_SPI(typer); @@ -355,18 +375,21 @@ static int __init gicv2m_init_one(struct fwnode_handle *fwnode, * * Broadom NS2 GICv2m implementation has an erratum where the MSI data * is 'spi_number - 32' + * + * Reading that register fails on the Graviton implementation */ - switch (readl_relaxed(v2m->base + V2M_MSI_IIDR)) { - case XGENE_GICV2M_MSI_IIDR: - v2m->flags |= GICV2M_NEEDS_SPI_OFFSET; - v2m->spi_offset = v2m->spi_start; - break; - case BCM_NS2_GICV2M_MSI_IIDR: - v2m->flags |= GICV2M_NEEDS_SPI_OFFSET; - v2m->spi_offset = 32; - break; + if (!(v2m->flags & GICV2M_GRAVITON_ADDRESS_ONLY)) { + switch (readl_relaxed(v2m->base + V2M_MSI_IIDR)) { + case XGENE_GICV2M_MSI_IIDR: + v2m->flags |= GICV2M_NEEDS_SPI_OFFSET; + v2m->spi_offset = v2m->spi_start; + break; + case BCM_NS2_GICV2M_MSI_IIDR: + v2m->flags |= GICV2M_NEEDS_SPI_OFFSET; + v2m->spi_offset = 32; + break; + } } - v2m->bm = kcalloc(BITS_TO_LONGS(v2m->nr_spis), sizeof(long), GFP_KERNEL); if (!v2m->bm) { @@ -419,7 +442,8 @@ static int __init gicv2m_of_init(struct fwnode_handle *parent_handle, pr_info("DT overriding V2M MSI_TYPER (base:%u, num:%u)\n", spi_start, nr_spis); - ret = gicv2m_init_one(&child->fwnode, spi_start, nr_spis, &res); + ret = gicv2m_init_one(&child->fwnode, spi_start, nr_spis, + &res, 0); if (ret) { of_node_put(child); break; @@ -451,6 +475,25 @@ static struct fwnode_handle *gicv2m_get_fwnode(struct device *dev) return data->fwnode; } +static bool acpi_check_amazon_graviton_quirks(void) +{ + static struct acpi_table_madt *madt; + acpi_status status; + bool rc = false; + +#define ACPI_AMZN_OEM_ID "AMAZON" + + status = acpi_get_table(ACPI_SIG_MADT, 0, + (struct acpi_table_header **)&madt); + + if (ACPI_FAILURE(status) || !madt) + return rc; + rc = !memcmp(madt->header.oem_id, ACPI_AMZN_OEM_ID, ACPI_OEM_ID_SIZE); + acpi_put_table((struct acpi_table_header *)madt); + + return rc; +} + static int __init acpi_parse_madt_msi(union acpi_subtable_headers *header, const unsigned long end) @@ -460,6 +503,7 @@ acpi_parse_madt_msi(union acpi_subtable_headers *header, u32 spi_start = 0, nr_spis = 0; struct acpi_madt_generic_msi_frame *m; struct fwnode_handle *fwnode; + u32 flags = 0; m = (struct acpi_madt_generic_msi_frame *)header; if (BAD_MADT_ENTRY(m, end)) @@ -469,6 +513,13 @@ acpi_parse_madt_msi(union acpi_subtable_headers *header, res.end = m->base_address + SZ_4K - 1; res.flags = IORESOURCE_MEM; + if (acpi_check_amazon_graviton_quirks()) { + pr_info("applying Amazon Graviton quirk\n"); + res.end = res.start + SZ_8K - 1; + flags |= GICV2M_GRAVITON_ADDRESS_ONLY; + gicv2m_msi_domain_info.flags &= ~MSI_FLAG_MULTI_PCI_MSI; + } + if (m->flags & ACPI_MADT_OVERRIDE_SPI_VALUES) { spi_start = m->spi_base; nr_spis = m->spi_count; @@ -483,7 +534,7 @@ acpi_parse_madt_msi(union acpi_subtable_headers *header, return -EINVAL; } - ret = gicv2m_init_one(fwnode, spi_start, nr_spis, &res); + ret = gicv2m_init_one(fwnode, spi_start, nr_spis, &res, flags); if (ret) irq_domain_free_fwnode(fwnode); diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index f44cd89cfc40..1282f81696b2 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1343,6 +1343,9 @@ static int __init gic_init_bases(void __iomem *dist_base, if (gic_dist_supports_lpis()) { its_init(handle, &gic_data.rdists, gic_data.domain); its_cpu_init(); + } else { + if (IS_ENABLED(CONFIG_ARM_GIC_V2M)) + gicv2m_init(handle, gic_data.domain); } if (gic_prio_masking_enabled()) { diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h index 9a1a479a2bf4..62a882104790 100644 --- a/include/linux/irqchip/arm-gic-common.h +++ b/include/linux/irqchip/arm-gic-common.h @@ -39,4 +39,9 @@ struct gic_kvm_info { const struct gic_kvm_info *gic_get_kvm_info(void); +struct irq_domain; +struct fwnode_handle; +int gicv2m_init(struct fwnode_handle *parent_handle, + struct irq_domain *parent); + #endif /* __LINUX_IRQCHIP_ARM_GIC_COMMON_H */ diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 0f049b384ccd..7bd3bc6baa40 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -160,9 +160,6 @@ int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq); */ void gic_init(void __iomem *dist , void __iomem *cpu); -int gicv2m_init(struct fwnode_handle *parent_handle, - struct irq_domain *parent); - void gic_send_sgi(unsigned int cpu_id, unsigned int irq); int gic_get_cpu_id(unsigned int cpu); void gic_migrate_target(unsigned int new_cpu_id); -- cgit v1.2.3 From 78b99577b3934e3e787fe0c52aa1b59442c8bbb5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 10 Jun 2019 00:09:53 +0900 Subject: pinctrl: remove unused pin_is_valid() This function was used by pin_request() to pointlessly double-check the pin validity, and it was the only user ever. Since commit d2f6a1c6fb0e ("pinctrl: remove double pin validity check."), no one has ever used it. Signed-off-by: Masahiro Yamada Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 23 ----------------------- include/linux/pinctrl/pinctrl.h | 10 ---------- 2 files changed, 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 04787eefe2a2..e745788fa36f 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -178,29 +178,6 @@ const char *pin_get_name(struct pinctrl_dev *pctldev, const unsigned pin) return desc->name; } -/** - * pin_is_valid() - check if pin exists on controller - * @pctldev: the pin control device to check the pin on - * @pin: pin to check, use the local pin controller index number - * - * This tells us whether a certain pin exist on a certain pin controller or - * not. Pin lists may be sparse, so some pins may not exist. - */ -bool pin_is_valid(struct pinctrl_dev *pctldev, int pin) -{ - struct pin_desc *pindesc; - - if (pin < 0) - return false; - - mutex_lock(&pctldev->mutex); - pindesc = pin_desc_get(pctldev, pin); - mutex_unlock(&pctldev->mutex); - - return pindesc != NULL; -} -EXPORT_SYMBOL_GPL(pin_is_valid); - /* Deletes a range of pin descriptors */ static void pinctrl_free_pindescs(struct pinctrl_dev *pctldev, const struct pinctrl_pin_desc *pins, diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 2744113f1024..36a79fe7b84f 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -172,7 +172,6 @@ extern struct pinctrl_dev *devm_pinctrl_register(struct device *dev, extern void devm_pinctrl_unregister(struct device *dev, struct pinctrl_dev *pctldev); -extern bool pin_is_valid(struct pinctrl_dev *pctldev, int pin); extern void pinctrl_add_gpio_range(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range); extern void pinctrl_add_gpio_ranges(struct pinctrl_dev *pctldev, @@ -203,15 +202,6 @@ struct pinctrl_dev *of_pinctrl_get(struct device_node *np) extern const char *pinctrl_dev_get_name(struct pinctrl_dev *pctldev); extern const char *pinctrl_dev_get_devname(struct pinctrl_dev *pctldev); extern void *pinctrl_dev_get_drvdata(struct pinctrl_dev *pctldev); -#else - -struct pinctrl_dev; - -/* Sufficiently stupid default functions when pinctrl is not in use */ -static inline bool pin_is_valid(struct pinctrl_dev *pctldev, int pin) -{ - return pin >= 0; -} #endif /* !CONFIG_PINCTRL */ -- cgit v1.2.3 From ec6bc2e9e81b8805390851d7c7c907b0ed08b646 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Mon, 3 Jun 2019 15:57:46 +0100 Subject: driver core: Add per device iommu param DMA faults can be detected by IOMMU at device level. Adding a pointer to struct device allows IOMMU subsystem to report relevant faults back to the device driver for further handling. For direct assigned device (or user space drivers), guest OS holds responsibility to handle and respond per device IOMMU fault. Therefore we need fault reporting mechanism to propagate faults beyond IOMMU subsystem. There are two other IOMMU data pointers under struct device today, here we introduce iommu_param as a parent pointer such that all device IOMMU data can be consolidated here. The idea was suggested here by Greg KH and Joerg. The name iommu_param is chosen here since iommu_data has been used. Suggested-by: Greg Kroah-Hartman Reviewed-by: Greg Kroah-Hartman Signed-off-by: Jacob Pan Link: https://lkml.org/lkml/2017/10/6/81 Signed-off-by: Joerg Roedel --- include/linux/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index e85264fb6616..f0a975abd6e9 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -42,6 +42,7 @@ struct iommu_ops; struct iommu_group; struct iommu_fwspec; struct dev_pin_info; +struct iommu_param; struct bus_attribute { struct attribute attr; @@ -959,6 +960,7 @@ struct dev_links_info { * device (i.e. the bus driver that discovered the device). * @iommu_group: IOMMU group the device belongs to. * @iommu_fwspec: IOMMU-specific properties supplied by firmware. + * @iommu_param: Per device generic IOMMU runtime data * * @offline_disabled: If set, the device is permanently online. * @offline: Set after successful invocation of bus type's .offline(). @@ -1052,6 +1054,7 @@ struct device { void (*release)(struct device *dev); struct iommu_group *iommu_group; struct iommu_fwspec *iommu_fwspec; + struct iommu_param *iommu_param; bool offline_disabled:1; bool offline:1; -- cgit v1.2.3 From 4e32348ba5269aac1165f496b78189201568dd8c Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Mon, 3 Jun 2019 15:57:47 +0100 Subject: iommu: Introduce device fault data Device faults detected by IOMMU can be reported outside the IOMMU subsystem for further processing. This patch introduces a generic device fault data structure. The fault can be either an unrecoverable fault or a page request, also referred to as a recoverable fault. We only care about non internal faults that are likely to be reported to an external subsystem. Signed-off-by: Jacob Pan Signed-off-by: Jean-Philippe Brucker Signed-off-by: Liu, Yi L Signed-off-by: Ashok Raj Signed-off-by: Eric Auger Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 39 +++++++++++++++ include/uapi/linux/iommu.h | 118 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 157 insertions(+) create mode 100644 include/uapi/linux/iommu.h (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a815cf6f6f47..2b05056d5fa7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -25,6 +25,7 @@ #include #include #include +#include #define IOMMU_READ (1 << 0) #define IOMMU_WRITE (1 << 1) @@ -49,6 +50,7 @@ struct device; struct iommu_domain; struct notifier_block; struct iommu_sva; +struct iommu_fault_event; /* iommu fault flags */ #define IOMMU_FAULT_READ 0x0 @@ -58,6 +60,7 @@ typedef int (*iommu_fault_handler_t)(struct iommu_domain *, struct device *, unsigned long, int, void *); typedef int (*iommu_mm_exit_handler_t)(struct device *dev, struct iommu_sva *, void *); +typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault *, void *); struct iommu_domain_geometry { dma_addr_t aperture_start; /* First address that can be mapped */ @@ -301,6 +304,41 @@ struct iommu_device { struct device *dev; }; +/** + * struct iommu_fault_event - Generic fault event + * + * Can represent recoverable faults such as a page requests or + * unrecoverable faults such as DMA or IRQ remapping faults. + * + * @fault: fault descriptor + */ +struct iommu_fault_event { + struct iommu_fault fault; +}; + +/** + * struct iommu_fault_param - per-device IOMMU fault data + * @handler: Callback function to handle IOMMU faults at device level + * @data: handler private data + */ +struct iommu_fault_param { + iommu_dev_fault_handler_t handler; + void *data; +}; + +/** + * struct iommu_param - collection of per-device IOMMU data + * + * @fault_param: IOMMU detected device fault reporting data + * + * TODO: migrate other per device data pointers under iommu_dev_data, e.g. + * struct iommu_group *iommu_group; + * struct iommu_fwspec *iommu_fwspec; + */ +struct iommu_param { + struct iommu_fault_param *fault_param; +}; + int iommu_device_register(struct iommu_device *iommu); void iommu_device_unregister(struct iommu_device *iommu); int iommu_device_sysfs_add(struct iommu_device *iommu, @@ -504,6 +542,7 @@ struct iommu_ops {}; struct iommu_group {}; struct iommu_fwspec {}; struct iommu_device {}; +struct iommu_fault_param {}; static inline bool iommu_present(struct bus_type *bus) { diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h new file mode 100644 index 000000000000..796402174d6c --- /dev/null +++ b/include/uapi/linux/iommu.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * IOMMU user API definitions + */ + +#ifndef _UAPI_IOMMU_H +#define _UAPI_IOMMU_H + +#include + +#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */ +#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */ +#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */ +#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */ + +/* Generic fault types, can be expanded IRQ remapping fault */ +enum iommu_fault_type { + IOMMU_FAULT_DMA_UNRECOV = 1, /* unrecoverable fault */ + IOMMU_FAULT_PAGE_REQ, /* page request fault */ +}; + +enum iommu_fault_reason { + IOMMU_FAULT_REASON_UNKNOWN = 0, + + /* Could not access the PASID table (fetch caused external abort) */ + IOMMU_FAULT_REASON_PASID_FETCH, + + /* PASID entry is invalid or has configuration errors */ + IOMMU_FAULT_REASON_BAD_PASID_ENTRY, + + /* + * PASID is out of range (e.g. exceeds the maximum PASID + * supported by the IOMMU) or disabled. + */ + IOMMU_FAULT_REASON_PASID_INVALID, + + /* + * An external abort occurred fetching (or updating) a translation + * table descriptor + */ + IOMMU_FAULT_REASON_WALK_EABT, + + /* + * Could not access the page table entry (Bad address), + * actual translation fault + */ + IOMMU_FAULT_REASON_PTE_FETCH, + + /* Protection flag check failed */ + IOMMU_FAULT_REASON_PERMISSION, + + /* access flag check failed */ + IOMMU_FAULT_REASON_ACCESS, + + /* Output address of a translation stage caused Address Size fault */ + IOMMU_FAULT_REASON_OOR_ADDRESS, +}; + +/** + * struct iommu_fault_unrecoverable - Unrecoverable fault data + * @reason: reason of the fault, from &enum iommu_fault_reason + * @flags: parameters of this fault (IOMMU_FAULT_UNRECOV_* values) + * @pasid: Process Address Space ID + * @perm: requested permission access using by the incoming transaction + * (IOMMU_FAULT_PERM_* values) + * @addr: offending page address + * @fetch_addr: address that caused a fetch abort, if any + */ +struct iommu_fault_unrecoverable { + __u32 reason; +#define IOMMU_FAULT_UNRECOV_PASID_VALID (1 << 0) +#define IOMMU_FAULT_UNRECOV_ADDR_VALID (1 << 1) +#define IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID (1 << 2) + __u32 flags; + __u32 pasid; + __u32 perm; + __u64 addr; + __u64 fetch_addr; +}; + +/** + * struct iommu_fault_page_request - Page Request data + * @flags: encodes whether the corresponding fields are valid and whether this + * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values) + * @pasid: Process Address Space ID + * @grpid: Page Request Group Index + * @perm: requested page permissions (IOMMU_FAULT_PERM_* values) + * @addr: page address + * @private_data: device-specific private information + */ +struct iommu_fault_page_request { +#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0) +#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1) +#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2) + __u32 flags; + __u32 pasid; + __u32 grpid; + __u32 perm; + __u64 addr; + __u64 private_data[2]; +}; + +/** + * struct iommu_fault - Generic fault data + * @type: fault type from &enum iommu_fault_type + * @padding: reserved for future use (should be zero) + * @event: fault event, when @type is %IOMMU_FAULT_DMA_UNRECOV + * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ + */ +struct iommu_fault { + __u32 type; + __u32 padding; + union { + struct iommu_fault_unrecoverable event; + struct iommu_fault_page_request prm; + }; +}; +#endif /* _UAPI_IOMMU_H */ -- cgit v1.2.3 From 0c830e6b32826311fc2b9ea1f4679be0f4ef0933 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Mon, 3 Jun 2019 15:57:48 +0100 Subject: iommu: Introduce device fault report API Traditionally, device specific faults are detected and handled within their own device drivers. When IOMMU is enabled, faults such as DMA related transactions are detected by IOMMU. There is no generic reporting mechanism to report faults back to the in-kernel device driver or the guest OS in case of assigned devices. This patch introduces a registration API for device specific fault handlers. This differs from the existing iommu_set_fault_handler/ report_iommu_fault infrastructures in several ways: - it allows to report more sophisticated fault events (both unrecoverable faults and page request faults) due to the nature of the iommu_fault struct - it is device specific and not domain specific. The current iommu_report_device_fault() implementation only handles the "shoot and forget" unrecoverable fault case. Handling of page request faults or stalled faults will come later. Signed-off-by: Jacob Pan Signed-off-by: Ashok Raj Signed-off-by: Jean-Philippe Brucker Signed-off-by: Eric Auger Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++++-- include/linux/iommu.h | 29 ++++++++++ 2 files changed, 172 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3fa025f849e9..293a6fa716e0 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -107,15 +107,43 @@ void iommu_device_unregister(struct iommu_device *iommu) spin_unlock(&iommu_device_lock); } +static struct iommu_param *iommu_get_dev_param(struct device *dev) +{ + struct iommu_param *param = dev->iommu_param; + + if (param) + return param; + + param = kzalloc(sizeof(*param), GFP_KERNEL); + if (!param) + return NULL; + + mutex_init(¶m->lock); + dev->iommu_param = param; + return param; +} + +static void iommu_free_dev_param(struct device *dev) +{ + kfree(dev->iommu_param); + dev->iommu_param = NULL; +} + int iommu_probe_device(struct device *dev) { const struct iommu_ops *ops = dev->bus->iommu_ops; - int ret = -EINVAL; + int ret; WARN_ON(dev->iommu_group); + if (!ops) + return -EINVAL; - if (ops) - ret = ops->add_device(dev); + if (!iommu_get_dev_param(dev)) + return -ENOMEM; + + ret = ops->add_device(dev); + if (ret) + iommu_free_dev_param(dev); return ret; } @@ -126,6 +154,8 @@ void iommu_release_device(struct device *dev) if (dev->iommu_group) ops->remove_device(dev); + + iommu_free_dev_param(dev); } static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, @@ -854,6 +884,116 @@ int iommu_group_unregister_notifier(struct iommu_group *group, } EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier); +/** + * iommu_register_device_fault_handler() - Register a device fault handler + * @dev: the device + * @handler: the fault handler + * @data: private data passed as argument to the handler + * + * When an IOMMU fault event is received, this handler gets called with the + * fault event and data as argument. The handler should return 0 on success. + * + * Return 0 if the fault handler was installed successfully, or an error. + */ +int iommu_register_device_fault_handler(struct device *dev, + iommu_dev_fault_handler_t handler, + void *data) +{ + struct iommu_param *param = dev->iommu_param; + int ret = 0; + + if (!param) + return -EINVAL; + + mutex_lock(¶m->lock); + /* Only allow one fault handler registered for each device */ + if (param->fault_param) { + ret = -EBUSY; + goto done_unlock; + } + + get_device(dev); + param->fault_param = kzalloc(sizeof(*param->fault_param), GFP_KERNEL); + if (!param->fault_param) { + put_device(dev); + ret = -ENOMEM; + goto done_unlock; + } + param->fault_param->handler = handler; + param->fault_param->data = data; + +done_unlock: + mutex_unlock(¶m->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler); + +/** + * iommu_unregister_device_fault_handler() - Unregister the device fault handler + * @dev: the device + * + * Remove the device fault handler installed with + * iommu_register_device_fault_handler(). + * + * Return 0 on success, or an error. + */ +int iommu_unregister_device_fault_handler(struct device *dev) +{ + struct iommu_param *param = dev->iommu_param; + int ret = 0; + + if (!param) + return -EINVAL; + + mutex_lock(¶m->lock); + + if (!param->fault_param) + goto unlock; + + kfree(param->fault_param); + param->fault_param = NULL; + put_device(dev); +unlock: + mutex_unlock(¶m->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler); + +/** + * iommu_report_device_fault() - Report fault event to device driver + * @dev: the device + * @evt: fault event data + * + * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ + * handler. + * + * Return 0 on success, or an error. + */ +int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) +{ + struct iommu_param *param = dev->iommu_param; + struct iommu_fault_param *fparam; + int ret = 0; + + if (!param || !evt) + return -EINVAL; + + /* we only report device fault if there is a handler registered */ + mutex_lock(¶m->lock); + fparam = param->fault_param; + if (!fparam || !fparam->handler) { + ret = -EINVAL; + goto done_unlock; + } + ret = fparam->handler(&evt->fault, fparam->data); +done_unlock: + mutex_unlock(¶m->lock); + return ret; +} +EXPORT_SYMBOL_GPL(iommu_report_device_fault); + /** * iommu_group_id - Return ID for a group * @group: the group to ID diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2b05056d5fa7..3e783f5bf472 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -336,6 +336,7 @@ struct iommu_fault_param { * struct iommu_fwspec *iommu_fwspec; */ struct iommu_param { + struct mutex lock; struct iommu_fault_param *fault_param; }; @@ -428,6 +429,15 @@ extern int iommu_group_register_notifier(struct iommu_group *group, struct notifier_block *nb); extern int iommu_group_unregister_notifier(struct iommu_group *group, struct notifier_block *nb); +extern int iommu_register_device_fault_handler(struct device *dev, + iommu_dev_fault_handler_t handler, + void *data); + +extern int iommu_unregister_device_fault_handler(struct device *dev); + +extern int iommu_report_device_fault(struct device *dev, + struct iommu_fault_event *evt); + extern int iommu_group_id(struct iommu_group *group); extern struct iommu_group *iommu_group_get_for_dev(struct device *dev); extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *); @@ -736,6 +746,25 @@ static inline int iommu_group_unregister_notifier(struct iommu_group *group, return 0; } +static inline +int iommu_register_device_fault_handler(struct device *dev, + iommu_dev_fault_handler_t handler, + void *data) +{ + return -ENODEV; +} + +static inline int iommu_unregister_device_fault_handler(struct device *dev) +{ + return 0; +} + +static inline +int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) +{ + return -ENODEV; +} + static inline int iommu_group_id(struct iommu_group *group) { return -ENODEV; -- cgit v1.2.3 From bf3255b3cfe2d06280340dbac3f44b65d3ee6da3 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 3 Jun 2019 15:57:49 +0100 Subject: iommu: Add recoverable fault reporting Some IOMMU hardware features, for example PCI PRI and Arm SMMU Stall, enable recoverable I/O page faults. Allow IOMMU drivers to report PRI Page Requests and Stall events through the new fault reporting API. The consumer of the fault can be either an I/O page fault handler in the host, or a guest OS. Once handled, the fault must be completed by sending a page response back to the IOMMU. Add an iommu_page_response() function to complete a page fault. There are two ways to extend the userspace API: * Add a field to iommu_page_response and a flag to iommu_page_response::flags describing the validity of this field. * Introduce a new iommu_page_response_X structure with a different version number. The kernel must then support both versions. Signed-off-by: Jacob Pan Signed-off-by: Jean-Philippe Brucker Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 94 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/iommu.h | 19 ++++++++++ include/uapi/linux/iommu.h | 35 +++++++++++++++++ 3 files changed, 146 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 293a6fa716e0..ac1f29c19e59 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -891,7 +891,14 @@ EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier); * @data: private data passed as argument to the handler * * When an IOMMU fault event is received, this handler gets called with the - * fault event and data as argument. The handler should return 0 on success. + * fault event and data as argument. The handler should return 0 on success. If + * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the consumer should also + * complete the fault by calling iommu_page_response() with one of the following + * response code: + * - IOMMU_PAGE_RESP_SUCCESS: retry the translation + * - IOMMU_PAGE_RESP_INVALID: terminate the fault + * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting + * page faults if possible. * * Return 0 if the fault handler was installed successfully, or an error. */ @@ -921,6 +928,8 @@ int iommu_register_device_fault_handler(struct device *dev, } param->fault_param->handler = handler; param->fault_param->data = data; + mutex_init(¶m->fault_param->lock); + INIT_LIST_HEAD(¶m->fault_param->faults); done_unlock: mutex_unlock(¶m->lock); @@ -951,6 +960,12 @@ int iommu_unregister_device_fault_handler(struct device *dev) if (!param->fault_param) goto unlock; + /* we cannot unregister handler if there are pending faults */ + if (!list_empty(¶m->fault_param->faults)) { + ret = -EBUSY; + goto unlock; + } + kfree(param->fault_param); param->fault_param = NULL; put_device(dev); @@ -967,13 +982,15 @@ EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler); * @evt: fault event data * * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ - * handler. + * handler. When this function fails and the fault is recoverable, it is the + * caller's responsibility to complete the fault. * * Return 0 on success, or an error. */ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) { struct iommu_param *param = dev->iommu_param; + struct iommu_fault_event *evt_pending = NULL; struct iommu_fault_param *fparam; int ret = 0; @@ -987,13 +1004,86 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) ret = -EINVAL; goto done_unlock; } + + if (evt->fault.type == IOMMU_FAULT_PAGE_REQ && + (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { + evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event), + GFP_KERNEL); + if (!evt_pending) { + ret = -ENOMEM; + goto done_unlock; + } + mutex_lock(&fparam->lock); + list_add_tail(&evt_pending->list, &fparam->faults); + mutex_unlock(&fparam->lock); + } + ret = fparam->handler(&evt->fault, fparam->data); + if (ret && evt_pending) { + mutex_lock(&fparam->lock); + list_del(&evt_pending->list); + mutex_unlock(&fparam->lock); + kfree(evt_pending); + } done_unlock: mutex_unlock(¶m->lock); return ret; } EXPORT_SYMBOL_GPL(iommu_report_device_fault); +int iommu_page_response(struct device *dev, + struct iommu_page_response *msg) +{ + bool pasid_valid; + int ret = -EINVAL; + struct iommu_fault_event *evt; + struct iommu_fault_page_request *prm; + struct iommu_param *param = dev->iommu_param; + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + + if (!domain || !domain->ops->page_response) + return -ENODEV; + + if (!param || !param->fault_param) + return -EINVAL; + + if (msg->version != IOMMU_PAGE_RESP_VERSION_1 || + msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID) + return -EINVAL; + + /* Only send response if there is a fault report pending */ + mutex_lock(¶m->fault_param->lock); + if (list_empty(¶m->fault_param->faults)) { + dev_warn_ratelimited(dev, "no pending PRQ, drop response\n"); + goto done_unlock; + } + /* + * Check if we have a matching page request pending to respond, + * otherwise return -EINVAL + */ + list_for_each_entry(evt, ¶m->fault_param->faults, list) { + prm = &evt->fault.prm; + pasid_valid = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; + + if ((pasid_valid && prm->pasid != msg->pasid) || + prm->grpid != msg->grpid) + continue; + + /* Sanitize the reply */ + msg->flags = pasid_valid ? IOMMU_PAGE_RESP_PASID_VALID : 0; + + ret = domain->ops->page_response(dev, evt, msg); + list_del(&evt->list); + kfree(evt); + break; + } + +done_unlock: + mutex_unlock(¶m->fault_param->lock); + return ret; +} +EXPORT_SYMBOL_GPL(iommu_page_response); + /** * iommu_group_id - Return ID for a group * @group: the group to ID diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 3e783f5bf472..76c8cda61dfd 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -227,6 +227,7 @@ struct iommu_sva_ops { * @sva_bind: Bind process address space to device * @sva_unbind: Unbind process address space from device * @sva_get_pasid: Get PASID associated to a SVA handle + * @page_response: handle page request response * @pgsize_bitmap: bitmap of all possible supported page sizes */ struct iommu_ops { @@ -287,6 +288,10 @@ struct iommu_ops { void (*sva_unbind)(struct iommu_sva *handle); int (*sva_get_pasid)(struct iommu_sva *handle); + int (*page_response)(struct device *dev, + struct iommu_fault_event *evt, + struct iommu_page_response *msg); + unsigned long pgsize_bitmap; }; @@ -311,19 +316,25 @@ struct iommu_device { * unrecoverable faults such as DMA or IRQ remapping faults. * * @fault: fault descriptor + * @list: pending fault event list, used for tracking responses */ struct iommu_fault_event { struct iommu_fault fault; + struct list_head list; }; /** * struct iommu_fault_param - per-device IOMMU fault data * @handler: Callback function to handle IOMMU faults at device level * @data: handler private data + * @faults: holds the pending faults which needs response + * @lock: protect pending faults list */ struct iommu_fault_param { iommu_dev_fault_handler_t handler; void *data; + struct list_head faults; + struct mutex lock; }; /** @@ -437,6 +448,8 @@ extern int iommu_unregister_device_fault_handler(struct device *dev); extern int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt); +extern int iommu_page_response(struct device *dev, + struct iommu_page_response *msg); extern int iommu_group_id(struct iommu_group *group); extern struct iommu_group *iommu_group_get_for_dev(struct device *dev); @@ -765,6 +778,12 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) return -ENODEV; } +static inline int iommu_page_response(struct device *dev, + struct iommu_page_response *msg) +{ + return -ENODEV; +} + static inline int iommu_group_id(struct iommu_group *group) { return -ENODEV; diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h index 796402174d6c..f45d8e9e59c3 100644 --- a/include/uapi/linux/iommu.h +++ b/include/uapi/linux/iommu.h @@ -115,4 +115,39 @@ struct iommu_fault { struct iommu_fault_page_request prm; }; }; + +/** + * enum iommu_page_response_code - Return status of fault handlers + * @IOMMU_PAGE_RESP_SUCCESS: Fault has been handled and the page tables + * populated, retry the access. This is "Success" in PCI PRI. + * @IOMMU_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from + * this device if possible. This is "Response Failure" in PCI PRI. + * @IOMMU_PAGE_RESP_INVALID: Could not handle this fault, don't retry the + * access. This is "Invalid Request" in PCI PRI. + */ +enum iommu_page_response_code { + IOMMU_PAGE_RESP_SUCCESS = 0, + IOMMU_PAGE_RESP_INVALID, + IOMMU_PAGE_RESP_FAILURE, +}; + +/** + * struct iommu_page_response - Generic page response information + * @version: API version of this structure + * @flags: encodes whether the corresponding fields are valid + * (IOMMU_FAULT_PAGE_RESPONSE_* values) + * @pasid: Process Address Space ID + * @grpid: Page Request Group Index + * @code: response code from &enum iommu_page_response_code + */ +struct iommu_page_response { +#define IOMMU_PAGE_RESP_VERSION_1 1 + __u32 version; +#define IOMMU_PAGE_RESP_PASID_VALID (1 << 0) + __u32 flags; + __u32 pasid; + __u32 grpid; + __u32 code; +}; + #endif /* _UAPI_IOMMU_H */ -- cgit v1.2.3 From adfd373820906d376c8b643f1a279ac809605b6b Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Mon, 3 Jun 2019 08:53:35 +0200 Subject: iommu: Introduce IOMMU_RESV_DIRECT_RELAXABLE reserved memory regions Introduce a new type for reserved region. This corresponds to directly mapped regions which are known to be relaxable in some specific conditions, such as device assignment use case. Well known examples are those used by USB controllers providing PS/2 keyboard emulation for pre-boot BIOS and early BOOT or RMRRs associated to IGD working in legacy mode. Since commit c875d2c1b808 ("iommu/vt-d: Exclude devices using RMRRs from IOMMU API domains") and commit 18436afdc11a ("iommu/vt-d: Allow RMRR on graphics devices too"), those regions are currently considered "safe" with respect to device assignment use case which requires a non direct mapping at IOMMU physical level (RAM GPA -> HPA mapping). Those RMRRs currently exist and sometimes the device is attempting to access it but this has not been considered an issue until now. However at the moment, iommu_get_group_resv_regions() is not able to make any difference between directly mapped regions: those which must be absolutely enforced and those like above ones which are known as relaxable. This is a blocker for reporting severe conflicts between non relaxable RMRRs (like MSI doorbells) and guest GPA space. With this new reserved region type we will be able to use iommu_get_group_resv_regions() to enumerate the IOVA space that is usable through the IOMMU API without introducing regressions with respect to existing device assignment use cases (USB and IGD). Signed-off-by: Eric Auger Signed-off-by: Joerg Roedel --- Documentation/ABI/testing/sysfs-kernel-iommu_groups | 9 +++++++++ drivers/iommu/iommu.c | 12 +++++++----- include/linux/iommu.h | 6 ++++++ 3 files changed, 22 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-kernel-iommu_groups b/Documentation/ABI/testing/sysfs-kernel-iommu_groups index 35c64e00b35c..017f5bc3920c 100644 --- a/Documentation/ABI/testing/sysfs-kernel-iommu_groups +++ b/Documentation/ABI/testing/sysfs-kernel-iommu_groups @@ -24,3 +24,12 @@ Description: /sys/kernel/iommu_groups/reserved_regions list IOVA region is described on a single line: the 1st field is the base IOVA, the second is the end IOVA and the third field describes the type of the region. + +What: /sys/kernel/iommu_groups/reserved_regions +Date: June 2019 +KernelVersion: v5.3 +Contact: Eric Auger +Description: In case an RMRR is used only by graphics or USB devices + it is now exposed as "direct-relaxable" instead of "direct". + In device assignment use case, for instance, those RMRR + are considered to be relaxable and safe. diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index ba0661744a3d..46a06ff46e47 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -73,10 +73,11 @@ struct iommu_group_attribute { }; static const char * const iommu_group_resv_type_string[] = { - [IOMMU_RESV_DIRECT] = "direct", - [IOMMU_RESV_RESERVED] = "reserved", - [IOMMU_RESV_MSI] = "msi", - [IOMMU_RESV_SW_MSI] = "msi", + [IOMMU_RESV_DIRECT] = "direct", + [IOMMU_RESV_DIRECT_RELAXABLE] = "direct-relaxable", + [IOMMU_RESV_RESERVED] = "reserved", + [IOMMU_RESV_MSI] = "msi", + [IOMMU_RESV_SW_MSI] = "msi", }; #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ @@ -575,7 +576,8 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group, start = ALIGN(entry->start, pg_size); end = ALIGN(entry->start + entry->length, pg_size); - if (entry->type != IOMMU_RESV_DIRECT) + if (entry->type != IOMMU_RESV_DIRECT && + entry->type != IOMMU_RESV_DIRECT_RELAXABLE) continue; for (addr = start; addr < end; addr += pg_size) { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 91af22a344e2..ab7a1c85af75 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -135,6 +135,12 @@ enum iommu_attr { enum iommu_resv_type { /* Memory regions which must be mapped 1:1 at all times */ IOMMU_RESV_DIRECT, + /* + * Memory regions which are advertised to be 1:1 but are + * commonly considered relaxable in some conditions, + * for instance in device assignment use case (USB, Graphics) + */ + IOMMU_RESV_DIRECT_RELAXABLE, /* Arbitrary "never map this or give it to a device" address ranges */ IOMMU_RESV_RESERVED, /* Hardware MSI region (untranslated) */ -- cgit v1.2.3 From 18bd49c4c7c22a59634c8142d8618f5da8d29250 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 10 Jun 2019 20:11:00 +0300 Subject: gpio: omap: constify register tables We must never alter the register tables; these are read-only as far as the driver is concerned. Constify these tables. Signed-off-by: Russell King Signed-off-by: Grygorii Strashko Tested-by: Tony Lindgren Signed-off-by: Linus Walleij --- drivers/gpio/gpio-omap.c | 12 ++++++------ include/linux/platform_data/gpio-omap.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 1c5fa12bcf9f..039bbb1ae6cb 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -48,6 +48,8 @@ struct gpio_regs { struct gpio_bank { void __iomem *base; + const struct omap_gpio_reg_offs *regs; + int irq; u32 non_wakeup_gpios; u32 enabled_non_wakeup_gpios; @@ -75,8 +77,6 @@ struct gpio_bank { void (*set_dataout)(struct gpio_bank *bank, unsigned gpio, int enable); int (*get_context_loss_count)(struct device *dev); - - struct omap_gpio_reg_offs *regs; }; #define GPIO_MOD_CTRL_BIT BIT(0) @@ -1075,7 +1075,7 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc) static void omap_gpio_init_context(struct gpio_bank *p) { - struct omap_gpio_reg_offs *regs = p->regs; + const struct omap_gpio_reg_offs *regs = p->regs; void __iomem *base = p->base; p->context.ctrl = readl_relaxed(base + regs->ctrl); @@ -1094,7 +1094,7 @@ static void omap_gpio_init_context(struct gpio_bank *p) static void omap_gpio_restore_context(struct gpio_bank *bank) { - struct omap_gpio_reg_offs *regs = bank->regs; + const struct omap_gpio_reg_offs *regs = bank->regs; void __iomem *base = bank->base; writel_relaxed(bank->context.wake_en, base + regs->wkup_en); @@ -1267,7 +1267,7 @@ static int gpio_omap_cpu_notifier(struct notifier_block *nb, return NOTIFY_OK; } -static struct omap_gpio_reg_offs omap2_gpio_regs = { +static const struct omap_gpio_reg_offs omap2_gpio_regs = { .revision = OMAP24XX_GPIO_REVISION, .direction = OMAP24XX_GPIO_OE, .datain = OMAP24XX_GPIO_DATAIN, @@ -1290,7 +1290,7 @@ static struct omap_gpio_reg_offs omap2_gpio_regs = { .fallingdetect = OMAP24XX_GPIO_FALLINGDETECT, }; -static struct omap_gpio_reg_offs omap4_gpio_regs = { +static const struct omap_gpio_reg_offs omap4_gpio_regs = { .revision = OMAP4_GPIO_REVISION, .direction = OMAP4_GPIO_OE, .datain = OMAP4_GPIO_DATAIN, diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h index 7c36370c062e..1ca400005233 100644 --- a/include/linux/platform_data/gpio-omap.h +++ b/include/linux/platform_data/gpio-omap.h @@ -200,7 +200,7 @@ struct omap_gpio_platform_data { bool is_mpuio; /* whether the bank is of type MPUIO */ u32 non_wakeup_gpios; - struct omap_gpio_reg_offs *regs; + const struct omap_gpio_reg_offs *regs; /* Return context loss count due to PM states changing */ int (*get_context_loss_count)(struct device *dev); -- cgit v1.2.3 From 68bc30bb9f33fc8d11e3d110d29e06490896a999 Mon Sep 17 00:00:00 2001 From: Aubrey Li Date: Thu, 6 Jun 2019 09:22:34 +0800 Subject: proc: Add /proc//arch_status Exposing architecture specific per process information is useful for various reasons. An example is the AVX512 usage on x86 which is important for task placement for power/performance optimizations. Adding this information to the existing /prcc/pid/status file would be the obvious choise, but it has been agreed on that a explicit arch_status file is better in separating the generic and architecture specific information. [ tglx: Massage changelog ] Signed-off-by: Aubrey Li Signed-off-by: Thomas Gleixner Acked-by: Andrew Morton Cc: peterz@infradead.org Cc: hpa@zytor.com Cc: ak@linux.intel.com Cc: tim.c.chen@linux.intel.com Cc: dave.hansen@intel.com Cc: arjan@linux.intel.com Cc: adobriyan@gmail.com Cc: aubrey.li@intel.com Cc: linux-api@vger.kernel.org Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andi Kleen Cc: Tim Chen Cc: Dave Hansen Cc: Arjan van de Ven Cc: Alexey Dobriyan Cc: Linux API Link: https://lkml.kernel.org/r/20190606012236.9391-1-aubrey.li@linux.intel.com --- fs/proc/Kconfig | 4 ++++ fs/proc/base.c | 6 ++++++ include/linux/proc_fs.h | 9 +++++++++ 3 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 62ee41b4bbd0..4c3dcb718961 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -98,3 +98,7 @@ config PROC_CHILDREN Say Y if you are running any user-space software which takes benefit from this interface. For example, rkt is such a piece of software. + +config PROC_PID_ARCH_STATUS + def_bool n + depends on PROC_FS diff --git a/fs/proc/base.c b/fs/proc/base.c index 9c8ca6cd3ce4..ec436c61eece 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3061,6 +3061,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_STACKLEAK_METRICS ONE("stack_depth", S_IRUGO, proc_stack_depth), #endif +#ifdef CONFIG_PROC_PID_ARCH_STATUS + ONE("arch_status", S_IRUGO, proc_pid_arch_status), +#endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) @@ -3449,6 +3452,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_LIVEPATCH ONE("patch_state", S_IRUSR, proc_pid_patch_state), #endif +#ifdef CONFIG_PROC_PID_ARCH_STATUS + ONE("arch_status", S_IRUGO, proc_pid_arch_status), +#endif }; static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 52a283ba0465..a705aa2d03f9 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -75,6 +75,15 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo void *data); extern struct pid *tgid_pidfd_to_pid(const struct file *file); +#ifdef CONFIG_PROC_PID_ARCH_STATUS +/* + * The architecture which selects CONFIG_PROC_PID_ARCH_STATUS must + * provide proc_pid_arch_status() definition. + */ +int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task); +#endif /* CONFIG_PROC_PID_ARCH_STATUS */ + #else /* CONFIG_PROC_FS */ static inline void proc_root_init(void) -- cgit v1.2.3 From 0b673b6486998061b0489b09447ebe8452da0146 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 8 May 2019 11:46:34 -0700 Subject: firmware: arm_scmi: fetch and store sensor scale In preparation for dealing with scales within the SCMI HWMON driver, fetch and store the sensor unit scale into the scmi_sensor_info structure. In order to simplify computations for upper layer, take care of sign extending the scale to a full 8-bit signed value. Reviewed-by: Guenter Roeck Signed-off-by: Florian Fainelli [sudeep.holla: update bitfield values as per specification] Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/sensors.c | 6 ++++++ include/linux/scmi_protocol.h | 1 + 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/arm_scmi/sensors.c b/drivers/firmware/arm_scmi/sensors.c index c00287b5f2c2..0e94ab56f679 100644 --- a/drivers/firmware/arm_scmi/sensors.c +++ b/drivers/firmware/arm_scmi/sensors.c @@ -34,6 +34,8 @@ struct scmi_msg_resp_sensor_description { __le32 attributes_high; #define SENSOR_TYPE(x) ((x) & 0xff) #define SENSOR_SCALE(x) (((x) >> 11) & 0x1f) +#define SENSOR_SCALE_SIGN BIT(4) +#define SENSOR_SCALE_EXTEND GENMASK(7, 5) #define SENSOR_UPDATE_SCALE(x) (((x) >> 22) & 0x1f) #define SENSOR_UPDATE_BASE(x) (((x) >> 27) & 0x1f) u8 name[SCMI_MAX_STR_SIZE]; @@ -140,6 +142,10 @@ static int scmi_sensor_description_get(const struct scmi_handle *handle, s = &si->sensors[desc_index + cnt]; s->id = le32_to_cpu(buf->desc[cnt].id); s->type = SENSOR_TYPE(attrh); + s->scale = SENSOR_SCALE(attrh); + /* Sign extend to a full s8 */ + if (s->scale & SENSOR_SCALE_SIGN) + s->scale |= SENSOR_SCALE_EXTEND; strlcpy(s->name, buf->desc[cnt].name, SCMI_MAX_STR_SIZE); } diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 3105055c00a7..9ff2e9357e9a 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -144,6 +144,7 @@ struct scmi_power_ops { struct scmi_sensor_info { u32 id; u8 type; + s8 scale; char name[SCMI_MAX_STR_SIZE]; }; -- cgit v1.2.3 From 81f4458c9c6998fcd37c427d16d76d4dba65d015 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 28 May 2019 16:10:24 +0300 Subject: firmware: ti_sci: extend clock identifiers from u8 to u32 Future SoCs are going to have more than 255 device clocks in certain cases, and thus the API must be extended to support this. The support is done in backwards compatible extension, in which the new u32 clock identifier fields are only used if the existing u8 size clock identifier is set as 255. In all the other cases, the existing u8 clock identifier is used. As the size of the messages sent / received is not verified for existing devices / old firmware, increasing the size of the messages from the end is also fine. Due to this reason, depending on ABI version isn't necessary either. Acked-by: Santosh Shilimkar Signed-off-by: Tero Kristo --- drivers/firmware/ti_sci.c | 115 ++++++++++++++++++++++++--------- drivers/firmware/ti_sci.h | 63 ++++++++++++++---- include/linux/soc/ti/ti_sci_protocol.h | 28 ++++---- 3 files changed, 150 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c index ef93406ace1b..b417cef35769 100644 --- a/drivers/firmware/ti_sci.c +++ b/drivers/firmware/ti_sci.c @@ -916,7 +916,7 @@ static int ti_sci_cmd_get_device_resets(const struct ti_sci_handle *handle, * Return: 0 if all went well, else returns appropriate error value. */ static int ti_sci_set_clock_state(const struct ti_sci_handle *handle, - u32 dev_id, u8 clk_id, + u32 dev_id, u32 clk_id, u32 flags, u8 state) { struct ti_sci_info *info; @@ -944,7 +944,12 @@ static int ti_sci_set_clock_state(const struct ti_sci_handle *handle, } req = (struct ti_sci_msg_req_set_clock_state *)xfer->xfer_buf; req->dev_id = dev_id; - req->clk_id = clk_id; + if (clk_id < 255) { + req->clk_id = clk_id; + } else { + req->clk_id = 255; + req->clk_id_32 = clk_id; + } req->request_state = state; ret = ti_sci_do_xfer(info, xfer); @@ -976,7 +981,7 @@ fail: * Return: 0 if all went well, else returns appropriate error value. */ static int ti_sci_cmd_get_clock_state(const struct ti_sci_handle *handle, - u32 dev_id, u8 clk_id, + u32 dev_id, u32 clk_id, u8 *programmed_state, u8 *current_state) { struct ti_sci_info *info; @@ -1007,7 +1012,12 @@ static int ti_sci_cmd_get_clock_state(const struct ti_sci_handle *handle, } req = (struct ti_sci_msg_req_get_clock_state *)xfer->xfer_buf; req->dev_id = dev_id; - req->clk_id = clk_id; + if (clk_id < 255) { + req->clk_id = clk_id; + } else { + req->clk_id = 255; + req->clk_id_32 = clk_id; + } ret = ti_sci_do_xfer(info, xfer); if (ret) { @@ -1047,8 +1057,8 @@ fail: * Return: 0 if all went well, else returns appropriate error value. */ static int ti_sci_cmd_get_clock(const struct ti_sci_handle *handle, u32 dev_id, - u8 clk_id, bool needs_ssc, bool can_change_freq, - bool enable_input_term) + u32 clk_id, bool needs_ssc, + bool can_change_freq, bool enable_input_term) { u32 flags = 0; @@ -1073,7 +1083,7 @@ static int ti_sci_cmd_get_clock(const struct ti_sci_handle *handle, u32 dev_id, * Return: 0 if all went well, else returns appropriate error value. */ static int ti_sci_cmd_idle_clock(const struct ti_sci_handle *handle, - u32 dev_id, u8 clk_id) + u32 dev_id, u32 clk_id) { return ti_sci_set_clock_state(handle, dev_id, clk_id, 0, MSG_CLOCK_SW_STATE_UNREQ); @@ -1092,7 +1102,7 @@ static int ti_sci_cmd_idle_clock(const struct ti_sci_handle *handle, * Return: 0 if all went well, else returns appropriate error value. */ static int ti_sci_cmd_put_clock(const struct ti_sci_handle *handle, - u32 dev_id, u8 clk_id) + u32 dev_id, u32 clk_id) { return ti_sci_set_clock_state(handle, dev_id, clk_id, 0, MSG_CLOCK_SW_STATE_AUTO); @@ -1110,7 +1120,7 @@ static int ti_sci_cmd_put_clock(const struct ti_sci_handle *handle, * Return: 0 if all went well, else returns appropriate error value. */ static int ti_sci_cmd_clk_is_auto(const struct ti_sci_handle *handle, - u32 dev_id, u8 clk_id, bool *req_state) + u32 dev_id, u32 clk_id, bool *req_state) { u8 state = 0; int ret; @@ -1139,7 +1149,7 @@ static int ti_sci_cmd_clk_is_auto(const struct ti_sci_handle *handle, * Return: 0 if all went well, else returns appropriate error value. */ static int ti_sci_cmd_clk_is_on(const struct ti_sci_handle *handle, u32 dev_id, - u8 clk_id, bool *req_state, bool *curr_state) + u32 clk_id, bool *req_state, bool *curr_state) { u8 c_state = 0, r_state = 0; int ret; @@ -1172,7 +1182,7 @@ static int ti_sci_cmd_clk_is_on(const struct ti_sci_handle *handle, u32 dev_id, * Return: 0 if all went well, else returns appropriate error value. */ static int ti_sci_cmd_clk_is_off(const struct ti_sci_handle *handle, u32 dev_id, - u8 clk_id, bool *req_state, bool *curr_state) + u32 clk_id, bool *req_state, bool *curr_state) { u8 c_state = 0, r_state = 0; int ret; @@ -1204,7 +1214,7 @@ static int ti_sci_cmd_clk_is_off(const struct ti_sci_handle *handle, u32 dev_id, * Return: 0 if all went well, else returns appropriate error value. */ static int ti_sci_cmd_clk_set_parent(const struct ti_sci_handle *handle, - u32 dev_id, u8 clk_id, u8 parent_id) + u32 dev_id, u32 clk_id, u32 parent_id) { struct ti_sci_info *info; struct ti_sci_msg_req_set_clock_parent *req; @@ -1231,8 +1241,18 @@ static int ti_sci_cmd_clk_set_parent(const struct ti_sci_handle *handle, } req = (struct ti_sci_msg_req_set_clock_parent *)xfer->xfer_buf; req->dev_id = dev_id; - req->clk_id = clk_id; - req->parent_id = parent_id; + if (clk_id < 255) { + req->clk_id = clk_id; + } else { + req->clk_id = 255; + req->clk_id_32 = clk_id; + } + if (parent_id < 255) { + req->parent_id = parent_id; + } else { + req->parent_id = 255; + req->parent_id_32 = parent_id; + } ret = ti_sci_do_xfer(info, xfer); if (ret) { @@ -1262,7 +1282,7 @@ fail: * Return: 0 if all went well, else returns appropriate error value. */ static int ti_sci_cmd_clk_get_parent(const struct ti_sci_handle *handle, - u32 dev_id, u8 clk_id, u8 *parent_id) + u32 dev_id, u32 clk_id, u32 *parent_id) { struct ti_sci_info *info; struct ti_sci_msg_req_get_clock_parent *req; @@ -1289,7 +1309,12 @@ static int ti_sci_cmd_clk_get_parent(const struct ti_sci_handle *handle, } req = (struct ti_sci_msg_req_get_clock_parent *)xfer->xfer_buf; req->dev_id = dev_id; - req->clk_id = clk_id; + if (clk_id < 255) { + req->clk_id = clk_id; + } else { + req->clk_id = 255; + req->clk_id_32 = clk_id; + } ret = ti_sci_do_xfer(info, xfer); if (ret) { @@ -1299,10 +1324,14 @@ static int ti_sci_cmd_clk_get_parent(const struct ti_sci_handle *handle, resp = (struct ti_sci_msg_resp_get_clock_parent *)xfer->xfer_buf; - if (!ti_sci_is_response_ack(resp)) + if (!ti_sci_is_response_ack(resp)) { ret = -ENODEV; - else - *parent_id = resp->parent_id; + } else { + if (resp->parent_id < 255) + *parent_id = resp->parent_id; + else + *parent_id = resp->parent_id_32; + } fail: ti_sci_put_one_xfer(&info->minfo, xfer); @@ -1322,8 +1351,8 @@ fail: * Return: 0 if all went well, else returns appropriate error value. */ static int ti_sci_cmd_clk_get_num_parents(const struct ti_sci_handle *handle, - u32 dev_id, u8 clk_id, - u8 *num_parents) + u32 dev_id, u32 clk_id, + u32 *num_parents) { struct ti_sci_info *info; struct ti_sci_msg_req_get_clock_num_parents *req; @@ -1350,7 +1379,12 @@ static int ti_sci_cmd_clk_get_num_parents(const struct ti_sci_handle *handle, } req = (struct ti_sci_msg_req_get_clock_num_parents *)xfer->xfer_buf; req->dev_id = dev_id; - req->clk_id = clk_id; + if (clk_id < 255) { + req->clk_id = clk_id; + } else { + req->clk_id = 255; + req->clk_id_32 = clk_id; + } ret = ti_sci_do_xfer(info, xfer); if (ret) { @@ -1360,10 +1394,14 @@ static int ti_sci_cmd_clk_get_num_parents(const struct ti_sci_handle *handle, resp = (struct ti_sci_msg_resp_get_clock_num_parents *)xfer->xfer_buf; - if (!ti_sci_is_response_ack(resp)) + if (!ti_sci_is_response_ack(resp)) { ret = -ENODEV; - else - *num_parents = resp->num_parents; + } else { + if (resp->num_parents < 255) + *num_parents = resp->num_parents; + else + *num_parents = resp->num_parents_32; + } fail: ti_sci_put_one_xfer(&info->minfo, xfer); @@ -1391,7 +1429,7 @@ fail: * Return: 0 if all went well, else returns appropriate error value. */ static int ti_sci_cmd_clk_get_match_freq(const struct ti_sci_handle *handle, - u32 dev_id, u8 clk_id, u64 min_freq, + u32 dev_id, u32 clk_id, u64 min_freq, u64 target_freq, u64 max_freq, u64 *match_freq) { @@ -1420,7 +1458,12 @@ static int ti_sci_cmd_clk_get_match_freq(const struct ti_sci_handle *handle, } req = (struct ti_sci_msg_req_query_clock_freq *)xfer->xfer_buf; req->dev_id = dev_id; - req->clk_id = clk_id; + if (clk_id < 255) { + req->clk_id = clk_id; + } else { + req->clk_id = 255; + req->clk_id_32 = clk_id; + } req->min_freq_hz = min_freq; req->target_freq_hz = target_freq; req->max_freq_hz = max_freq; @@ -1463,7 +1506,7 @@ fail: * Return: 0 if all went well, else returns appropriate error value. */ static int ti_sci_cmd_clk_set_freq(const struct ti_sci_handle *handle, - u32 dev_id, u8 clk_id, u64 min_freq, + u32 dev_id, u32 clk_id, u64 min_freq, u64 target_freq, u64 max_freq) { struct ti_sci_info *info; @@ -1491,7 +1534,12 @@ static int ti_sci_cmd_clk_set_freq(const struct ti_sci_handle *handle, } req = (struct ti_sci_msg_req_set_clock_freq *)xfer->xfer_buf; req->dev_id = dev_id; - req->clk_id = clk_id; + if (clk_id < 255) { + req->clk_id = clk_id; + } else { + req->clk_id = 255; + req->clk_id_32 = clk_id; + } req->min_freq_hz = min_freq; req->target_freq_hz = target_freq; req->max_freq_hz = max_freq; @@ -1524,7 +1572,7 @@ fail: * Return: 0 if all went well, else returns appropriate error value. */ static int ti_sci_cmd_clk_get_freq(const struct ti_sci_handle *handle, - u32 dev_id, u8 clk_id, u64 *freq) + u32 dev_id, u32 clk_id, u64 *freq) { struct ti_sci_info *info; struct ti_sci_msg_req_get_clock_freq *req; @@ -1551,7 +1599,12 @@ static int ti_sci_cmd_clk_get_freq(const struct ti_sci_handle *handle, } req = (struct ti_sci_msg_req_get_clock_freq *)xfer->xfer_buf; req->dev_id = dev_id; - req->clk_id = clk_id; + if (clk_id < 255) { + req->clk_id = clk_id; + } else { + req->clk_id = 255; + req->clk_id_32 = clk_id; + } ret = ti_sci_do_xfer(info, xfer); if (ret) { diff --git a/drivers/firmware/ti_sci.h b/drivers/firmware/ti_sci.h index 4983827151bf..ad0b47981b87 100644 --- a/drivers/firmware/ti_sci.h +++ b/drivers/firmware/ti_sci.h @@ -202,7 +202,8 @@ struct ti_sci_msg_req_set_device_resets { * @dev_id: Device identifier this request is for * @clk_id: Clock identifier for the device for this request. * Each device has it's own set of clock inputs. This indexes - * which clock input to modify. + * which clock input to modify. Set to 255 if clock ID is + * greater than or equal to 255. * @request_state: Request the state for the clock to be set to. * MSG_CLOCK_SW_STATE_UNREQ: The IP does not require this clock, * it can be disabled, regardless of the state of the device @@ -213,6 +214,9 @@ struct ti_sci_msg_req_set_device_resets { * being required by the device.(default) * MSG_CLOCK_SW_STATE_REQ: Configure the clock to be enabled, * regardless of the state of the device. + * @clk_id_32: Clock identifier for the device for this request. + * Only to be used if the clock ID is greater than or equal to + * 255. * * Normally, all required clocks are managed by TISCI entity, this is used * only for specific control *IF* required. Auto managed state is @@ -234,6 +238,7 @@ struct ti_sci_msg_req_set_clock_state { #define MSG_CLOCK_SW_STATE_AUTO 1 #define MSG_CLOCK_SW_STATE_REQ 2 u8 request_state; + u32 clk_id_32; } __packed; /** @@ -242,7 +247,11 @@ struct ti_sci_msg_req_set_clock_state { * @dev_id: Device identifier this request is for * @clk_id: Clock identifier for the device for this request. * Each device has it's own set of clock inputs. This indexes - * which clock input to get state of. + * which clock input to get state of. Set to 255 if the clock + * ID is greater than or equal to 255. + * @clk_id_32: Clock identifier for the device for the request. + * Only to be used if the clock ID is greater than or equal to + * 255. * * Request type is TI_SCI_MSG_GET_CLOCK_STATE, response is state * of the clock @@ -251,6 +260,7 @@ struct ti_sci_msg_req_get_clock_state { struct ti_sci_msg_hdr hdr; u32 dev_id; u8 clk_id; + u32 clk_id_32; } __packed; /** @@ -278,9 +288,13 @@ struct ti_sci_msg_resp_get_clock_state { * @dev_id: Device identifier this request is for * @clk_id: Clock identifier for the device for this request. * Each device has it's own set of clock inputs. This indexes - * which clock input to modify. + * which clock input to modify. Set to 255 if clock ID is + * greater than or equal to 255. * @parent_id: The new clock parent is selectable by an index via this - * parameter. + * parameter. Set to 255 if clock ID is greater than or + * equal to 255. + * @clk_id_32: Clock identifier if @clk_id field is 255. + * @parent_id_32: Parent identifier if @parent_id is 255. * * Request type is TI_SCI_MSG_SET_CLOCK_PARENT, response is generic * ACK / NACK message. @@ -290,6 +304,8 @@ struct ti_sci_msg_req_set_clock_parent { u32 dev_id; u8 clk_id; u8 parent_id; + u32 clk_id_32; + u32 parent_id_32; } __packed; /** @@ -298,7 +314,10 @@ struct ti_sci_msg_req_set_clock_parent { * @dev_id: Device identifier this request is for * @clk_id: Clock identifier for the device for this request. * Each device has it's own set of clock inputs. This indexes - * which clock input to get the parent for. + * which clock input to get the parent for. If this field + * contains 255, the actual clock identifier is stored in + * @clk_id_32. + * @clk_id_32: Clock identifier if the @clk_id field contains 255. * * Request type is TI_SCI_MSG_GET_CLOCK_PARENT, response is parent information */ @@ -306,25 +325,32 @@ struct ti_sci_msg_req_get_clock_parent { struct ti_sci_msg_hdr hdr; u32 dev_id; u8 clk_id; + u32 clk_id_32; } __packed; /** * struct ti_sci_msg_resp_get_clock_parent - Response with clock parent * @hdr: Generic Header - * @parent_id: The current clock parent + * @parent_id: The current clock parent. If set to 255, the current parent + * ID can be found from the @parent_id_32 field. + * @parent_id_32: Current clock parent if @parent_id field is set to + * 255. * * Response to TI_SCI_MSG_GET_CLOCK_PARENT. */ struct ti_sci_msg_resp_get_clock_parent { struct ti_sci_msg_hdr hdr; u8 parent_id; + u32 parent_id_32; } __packed; /** * struct ti_sci_msg_req_get_clock_num_parents - Request to get clock parents * @hdr: Generic header * @dev_id: Device identifier this request is for - * @clk_id: Clock identifier for the device for this request. + * @clk_id: Clock identifier for the device for this request. Set to + * 255 if clock ID is greater than or equal to 255. + * @clk_id_32: Clock identifier if the @clk_id field contains 255. * * This request provides information about how many clock parent options * are available for a given clock to a device. This is typically used @@ -337,18 +363,24 @@ struct ti_sci_msg_req_get_clock_num_parents { struct ti_sci_msg_hdr hdr; u32 dev_id; u8 clk_id; + u32 clk_id_32; } __packed; /** * struct ti_sci_msg_resp_get_clock_num_parents - Response for get clk parents * @hdr: Generic header - * @num_parents: Number of clock parents + * @num_parents: Number of clock parents. If set to 255, the actual + * number of parents is stored into @num_parents_32 + * field instead. + * @num_parents_32: Number of clock parents if @num_parents field is + * set to 255. * * Response to TI_SCI_MSG_GET_NUM_CLOCK_PARENTS */ struct ti_sci_msg_resp_get_clock_num_parents { struct ti_sci_msg_hdr hdr; u8 num_parents; + u32 num_parents_32; } __packed; /** @@ -363,7 +395,9 @@ struct ti_sci_msg_resp_get_clock_num_parents { * @max_freq_hz: The maximum allowable frequency in Hz. This is the maximum * allowable programmed frequency and does not account for clock * tolerances and jitter. - * @clk_id: Clock identifier for the device for this request. + * @clk_id: Clock identifier for the device for this request. Set to + * 255 if clock identifier is greater than or equal to 255. + * @clk_id_32: Clock identifier if @clk_id is set to 255. * * NOTE: Normally clock frequency management is automatically done by TISCI * entity. In case of specific requests, TISCI evaluates capability to achieve @@ -380,6 +414,7 @@ struct ti_sci_msg_req_query_clock_freq { u64 target_freq_hz; u64 max_freq_hz; u8 clk_id; + u32 clk_id_32; } __packed; /** @@ -407,7 +442,9 @@ struct ti_sci_msg_resp_query_clock_freq { * @max_freq_hz: The maximum allowable frequency in Hz. This is the maximum * allowable programmed frequency and does not account for clock * tolerances and jitter. - * @clk_id: Clock identifier for the device for this request. + * @clk_id: Clock identifier for the device for this request. Set to + * 255 if clock ID is greater than or equal to 255. + * @clk_id_32: Clock identifier if @clk_id field is set to 255. * * NOTE: Normally clock frequency management is automatically done by TISCI * entity. In case of specific requests, TISCI evaluates capability to achieve @@ -436,13 +473,16 @@ struct ti_sci_msg_req_set_clock_freq { u64 target_freq_hz; u64 max_freq_hz; u8 clk_id; + u32 clk_id_32; } __packed; /** * struct ti_sci_msg_req_get_clock_freq - Request to get the clock frequency * @hdr: Generic Header * @dev_id: Device identifier this request is for - * @clk_id: Clock identifier for the device for this request. + * @clk_id: Clock identifier for the device for this request. Set to + * 255 if clock ID is greater than or equal to 255. + * @clk_id_32: Clock identifier if @clk_id field is set to 255. * * NOTE: Normally clock frequency management is automatically done by TISCI * entity. In some cases, clock frequencies are configured by host. @@ -454,6 +494,7 @@ struct ti_sci_msg_req_get_clock_freq { struct ti_sci_msg_hdr hdr; u32 dev_id; u8 clk_id; + u32 clk_id_32; } __packed; /** diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 568722a041bf..406e6717d252 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -166,29 +166,29 @@ struct ti_sci_dev_ops { * managed by driver for that purpose. */ struct ti_sci_clk_ops { - int (*get_clock)(const struct ti_sci_handle *handle, u32 did, u8 cid, + int (*get_clock)(const struct ti_sci_handle *handle, u32 did, u32 cid, bool needs_ssc, bool can_change_freq, bool enable_input_term); - int (*idle_clock)(const struct ti_sci_handle *handle, u32 did, u8 cid); - int (*put_clock)(const struct ti_sci_handle *handle, u32 did, u8 cid); - int (*is_auto)(const struct ti_sci_handle *handle, u32 did, u8 cid, + int (*idle_clock)(const struct ti_sci_handle *handle, u32 did, u32 cid); + int (*put_clock)(const struct ti_sci_handle *handle, u32 did, u32 cid); + int (*is_auto)(const struct ti_sci_handle *handle, u32 did, u32 cid, bool *req_state); - int (*is_on)(const struct ti_sci_handle *handle, u32 did, u8 cid, + int (*is_on)(const struct ti_sci_handle *handle, u32 did, u32 cid, bool *req_state, bool *current_state); - int (*is_off)(const struct ti_sci_handle *handle, u32 did, u8 cid, + int (*is_off)(const struct ti_sci_handle *handle, u32 did, u32 cid, bool *req_state, bool *current_state); - int (*set_parent)(const struct ti_sci_handle *handle, u32 did, u8 cid, - u8 parent_id); - int (*get_parent)(const struct ti_sci_handle *handle, u32 did, u8 cid, - u8 *parent_id); + int (*set_parent)(const struct ti_sci_handle *handle, u32 did, u32 cid, + u32 parent_id); + int (*get_parent)(const struct ti_sci_handle *handle, u32 did, u32 cid, + u32 *parent_id); int (*get_num_parents)(const struct ti_sci_handle *handle, u32 did, - u8 cid, u8 *num_parents); + u32 cid, u32 *num_parents); int (*get_best_match_freq)(const struct ti_sci_handle *handle, u32 did, - u8 cid, u64 min_freq, u64 target_freq, + u32 cid, u64 min_freq, u64 target_freq, u64 max_freq, u64 *match_freq); - int (*set_freq)(const struct ti_sci_handle *handle, u32 did, u8 cid, + int (*set_freq)(const struct ti_sci_handle *handle, u32 did, u32 cid, u64 min_freq, u64 target_freq, u64 max_freq); - int (*get_freq)(const struct ti_sci_handle *handle, u32 did, u8 cid, + int (*get_freq)(const struct ti_sci_handle *handle, u32 did, u32 cid, u64 *current_freq); }; -- cgit v1.2.3 From 6a80b30086b861b2591ba2a953042abd08c498e3 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 10 Jun 2019 16:04:39 +0200 Subject: fmc: Delete the FMC subsystem The FMC subsystem was created in 2012 with the ambition to drive development of drivers for this hardware upstream. The current implementation has architectural flaws and would need to be revamped using real hardware to something that can reuse existing kernel abstractions in the subsystems for e.g. I2C, FPGA and GPIO. We have concluded that for the mainline kernel it will be better to delete the subsystem and start over with a clean slate when/if an active maintainer steps up. For details see: https://lkml.org/lkml/2018/10/29/534 Suggested-by: Federico Vaga Cc: Pat Riehecky Acked-by: Alessandro Rubini Signed-off-by: Federico Vaga Signed-off-by: Linus Walleij --- Documentation/fmc/API.txt | 47 ---- Documentation/fmc/FMC-and-SDB.txt | 88 -------- Documentation/fmc/carrier.txt | 311 -------------------------- Documentation/fmc/fmc-chardev.txt | 64 ------ Documentation/fmc/fmc-fakedev.txt | 36 --- Documentation/fmc/fmc-trivial.txt | 17 -- Documentation/fmc/fmc-write-eeprom.txt | 98 --------- Documentation/fmc/identifiers.txt | 168 -------------- Documentation/fmc/mezzanine.txt | 123 ----------- Documentation/fmc/parameters.txt | 56 ----- drivers/Kconfig | 2 - drivers/Makefile | 1 - drivers/fmc/Kconfig | 52 ----- drivers/fmc/Makefile | 15 -- drivers/fmc/fmc-chardev.c | 199 ----------------- drivers/fmc/fmc-core.c | 388 --------------------------------- drivers/fmc/fmc-debug.c | 172 --------------- drivers/fmc/fmc-dump.c | 58 ----- drivers/fmc/fmc-fakedev.c | 355 ------------------------------ drivers/fmc/fmc-match.c | 113 ---------- drivers/fmc/fmc-private.h | 8 - drivers/fmc/fmc-sdb.c | 219 ------------------- drivers/fmc/fmc-trivial.c | 102 --------- drivers/fmc/fmc-write-eeprom.c | 175 --------------- drivers/fmc/fru-parse.c | 80 ------- include/linux/fmc-sdb.h | 39 ---- include/linux/fmc.h | 271 ----------------------- 27 files changed, 3257 deletions(-) delete mode 100644 Documentation/fmc/API.txt delete mode 100644 Documentation/fmc/FMC-and-SDB.txt delete mode 100644 Documentation/fmc/carrier.txt delete mode 100644 Documentation/fmc/fmc-chardev.txt delete mode 100644 Documentation/fmc/fmc-fakedev.txt delete mode 100644 Documentation/fmc/fmc-trivial.txt delete mode 100644 Documentation/fmc/fmc-write-eeprom.txt delete mode 100644 Documentation/fmc/identifiers.txt delete mode 100644 Documentation/fmc/mezzanine.txt delete mode 100644 Documentation/fmc/parameters.txt delete mode 100644 drivers/fmc/Kconfig delete mode 100644 drivers/fmc/Makefile delete mode 100644 drivers/fmc/fmc-chardev.c delete mode 100644 drivers/fmc/fmc-core.c delete mode 100644 drivers/fmc/fmc-debug.c delete mode 100644 drivers/fmc/fmc-dump.c delete mode 100644 drivers/fmc/fmc-fakedev.c delete mode 100644 drivers/fmc/fmc-match.c delete mode 100644 drivers/fmc/fmc-private.h delete mode 100644 drivers/fmc/fmc-sdb.c delete mode 100644 drivers/fmc/fmc-trivial.c delete mode 100644 drivers/fmc/fmc-write-eeprom.c delete mode 100644 drivers/fmc/fru-parse.c delete mode 100644 include/linux/fmc-sdb.h delete mode 100644 include/linux/fmc.h (limited to 'include/linux') diff --git a/Documentation/fmc/API.txt b/Documentation/fmc/API.txt deleted file mode 100644 index 06b06b92c794..000000000000 --- a/Documentation/fmc/API.txt +++ /dev/null @@ -1,47 +0,0 @@ -Functions Exported by fmc.ko -**************************** - -The FMC core exports the usual 4 functions that are needed for a bus to -work, and a few more: - - int fmc_driver_register(struct fmc_driver *drv); - void fmc_driver_unregister(struct fmc_driver *drv); - int fmc_device_register(struct fmc_device *fmc); - void fmc_device_unregister(struct fmc_device *fmc); - - int fmc_device_register_n(struct fmc_device **fmc, int n); - void fmc_device_unregister_n(struct fmc_device **fmc, int n); - - uint32_t fmc_readl(struct fmc_device *fmc, int offset); - void fmc_writel(struct fmc_device *fmc, uint32_t val, int off); - void *fmc_get_drvdata(struct fmc_device *fmc); - void fmc_set_drvdata(struct fmc_device *fmc, void *data); - - int fmc_reprogram(struct fmc_device *f, struct fmc_driver *d, char *gw, - int sdb_entry); - -The data structure that describe a device is detailed in *note FMC -Device::, the one that describes a driver is detailed in *note FMC -Driver::. Please note that structures of type fmc_device must be -allocated by the caller, but must not be released after unregistering. -The fmc-bus itself takes care of releasing the structure when their use -count reaches zero - actually, the device model does that in lieu of us. - -The functions to register and unregister n devices are meant to be used -by carriers that host more than one mezzanine. The devices must all be -registered at the same time because if the FPGA is reprogrammed, all -devices in the array are affected. Usually, the driver matching the -first device will reprogram the FPGA, so other devices must know they -are already driven by a reprogrammed FPGA. - -If a carrier hosts slots that are driven by different FPGA devices, it -should register as a group only mezzanines that are driven by the same -FPGA, for the reason outlined above. - -Finally, the fmc_reprogram function calls the reprogram method (see -*note The API Offered by Carriers:: and also scans the memory area for -an SDB tree. You can pass -1 as sdb_entry to disable such scan. -Otherwise, the function fails if no tree is found at the specified -entry point. The function is meant to factorize common code, and by -the time you read this it is already used by the spec-sw and fine-delay -modules. diff --git a/Documentation/fmc/FMC-and-SDB.txt b/Documentation/fmc/FMC-and-SDB.txt deleted file mode 100644 index fa14e0b24521..000000000000 --- a/Documentation/fmc/FMC-and-SDB.txt +++ /dev/null @@ -1,88 +0,0 @@ - -FMC (FPGA Mezzanine Card) is the standard we use for our I/O devices, -in the context of White Rabbit and related hardware. - -In our I/O environments we need to write drivers for each mezzanine -card, and such drivers must work regardless of the carrier being used. -To achieve this, we abstract the FMC interface. - -We have a carrier for PCI-E called SPEC and one for VME called SVEC, -but more are planned. Also, we support stand-alone devices (usually -plugged on a SPEC card), controlled through Etherbone, developed by GSI. - -Code and documentation for the FMC bus was born as part of the spec-sw -project, but now it lives in its own project. Other projects, i.e. -software support for the various carriers, should include this as a -submodule. - -The most up to date version of code and documentation is always -available from the repository you can clone from: - - git://ohwr.org/fmc-projects/fmc-bus.git (read-only) - git@ohwr.org:fmc-projects/fmc-bus.git (read-write for developers) - -Selected versions of the documentation, as well as complete tar -archives for selected revisions are placed to the Files section of the -project: `http://www.ohwr.org/projects/fmc-bus/files' - - -What is FMC -*********** - -FMC, as said, stands for "FPGA Mezzanine Card". It is a standard -developed by the VME consortium called VITA (VMEbus International Trade -Association and ratified by ANSI, the American National Standard -Institute. The official documentation is called "ANSI-VITA 57.1". - -The FMC card is an almost square PCB, around 70x75 millimeters, that is -called mezzanine in this document. It usually lives plugged into -another PCB for power supply and control; such bigger circuit board is -called carrier from now on, and a single carrier may host more than one -mezzanine. - -In the typical application the mezzanine is mostly analog while the -carrier is mostly digital, and hosts an FPGA that must be configured to -match the specific mezzanine and the desired application. Thus, you may -need to load different FPGA images to drive different instances of the -same mezzanine. - -FMC, as such, is not a bus in the usual meaning of the term, because -most carriers have only one connector, and carriers with several -connectors have completely separate electrical connections to them. -This package, however, implements a bus as a software abstraction. - - -What is SDB -*********** - -SDB (Self Describing Bus) is a set of data structures that we use for -enumerating the internal structure of an FPGA image. We also use it as -a filesystem inside the FMC EEPROM. - -SDB is not mandatory for use of this FMC kernel bus, but if you have SDB -this package can make good use of it. SDB itself is developed in the -fpga-config-space OHWR project. The link to the repository is -`git://ohwr.org/hdl-core-lib/fpga-config-space.git' and what is used in -this project lives in the sdbfs subdirectory in there. - -SDB support for FMC is described in *note FMC Identification:: and -*note SDB Support:: - - -SDB Support -*********** - -The fmc.ko bus driver exports a few functions to help drivers taking -advantage of the SDB information that may be present in your own FPGA -memory image. - -The module exports the following functions, in the special header -. The linux/ prefix in the name is there because we -plan to submit it upstream in the future, and don't want to force -changes on our drivers if that happens. - - int fmc_scan_sdb_tree(struct fmc_device *fmc, unsigned long address); - void fmc_show_sdb_tree(struct fmc_device *fmc); - signed long fmc_find_sdb_device(struct sdb_array *tree, uint64_t vendor, - uint32_t device, unsigned long *sz); - int fmc_free_sdb_tree(struct fmc_device *fmc); diff --git a/Documentation/fmc/carrier.txt b/Documentation/fmc/carrier.txt deleted file mode 100644 index 5e4f1dd3e98b..000000000000 --- a/Documentation/fmc/carrier.txt +++ /dev/null @@ -1,311 +0,0 @@ -FMC Device -********** - -Within the Linux bus framework, the FMC device is created and -registered by the carrier driver. For example, the PCI driver for the -SPEC card fills a data structure for each SPEC that it drives, and -registers an associated FMC device for each card. The SVEC driver can -do exactly the same for the VME carrier (actually, it should do it -twice, because the SVEC carries two FMC mezzanines). Similarly, an -Etherbone driver will be able to register its own FMC devices, offering -communication primitives through frame exchange. - -The contents of the EEPROM within the FMC are used for identification -purposes, i.e. for matching the device with its own driver. For this -reason the device structure includes a complete copy of the EEPROM -(actually, the carrier driver may choose whether or not to return it - -for example we most likely won't have the whole EEPROM available for -Etherbone devices. - -The following listing shows the current structure defining a device. -Please note that all the machinery is in place but some details may -still change in the future. For this reason, there is a version field -at the beginning of the structure. As usual, the minor number will -change for compatible changes (like a new flag) and the major number -will increase when an incompatible change happens (for example, a -change in layout of some fmc data structures). Device writers should -just set it to the value FMC_VERSION, and be ready to get back -EINVAL -at registration time. - - struct fmc_device { - unsigned long version; - unsigned long flags; - struct module *owner; /* char device must pin it */ - struct fmc_fru_id id; /* for EEPROM-based match */ - struct fmc_operations *op; /* carrier-provided */ - int irq; /* according to host bus. 0 == none */ - int eeprom_len; /* Usually 8kB, may be less */ - int eeprom_addr; /* 0x50, 0x52 etc */ - uint8_t *eeprom; /* Full contents or leading part */ - char *carrier_name; /* "SPEC" or similar, for special use */ - void *carrier_data; /* "struct spec *" or equivalent */ - __iomem void *fpga_base; /* May be NULL (Etherbone) */ - __iomem void *slot_base; /* Set by the driver */ - struct fmc_device **devarray; /* Allocated by the bus */ - int slot_id; /* Index in the slot array */ - int nr_slots; /* Number of slots in this carrier */ - unsigned long memlen; /* Used for the char device */ - struct device dev; /* For Linux use */ - struct device *hwdev; /* The underlying hardware device */ - unsigned long sdbfs_entry; - struct sdb_array *sdb; - uint32_t device_id; /* Filled by the device */ - char *mezzanine_name; /* Defaults to ``fmc'' */ - void *mezzanine_data; - }; - -The meaning of most fields is summarized in the code comment above. - -The following fields must be filled by the carrier driver before -registration: - - * version: must be set to FMC_VERSION. - - * owner: set to MODULE_OWNER. - - * op: the operations to act on the device. - - * irq: number for the mezzanine; may be zero. - - * eeprom_len: length of the following array. - - * eeprom_addr: 0x50 for first mezzanine and so on. - - * eeprom: the full content of the I2C EEPROM. - - * carrier_name. - - * carrier_data: a unique pointer for the carrier. - - * fpga_base: the I/O memory address (may be NULL). - - * slot_id: the index of this slot (starting from zero). - - * memlen: if fpga_base is valid, the length of I/O memory. - - * hwdev: to be used in some dev_err() calls. - - * device_id: a slot-specific unique integer number. - - -Please note that the carrier should read its own EEPROM memory before -registering the device, as well as fill all other fields listed above. - -The following fields should not be assigned, because they are filled -later by either the bus or the device driver: - - * flags. - - * fru_id: filled by the bus, parsing the eeprom. - - * slot_base: filled and used by the driver, if useful to it. - - * devarray: an array og all mezzanines driven by a singe FPGA. - - * nr_slots: set by the core at registration time. - - * dev: used by Linux. - - * sdb: FPGA contents, scanned according to driver's directions. - - * sdbfs_entry: SDB entry point in EEPROM: autodetected. - - * mezzanine_data: available for the driver. - - * mezzanine_name: filled by fmc-bus during identification. - - -Note: mezzanine_data may be redundant, because Linux offers the drvdata -approach, so the field may be removed in later versions of this bus -implementation. - -As I write this, she SPEC carrier is already completely functional in -the fmc-bus environment, and is a good reference to look at. - - -The API Offered by Carriers -=========================== - -The carrier provides a number of methods by means of the -`fmc_operations' structure, which currently is defined like this -(again, it is a moving target, please refer to the header rather than -this document): - - struct fmc_operations { - uint32_t (*readl)(struct fmc_device *fmc, int offset); - void (*writel)(struct fmc_device *fmc, uint32_t value, int offset); - int (*reprogram)(struct fmc_device *f, struct fmc_driver *d, char *gw); - int (*validate)(struct fmc_device *fmc, struct fmc_driver *drv); - int (*irq_request)(struct fmc_device *fmc, irq_handler_t h, - char *name, int flags); - void (*irq_ack)(struct fmc_device *fmc); - int (*irq_free)(struct fmc_device *fmc); - int (*gpio_config)(struct fmc_device *fmc, struct fmc_gpio *gpio, - int ngpio); - int (*read_ee)(struct fmc_device *fmc, int pos, void *d, int l); - int (*write_ee)(struct fmc_device *fmc, int pos, const void *d, int l); - }; - -The individual methods perform the following tasks: - -`readl' -`writel' - These functions access FPGA registers by whatever means the - carrier offers. They are not expected to fail, and most of the time - they will just make a memory access to the host bus. If the - carrier provides a fpga_base pointer, the driver may use direct - access through that pointer. For this reason the header offers the - inline functions fmc_readl and fmc_writel that access fpga_base if - the respective method is NULL. A driver that wants to be portable - and efficient should use fmc_readl and fmc_writel. For Etherbone, - or other non-local carriers, error-management is still to be - defined. - -`validate' - Module parameters are used to manage different applications for - two or more boards of the same kind. Validation is based on the - busid module parameter, if provided, and returns the matching - index in the associated array. See *note Module Parameters:: in in - doubt. If no match is found, `-ENOENT' is returned; if the user - didn't pass `busid=', all devices will pass validation. The value - returned by the validate method can be used as index into other - parameters (for example, some drivers use the `lm32=' parameter in - this way). Such "generic parameters" are documented in *note - Module Parameters::, below. The validate method is used by - `fmc-trivial.ko', described in *note fmc-trivial::. - -`reprogram' - The carrier enumerates FMC devices by loading a standard (or - golden) FPGA binary that allows EEPROM access. Each driver, then, - will need to reprogram the FPGA by calling this function. If the - name argument is NULL, the carrier should reprogram the golden - binary. If the gateware name has been overridden through module - parameters (in a carrier-specific way) the file loaded will match - the parameters. Per-device gateware names can be specified using - the `gateware=' parameter, see *note Module Parameters::. Note: - Clients should call rhe new helper, fmc_reprogram, which both - calls this method and parse the SDB tree of the FPGA. - -`irq_request' -`irq_ack' -`irq_free' - Interrupt management is carrier-specific, so it is abstracted as - operations. The interrupt number is listed in the device - structure, and for the mezzanine driver the number is only - informative. The handler will receive the fmc pointer as dev_id; - the flags argument is passed to the Linux request_irq function, - but fmc-specific flags may be added in the future. You'll most - likely want to pass the `IRQF_SHARED' flag. - -`gpio_config' - The method allows to configure a GPIO pin in the carrier, and read - its current value if it is configured as input. See *note The GPIO - Abstraction:: for details. - -`read_ee' -`write_ee' - Read or write the EEPROM. The functions are expected to be only - called before reprogramming and the carrier should refuse them - with `ENODEV' after reprogramming. The offset is expected to be - within 8kB (the current size), but addresses up to 1MB are - reserved to fit bigger I2C devices in the future. Carriers may - offer access to other internal flash memories using these same - methods: for example the SPEC driver may define that its carrier - I2C memory is seen at offset 1M and the internal SPI flash is seen - at offset 16M. This multiplexing of several flash memories in the - same address space is carrier-specific and should only be used - by a driver that has verified the `carrier_name' field. - - - -The GPIO Abstraction -==================== - -Support for GPIO pins in the fmc-bus environment is not very -straightforward and deserves special discussion. - -While the general idea of a carrier-independent driver seems to fly, -configuration of specific signals within the carrier needs at least -some knowledge of the carrier itself. For this reason, the specific -driver can request to configure carrier-specific GPIO pins, numbered -from 0 to at most 4095. Configuration is performed by passing a -pointer to an array of struct fmc_gpio items, as well as the length of -the array. This is the data structure: - - struct fmc_gpio { - char *carrier_name; - int gpio; - int _gpio; /* internal use by the carrier */ - int mode; /* GPIOF_DIR_OUT etc, from */ - int irqmode; /* IRQF_TRIGGER_LOW and so on */ - }; - -By specifying a carrier_name for each pin, the driver may access -different pins in different carriers. The gpio_config method is -expected to return the number of pins successfully configured, ignoring -requests for other carriers. However, if no pin is configured (because -no structure at all refers to the current carrier_name), the operation -returns an error so the caller will know that it is running under a -yet-unsupported carrier. - -So, for example, a driver that has been developed and tested on both -the SPEC and the SVEC may request configuration of two different GPIO -pins, and expect one such configuration to succeed - if none succeeds -it most likely means that the current carrier is a still-unknown one. - -If, however, your GPIO pin has a specific known role, you can pass a -special number in the gpio field, using one of the following macros: - - #define FMC_GPIO_RAW(x) (x) /* 4096 of them */ - #define FMC_GPIO_IRQ(x) ((x) + 0x1000) /* 256 of them */ - #define FMC_GPIO_LED(x) ((x) + 0x1100) /* 256 of them */ - #define FMC_GPIO_KEY(x) ((x) + 0x1200) /* 256 of them */ - #define FMC_GPIO_TP(x) ((x) + 0x1300) /* 256 of them */ - #define FMC_GPIO_USER(x) ((x) + 0x1400) /* 256 of them */ - -Use of virtual GPIO numbers (anything but FMC_GPIO_RAW) is allowed -provided the carrier_name field in the data structure is left -unspecified (NULL). Each carrier is responsible for providing a mapping -between virtual and physical GPIO numbers. The carrier may then use the -_gpio field to cache the result of this mapping. - -All carriers must map their I/O lines to the sets above starting from -zero. The SPEC, for example, maps interrupt pins 0 and 1, and test -points 0 through 3 (even if the test points on the PCB are called -5,6,7,8). - -If, for example, a driver requires a free LED and a test point (for a -scope probe to be plugged at some point during development) it may ask -for FMC_GPIO_LED(0) and FMC_GPIO_TP(0). Each carrier will provide -suitable GPIO pins. Clearly, the person running the drivers will know -the order used by the specific carrier driver in assigning leds and -testpoints, so to make a carrier-dependent use of the diagnostic tools. - -In theory, some form of autodetection should be possible: a driver like -the wr-nic (which uses IRQ(1) on the SPEC card) should configure -IRQ(0), make a test with software-generated interrupts and configure -IRQ(1) if the test fails. This probing step should be used because even -if the wr-nic gateware is known to use IRQ1 on the SPEC, the driver -should be carrier-independent and thus use IRQ(0) as a first bet - -actually, the knowledge that IRQ0 may fail is carrier-dependent -information, but using it doesn't make the driver unsuitable for other -carriers. - -The return value of gpio_config is defined as follows: - - * If no pin in the array can be used by the carrier, `-ENODEV'. - - * If at least one virtual GPIO number cannot be mapped, `-ENOENT'. - - * On success, 0 or positive. The value returned is the number of - high input bits (if no input is configured, the value for success - is 0). - -While I admit the procedure is not completely straightforward, it -allows configuration, input and output with a single carrier operation. -Given the typical use case of FMC devices, GPIO operations are not -expected to ever by in hot paths, and GPIO access so fare has only been -used to configure the interrupt pin, mode and polarity. Especially -reading inputs is not expected to be common. If your device has GPIO -capabilities in the hot path, you should consider using the kernel's -GPIO mechanisms. diff --git a/Documentation/fmc/fmc-chardev.txt b/Documentation/fmc/fmc-chardev.txt deleted file mode 100644 index d9ccb278e597..000000000000 --- a/Documentation/fmc/fmc-chardev.txt +++ /dev/null @@ -1,64 +0,0 @@ -fmc-chardev -=========== - -This is a simple generic driver, that allows user access by means of a -character device (actually, one for each mezzanine it takes hold of). - -The char device is created as a misc device. Its name in /dev (as -created by udev) is the same name as the underlying FMC device. Thus, -the name can be a silly fmc-0000 look-alike if the device has no -identifiers nor bus_id, a more specific fmc-0400 if the device has a -bus-specific address but no associated name, or something like -fdelay-0400 if the FMC core can rely on both a mezzanine name and a bus -address. - -Currently the driver only supports read and write: you can lseek to the -desired address and read or write a register. - -The driver assumes all registers are 32-bit in size, and only accepts a -single read or write per system call. However, as a result of Unix read -and write semantics, users can simply fread or fwrite bigger areas in -order to dump or store bigger memory areas. - -There is currently no support for mmap, user-space interrupt management -and DMA buffers. They may be added in later versions, if the need -arises. - -The example below shows raw access to a SPEC card programmed with its -golden FPGA file, that features an SDB structure at offset 256 - i.e. -64 words. The mezzanine's EEPROM in this case is not programmed, so the -default name is fmc-, and there are two cards in the system: - - spusa.root# insmod fmc-chardev.ko - [ 1073.339332] spec 0000:02:00.0: Driver has no ID: matches all - [ 1073.345051] spec 0000:02:00.0: Created misc device "fmc-0200" - [ 1073.350821] spec 0000:04:00.0: Driver has no ID: matches all - [ 1073.356525] spec 0000:04:00.0: Created misc device "fmc-0400" - spusa.root# ls -l /dev/fmc* - crw------- 1 root root 10, 58 Nov 20 19:23 /dev/fmc-0200 - crw------- 1 root root 10, 57 Nov 20 19:23 /dev/fmc-0400 - spusa.root# dd bs=4 skip=64 count=1 if=/dev/fmc-0200 2> /dev/null | od -t x1z - 0000000 2d 42 44 53 >-BDS< - 0000004 - -The simple program tools/fmc-mem in this package can access an FMC char -device and read or write a word or a whole area. Actually, the program -is not specific to FMC at all, it just uses lseek, read and write. - -Its first argument is the device name, the second the offset, the third -(if any) the value to write and the optional last argument that must -begin with "+" is the number of bytes to read or write. In case of -repeated reading data is written to stdout; repeated writes read from -stdin and the value argument is ignored. - -The following examples show reading the SDB magic number and the first -SDB record from a SPEC device programmed with its golden image: - - spusa.root# ./fmc-mem /dev/fmc-0200 100 - 5344422d - spusa.root# ./fmc-mem /dev/fmc-0200 100 +40 | od -Ax -t x1z - 000000 2d 42 44 53 00 01 02 00 00 00 00 00 00 00 00 00 >-BDS............< - 000010 00 00 00 00 ff 01 00 00 00 00 00 00 51 06 00 00 >............Q...< - 000020 c9 42 a5 e6 02 00 00 00 11 05 12 20 2d 34 42 57 >.B......... -4BW< - 000030 73 6f 72 43 72 61 62 73 49 53 47 2d 00 20 20 20 >sorCrabsISG-. < - 000040 diff --git a/Documentation/fmc/fmc-fakedev.txt b/Documentation/fmc/fmc-fakedev.txt deleted file mode 100644 index e85b74a4ae30..000000000000 --- a/Documentation/fmc/fmc-fakedev.txt +++ /dev/null @@ -1,36 +0,0 @@ -fmc-fakedev -=========== - -This package includes a software-only device, called fmc-fakedev, which -is able to register up to 4 mezzanines (by default it registers one). -Unlike the SPEC driver, which creates an FMC device for each PCI cards -it manages, this module creates a single instance of its set of -mezzanines. - -It is meant as the simplest possible example of how a driver should be -written, and it includes a fake EEPROM image (built using the tools -described in *note FMC Identification::),, which by default is -replicated for each fake mezzanine. - -You can also use this device to verify the match algorithms, by asking -it to test your own EEPROM image. You can provide the image by means of -the eeprom= module parameter: the new EEPROM image is loaded, as usual, -by means of the firmware loader. This example shows the defaults and a -custom EEPROM image: - - spusa.root# insmod fmc-fakedev.ko - [ 99.971247] fake-fmc-carrier: mezzanine 0 - [ 99.975393] Manufacturer: fake-vendor - [ 99.979624] Product name: fake-design-for-testing - spusa.root# rmmod fmc-fakedev - spusa.root# insmod fmc-fakedev.ko eeprom=fdelay-eeprom.bin - [ 121.447464] fake-fmc-carrier: Mezzanine 0: eeprom "fdelay-eeprom.bin" - [ 121.462725] fake-fmc-carrier: mezzanine 0 - [ 121.466858] Manufacturer: CERN - [ 121.470477] Product name: FmcDelay1ns4cha - spusa.root# rmmod fmc-fakedev - -After loading the device, you can use the write_ee method do modify its -own internal fake EEPROM: whenever the image is overwritten starting at -offset 0, the module will unregister and register again the FMC device. -This is shown in fmc-write-eeprom.txt diff --git a/Documentation/fmc/fmc-trivial.txt b/Documentation/fmc/fmc-trivial.txt deleted file mode 100644 index d1910bc67159..000000000000 --- a/Documentation/fmc/fmc-trivial.txt +++ /dev/null @@ -1,17 +0,0 @@ -fmc-trivial -=========== - -The simple module fmc-trivial is just a simple client that registers an -interrupt handler. I used it to verify the basic mechanism of the FMC -bus and how interrupts worked. - -The module implements the generic FMC parameters, so it can program a -different gateware file in each card. The whole list of parameters it -accepts are: - -`busid=' -`gateware=' - Generic parameters. See mezzanine.txt - - -This driver is worth reading, in my opinion. diff --git a/Documentation/fmc/fmc-write-eeprom.txt b/Documentation/fmc/fmc-write-eeprom.txt deleted file mode 100644 index e0a9712156aa..000000000000 --- a/Documentation/fmc/fmc-write-eeprom.txt +++ /dev/null @@ -1,98 +0,0 @@ -fmc-write-eeprom -================ - -This module is designed to load a binary file from /lib/firmware and to -write it to the internal EEPROM of the mezzanine card. This driver uses -the `busid' generic parameter. - -Overwriting the EEPROM is not something you should do daily, and it is -expected to only happen during manufacturing. For this reason, the -module makes it unlikely for the random user to change a working EEPROM. - -However, since the EEPROM may include application-specific information -other than the identification, later versions of this packages added -write-support through sysfs. See *note Accessing the EEPROM::. - -To avoid damaging the EEPROM content, the module takes the following -measures: - - * It accepts a `file=' argument (within /lib/firmware) and if no - such argument is received, it doesn't write anything to EEPROM - (i.e. there is no default file name). - - * If the file name ends with `.bin' it is written verbatim starting - at offset 0. - - * If the file name ends with `.tlv' it is interpreted as - type-length-value (i.e., it allows writev(2)-like operation). - - * If the file name doesn't match any of the patterns above, it is - ignored and no write is performed. - - * Only cards listed with `busid=' are written to. If no busid is - specified, no programming is done (and the probe function of the - driver will fail). - - -Each TLV tuple is formatted in this way: the header is 5 bytes, -followed by data. The first byte is `w' for write, the next two bytes -represent the address, in little-endian byte order, and the next two -represent the data length, in little-endian order. The length does not -include the header (it is the actual number of bytes to be written). - -This is a real example: that writes 5 bytes at position 0x110: - - spusa.root# od -t x1 -Ax /lib/firmware/try.tlv - 000000 77 10 01 05 00 30 31 32 33 34 - 00000a - spusa.root# insmod /tmp/fmc-write-eeprom.ko busid=0x0200 file=try.tlv - [19983.391498] spec 0000:03:00.0: write 5 bytes at 0x0110 - [19983.414615] spec 0000:03:00.0: write_eeprom: success - -Please note that you'll most likely want to use SDBFS to build your -EEPROM image, at least if your mezzanines are being used in the White -Rabbit environment. For this reason the TLV format is not expected to -be used much and is not expected to be developed further. - -If you want to try reflashing fake EEPROM devices, you can use the -fmc-fakedev.ko module (see *note fmc-fakedev::). Whenever you change -the image starting at offset 0, it will deregister and register again -after two seconds. Please note, however, that if fmc-write-eeprom is -still loaded, the system will associate it to the new device, which -will be reprogrammed and thus will be unloaded after two seconds. The -following example removes the module after it reflashed fakedev the -first time. - - spusa.root# insmod fmc-fakedev.ko - [ 72.984733] fake-fmc: Manufacturer: fake-vendor - [ 72.989434] fake-fmc: Product name: fake-design-for-testing - spusa.root# insmod fmc-write-eeprom.ko busid=0 file=fdelay-eeprom.bin; \ - rmmod fmc-write-eeprom - [ 130.874098] fake-fmc: Matching a generic driver (no ID) - [ 130.887845] fake-fmc: programming 6155 bytes - [ 130.894567] fake-fmc: write_eeprom: success - [ 132.895794] fake-fmc: Manufacturer: CERN - [ 132.899872] fake-fmc: Product name: FmcDelay1ns4cha - - -Accessing the EEPROM -===================== - -The bus creates a sysfs binary file called eeprom for each mezzanine it -knows about: - - spusa.root# cd /sys/bus/fmc/devices; ls -l */eeprom - -r--r--r-- 1 root root 8192 Feb 21 12:30 FmcAdc100m14b4cha-0800/eeprom - -r--r--r-- 1 root root 8192 Feb 21 12:30 FmcDelay1ns4cha-0200/eeprom - -r--r--r-- 1 root root 8192 Feb 21 12:30 FmcDio5cha-0400/eeprom - -Everybody can read the files and the superuser can also modify it, but -the operation may on the carrier driver, if the carrier is unable to -access the I2C bus. For example, the spec driver can access the bus -only with its golden gateware: after a mezzanine driver reprogrammed -the FPGA with a custom circuit, the carrier is unable to access the -EEPROM and returns ENOTSUPP. - -An alternative way to write the EEPROM is the mezzanine driver -fmc-write-eeprom (See *note fmc-write-eeprom::), but the procedure is -more complex. diff --git a/Documentation/fmc/identifiers.txt b/Documentation/fmc/identifiers.txt deleted file mode 100644 index 3bb577ff0d52..000000000000 --- a/Documentation/fmc/identifiers.txt +++ /dev/null @@ -1,168 +0,0 @@ -FMC Identification -****************** - -The FMC standard requires every compliant mezzanine to carry -identification information in an I2C EEPROM. The information must be -laid out according to the "IPMI Platform Management FRU Information", -where IPMI is a lie I'd better not expand, and FRU means "Field -Replaceable Unit". - -The FRU information is an intricate unreadable binary blob that must -live at offset 0 of the EEPROM, and typically extends for a few hundred -bytes. The standard allows the application to use all the remaining -storage area of the EEPROM as it wants. - -This chapter explains how to create your own EEPROM image and how to -write it in your mezzanine, as well as how devices and drivers are -paired at run time. EEPROM programming uses tools that are part of this -package and SDB (part of the fpga-config-space package). - -The first sections are only interesting for manufacturers who need to -write the EEPROM. If you are just a software developer writing an FMC -device or driver, you may jump straight to *note SDB Support::. - - -Building the FRU Structure -========================== - -If you want to know the internals of the FRU structure and despair, you -can retrieve the document from -`http://download.intel.com/design/servers/ipmi/FRU1011.pdf' . The -standard is awful and difficult without reason, so we only support the -minimum mandatory subset - we create a simple structure and parse it -back at run time, but we are not able to either generate or parse more -arcane features like non-english languages and 6-bit text. If you need -more items of the FRU standard for your boards, please submit patches. - -This package includes the Python script that Matthieu Cattin wrote to -generate the FRU binary blob, based on an helper libipmi by Manohar -Vanga and Matthieu himself. I changed the test script to receive -parameters from the command line or from the environment (the command -line takes precedence) - -To make a long story short, in order to build a standard-compliant -binary file to be burned in your EEPROM, you need the following items: - - Environment Opt Official Name Default ---------------------------------------------------------------------- - FRU_VENDOR -v "Board Manufacturer" fmc-example - FRU_NAME -n "Board Product Name" mezzanine - FRU_SERIAL -s `Board Serial Number" 0001 - FRU_PART -p "Board Part Number" sample-part - FRU_OUTPUT -o not applicable /dev/stdout - -The "Official Name" above is what you find in the FRU official -documentation, chapter 11, page 7 ("Board Info Area Format"). The -output option is used to save the generated binary to a specific file -name instead of stdout. - -You can pass the items to the FRU generator either in the environment -or on the command line. This package has currently no support for -specifying power consumption or such stuff, but I plan to add it as -soon as I find some time for that. - -FIXME: consumption etc for FRU are here or in PTS? - -The following example creates a binary image for a specific board: - - ./tools/fru-generator -v CERN -n FmcAdc100m14b4cha \ - -s HCCFFIA___-CR000003 -p EDA-02063-V5-0 > eeprom.bin - -The following example shows a script that builds several binary EEPROM -images for a series of boards, changing the serial number for each of -them. The script uses a mix of environment variables and command line -options, and uses the same string patterns shown above. - - #!/bin/sh - - export FRU_VENDOR="CERN" - export FRU_NAME="FmcAdc100m14b4cha" - export FRU_PART="EDA-02063-V5-0" - - serial="HCCFFIA___-CR" - - for number in $(seq 1 50); do - # build number-string "ns" - ns="$(printf %06d $number)" - ./fru-generator -s "${serial}${ns}" > eeprom-${ns}.bin - done - - -Using SDB-FS in the EEPROM -========================== - -If you want to use SDB as a filesystem in the EEPROM device within the -mezzanine, you should create one such filesystem using gensdbfs, from -the fpga-config-space package on OHWR. - -By using an SBD filesystem you can cluster several files in a single -EEPROM, so both the host system and a soft-core running in the FPGA (if -any) can access extra production-time information. - -We chose to use SDB as a storage filesystem because the format is very -simple, and both the host system and the soft-core will likely already -include support code for such format. The SDB library offered by the -fpga-config-space is less than 1kB under LM32, so it proves quite up to -the task. - -The SDB entry point (which acts as a directory listing) cannot live at -offset zero in the flash device, because the FRU information must live -there. To avoid wasting precious storage space while still allowing -for more-than-minimal FRU structures, the fmc.ko will look for the SDB -record at address 256, 512 and 1024. - -In order to generate the complete EEPROM image you'll need a -configuration file for gensdbfs: you tell the program where to place -the sdb entry point, and you must force the FRU data file to be placed -at the beginning of the storage device. If needed, you can also place -other files at a special offset (we sometimes do it for backward -compatibility with drivers we wrote before implementing SDB for flash -memory). - -The directory tools/sdbfs of this package includes a well-commented -example that you may want to use as a starting point (the comments are -in the file called -SDB-CONFIG-). Reading documentation for gensdbfs -is a suggested first step anyways. - -This package (generic FMC bus support) only accesses two files in the -EEPROM: the FRU information, at offset zero, with a suggested filename -of IPMI-FRU and the short name for the mezzanine, in a file called -name. The IPMI-FRU name is not mandatory, but a strongly suggested -choice; the name filename is mandatory, because this is the preferred -short name used by the FMC core. For example, a name of "fdelay" may -supplement a Product Name like "FmcDelay1ns4cha" - exactly as -demonstrated in `tools/sdbfs'. - -Note: SDB access to flash memory is not yet supported, so the short -name currently in use is just the "Product Name" FRU string. - -The example in tools/sdbfs includes an extra file, that is needed by -the fine-delay driver, and must live at a known address of 0x1800. By -running gensdbfs on that directory you can output your binary EEPROM -image (here below spusa$ is the shell prompt): - - spusa$ ../fru-generator -v CERN -n FmcDelay1ns4cha -s proto-0 \ - -p EDA-02267-V3 > IPMI-FRU - spusa$ ls -l - total 16 - -rw-rw-r-- 1 rubini staff 975 Nov 19 18:08 --SDB-CONFIG-- - -rw-rw-r-- 1 rubini staff 216 Nov 19 18:13 IPMI-FRU - -rw-rw-r-- 1 rubini staff 11 Nov 19 18:04 fd-calib - -rw-rw-r-- 1 rubini staff 7 Nov 19 18:04 name - spusa$ sudo gensdbfs . /lib/firmware/fdelay-eeprom.bin - spusa$ sdb-read -l -e 0x100 /lib/firmware/fdelay-eeprom.bin - /home/rubini/wip/sdbfs/userspace/sdb-read: listing format is to be defined - 46696c6544617461:2e202020 00000100-000018ff . - 46696c6544617461:6e616d65 00000200-00000206 name - 46696c6544617461:66642d63 00001800-000018ff fd-calib - 46696c6544617461:49504d49 00000000-000000d7 IPMI-FRU - spusa$ ../fru-dump /lib/firmware/fdelay-eeprom.bin - /lib/firmware/fdelay-eeprom.bin: manufacturer: CERN - /lib/firmware/fdelay-eeprom.bin: product-name: FmcDelay1ns4cha - /lib/firmware/fdelay-eeprom.bin: serial-number: proto-0 - /lib/firmware/fdelay-eeprom.bin: part-number: EDA-02267-V3 - -As expected, the output file is both a proper sdbfs object and an IPMI -FRU information blob. The fd-calib file lives at offset 0x1800 and is -over-allocated to 256 bytes, according to the configuration file for -gensdbfs. diff --git a/Documentation/fmc/mezzanine.txt b/Documentation/fmc/mezzanine.txt deleted file mode 100644 index 87910dbfc91e..000000000000 --- a/Documentation/fmc/mezzanine.txt +++ /dev/null @@ -1,123 +0,0 @@ -FMC Driver -********** - -An FMC driver is concerned with the specific mezzanine and associated -gateware. As such, it is expected to be independent of the carrier -being used: it will perform I/O accesses only by means of -carrier-provided functions. - -The matching between device and driver is based on the content of the -EEPROM (as mandated by the FMC standard) or by the actual cores -configured in the FPGA; the latter technique is used when the FPGA is -already programmed when the device is registered to the bus core. - -In some special cases it is possible for a driver to directly access -FPGA registers, by means of the `fpga_base' field of the device -structure. This may be needed for high-bandwidth peripherals like fast -ADC cards. If the device module registered a remote device (for example -by means of Etherbone), the `fpga_base' pointer will be NULL. -Therefore, drivers must be ready to deal with NULL base pointers, and -fail gracefully. Most driver, however, are not expected to access the -pointer directly but run fmc_readl and fmc_writel instead, which will -work in any case. - -In even more special cases, the driver may access carrier-specific -functionality: the `carrier_name' string allows the driver to check -which is the current carrier and make use of the `carrier_data' -pointer. We chose to use carrier names rather than numeric identifiers -for greater flexibility, but also to avoid a central registry within -the `fmc.h' file - we hope other users will exploit our framework with -their own carriers. An example use of carrier names is in GPIO setup -(see *note The GPIO Abstraction::), although the name match is not -expected to be performed by the driver. If you depend on specific -carriers, please check the carrier name and fail gracefully if your -driver finds it is running in a yet-unknown-to-it environment. - - -ID Table -======== - -Like most other Linux drivers, and FMC driver must list all the devices -which it is able to drive. This is usually done by means of a device -table, but in FMC we can match hardware based either on the contents of -their EEPROM or on the actual FPGA cores that can be enumerated. -Therefore, we have two tables of identifiers. - -Matching of FRU information depends on two names, the manufacturer (or -vendor) and the device (see *note FMC Identification::); for -flexibility during production (i.e. before writing to the EEPROM) the -bus supports a catch-all driver that specifies NULL strings. For this -reason, the table is specified as pointer-and-length, not a a -null-terminated array - the entry with NULL names can be a valid entry. - -Matching on FPGA cores depends on two numeric fields: the 64-bit vendor -number and the 32-bit device number. Support for matching based on -class is not yet implemented. Each device is expected to be uniquely -identified by an array of cores (it matches if all of the cores are -instantiated), and for consistency the list is passed as -pointer-and-length. Several similar devices can be driven by the same -driver, and thus the driver specifies and array of such arrays. - -The complete set of involved data structures is thus the following: - - struct fmc_fru_id { char *manufacturer; char *product_name; }; - struct fmc_sdb_one_id { uint64_t vendor; uint32_t device; }; - struct fmc_sdb_id { struct fmc_sdb_one_id *cores; int cores_nr; }; - - struct fmc_device_id { - struct fmc_fru_id *fru_id; int fru_id_nr; - struct fmc_sdb_id *sdb_id; int sdb_id_nr; - }; - -A better reference, with full explanation, is the header. - - -Module Parameters -================= - -Most of the FMC drivers need the same set of kernel parameters. This -package includes support to implement common parameters by means of -fields in the `fmc_driver' structure and simple macro definitions. - -The parameters are carrier-specific, in that they rely on the busid -concept, that varies among carriers. For the SPEC, the identifier is a -PCI bus and devfn number, 16 bits wide in total; drivers for other -carriers will most likely offer something similar but not identical, -and some code duplication is unavoidable. - -This is the list of parameters that are common to several modules to -see how they are actually used, please look at spec-trivial.c. - -`busid=' - This is an array of integers, listing carrier-specific - identification numbers. For PIC, for example, `0x0400' represents - bus 4, slot 0. If any such ID is specified, the driver will only - accept to drive cards that appear in the list (even if the FMC ID - matches). This is accomplished by the validate carrier method. - -`gateware=' - The argument is an array of strings. If no busid= is specified, - the first string of gateware= is used for all cards; otherwise the - identifiers and gateware names are paired one by one, in the order - specified. - -`show_sdb=' - For modules supporting it, this parameter asks to show the SDB - internal structure by means of kernel messages. It is disabled by - default because those lines tend to hide more important messages, - if you look at the system console while loading the drivers. - Note: the parameter is being obsoleted, because fmc.ko itself now - supports dump_sdb= that applies to every client driver. - - -For example, if you are using the trivial driver to load two different -gateware files to two different cards, you can use the following -parameters to load different binaries to the cards, after looking up -the PCI identifiers. This has been tested with a SPEC carrier. - - insmod fmc-trivial.ko \ - busid=0x0200,0x0400 \ - gateware=fmc/fine-delay.bin,fmc/simple-dio.bin - -Please note that not all sub-modules support all of those parameters. -You can use modinfo to check what is supported by each module. diff --git a/Documentation/fmc/parameters.txt b/Documentation/fmc/parameters.txt deleted file mode 100644 index 59edf088e3a4..000000000000 --- a/Documentation/fmc/parameters.txt +++ /dev/null @@ -1,56 +0,0 @@ -Module Parameters in fmc.ko -*************************** - -The core driver receives two module parameters, meant to help debugging -client modules. Both parameters can be modified by writing to -/sys/module/fmc/parameters/, because they are used when client drivers -are devices are registered, not when fmc.ko is loaded. - -`dump_eeprom=' - If not zero, the parameter asks the bus controller to dump the - EEPROM of any device that is registered, using printk. - -`dump_sdb=' - If not zero, the parameter prints the SDB tree of every FPGA it is - loaded by fmc_reprogram(). If greater than one, it asks to dump - the binary content of SDB records. This currently only dumps the - top-level SDB array, though. - - -EEPROM dumping avoids repeating lines, since most of the contents is -usually empty and all bits are one or zero. This is an example of the -output: - - [ 6625.850480] spec 0000:02:00.0: FPGA programming successful - [ 6626.139949] spec 0000:02:00.0: Manufacturer: CERN - [ 6626.144666] spec 0000:02:00.0: Product name: FmcDelay1ns4cha - [ 6626.150370] FMC: mezzanine 0: 0000:02:00.0 on SPEC - [ 6626.155179] FMC: dumping eeprom 0x2000 (8192) bytes - [ 6626.160087] 0000: 01 00 00 01 00 0b 00 f3 01 0a 00 a5 85 87 c4 43 - [ 6626.167069] 0010: 45 52 4e cf 46 6d 63 44 65 6c 61 79 31 6e 73 34 - [ 6626.174019] 0020: 63 68 61 c7 70 72 6f 74 6f 2d 30 cc 45 44 41 2d - [ 6626.180975] 0030: 30 32 32 36 37 2d 56 33 da 32 30 31 32 2d 31 31 - [...] - [ 6626.371366] 0200: 66 64 65 6c 61 79 0a 00 00 00 00 00 00 00 00 00 - [ 6626.378359] 0210: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - [ 6626.385361] [...] - [ 6626.387308] 1800: 70 6c 61 63 65 68 6f 6c 64 65 72 ff ff ff ff ff - [ 6626.394259] 1810: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff - [ 6626.401250] [...] - -The dump of SDB looks like the following; the example shows the simple -golden gateware for the SPEC card, removing the leading timestamps to -fit the page: - - spec 0000:02:00.0: SDB: 00000651:e6a542c9 WB4-Crossbar-GSI - spec 0000:02:00.0: SDB: 0000ce42:ff07fc47 WR-Periph-Syscon (00000000-000000ff) - FMC: mezzanine 0: 0000:02:00.0 on SPEC - FMC: poor dump of sdb first level: - 0000: 53 44 42 2d 00 02 01 00 00 00 00 00 00 00 00 00 - 0010: 00 00 00 00 00 00 01 ff 00 00 00 00 00 00 06 51 - 0020: e6 a5 42 c9 00 00 00 02 20 12 05 11 57 42 34 2d - 0030: 43 72 6f 73 73 62 61 72 2d 47 53 49 20 20 20 00 - 0040: 00 00 01 01 00 00 00 07 00 00 00 00 00 00 00 00 - 0050: 00 00 00 00 00 00 00 ff 00 00 00 00 00 00 ce 42 - 0060: ff 07 fc 47 00 00 00 01 20 12 03 05 57 52 2d 50 - 0070: 65 72 69 70 68 2d 53 79 73 63 6f 6e 20 20 20 01 diff --git a/drivers/Kconfig b/drivers/Kconfig index e8231663f201..61cf4ea2c229 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -188,8 +188,6 @@ source "drivers/ipack/Kconfig" source "drivers/reset/Kconfig" -source "drivers/fmc/Kconfig" - source "drivers/phy/Kconfig" source "drivers/powercap/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 28b030d7988d..6d37564e783c 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -168,7 +168,6 @@ obj-$(CONFIG_IIO) += iio/ obj-$(CONFIG_VME_BUS) += vme/ obj-$(CONFIG_IPACK_BUS) += ipack/ obj-$(CONFIG_NTB) += ntb/ -obj-$(CONFIG_FMC) += fmc/ obj-$(CONFIG_POWERCAP) += powercap/ obj-$(CONFIG_MCB) += mcb/ obj-$(CONFIG_PERF_EVENTS) += perf/ diff --git a/drivers/fmc/Kconfig b/drivers/fmc/Kconfig deleted file mode 100644 index ae3d7f634932..000000000000 --- a/drivers/fmc/Kconfig +++ /dev/null @@ -1,52 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# FMC (ANSI-VITA 57.1) bus support -# - -menuconfig FMC - tristate "FMC support" - help - - FMC (FPGA Mezzanine Carrier) is a mechanical and electrical - standard for mezzanine cards that plug into a carrier board. - This kernel subsystem supports the matching between carrier - and mezzanine based on identifiers stored in the internal I2C - EEPROM, as well as having carrier-independent drivers. - - The framework was born outside of the kernel and at this time - the off-tree code base is more complete. Code and documentation - is at git://ohwr.org/fmc-projects/fmc-bus.git . - -if FMC - -config FMC_FAKEDEV - tristate "FMC fake device (software testing)" - help - This is a fake carrier, bringing a default EEPROM content - that can be rewritten at run time and usef for matching - mezzanines. - -config FMC_TRIVIAL - tristate "FMC trivial mezzanine driver (software testing)" - help - This is a fake mezzanine driver, to show how FMC works and test it. - The driver also handles interrupts (we used it with a real carrier - before the mezzanines were produced) - -config FMC_WRITE_EEPROM - tristate "FMC mezzanine driver to write I2C EEPROM" - help - This driver matches every mezzanine device and can write the - internal EEPROM of the PCB, using the firmware loader to get - its binary and the function carrier->reprogram to actually do it. - It is useful when the mezzanines are produced. - -config FMC_CHARDEV - tristate "FMC mezzanine driver that registers a char device" - help - This driver matches every mezzanine device and allows user - space to read and write registers using a char device. It - can be used to write user-space drivers, or just get - acquainted with a mezzanine before writing its specific driver. - -endif # FMC diff --git a/drivers/fmc/Makefile b/drivers/fmc/Makefile deleted file mode 100644 index e3da6192cf39..000000000000 --- a/drivers/fmc/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -obj-$(CONFIG_FMC) += fmc.o - -fmc-y = fmc-core.o -fmc-y += fmc-match.o -fmc-y += fmc-sdb.o -fmc-y += fru-parse.o -fmc-y += fmc-dump.o -fmc-y += fmc-debug.o - -obj-$(CONFIG_FMC_FAKEDEV) += fmc-fakedev.o -obj-$(CONFIG_FMC_TRIVIAL) += fmc-trivial.o -obj-$(CONFIG_FMC_WRITE_EEPROM) += fmc-write-eeprom.o -obj-$(CONFIG_FMC_CHARDEV) += fmc-chardev.o diff --git a/drivers/fmc/fmc-chardev.c b/drivers/fmc/fmc-chardev.c deleted file mode 100644 index 7d2091b5e978..000000000000 --- a/drivers/fmc/fmc-chardev.c +++ /dev/null @@ -1,199 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2012 CERN (www.cern.ch) - * Author: Alessandro Rubini - * - * This work is part of the White Rabbit project, a research effort led - * by CERN, the European Institute for Nuclear Research. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static LIST_HEAD(fc_devices); -static DEFINE_SPINLOCK(fc_lock); - -struct fc_instance { - struct list_head list; - struct fmc_device *fmc; - struct miscdevice misc; -}; - -/* at open time, we must identify our device */ -static int fc_open(struct inode *ino, struct file *f) -{ - struct fmc_device *fmc; - struct fc_instance *fc; - int minor = iminor(ino); - - list_for_each_entry(fc, &fc_devices, list) - if (fc->misc.minor == minor) - break; - if (fc->misc.minor != minor) - return -ENODEV; - fmc = fc->fmc; - if (try_module_get(fmc->owner) == 0) - return -ENODEV; - - f->private_data = fmc; - return 0; -} - -static int fc_release(struct inode *ino, struct file *f) -{ - struct fmc_device *fmc = f->private_data; - module_put(fmc->owner); - return 0; -} - -/* read and write are simple after the default llseek has been used */ -static ssize_t fc_read(struct file *f, char __user *buf, size_t count, - loff_t *offp) -{ - struct fmc_device *fmc = f->private_data; - unsigned long addr; - uint32_t val; - - if (count < sizeof(val)) - return -EINVAL; - count = sizeof(val); - - addr = *offp; - if (addr > fmc->memlen) - return -ESPIPE; /* Illegal seek */ - val = fmc_readl(fmc, addr); - if (copy_to_user(buf, &val, count)) - return -EFAULT; - *offp += count; - return count; -} - -static ssize_t fc_write(struct file *f, const char __user *buf, size_t count, - loff_t *offp) -{ - struct fmc_device *fmc = f->private_data; - unsigned long addr; - uint32_t val; - - if (count < sizeof(val)) - return -EINVAL; - count = sizeof(val); - - addr = *offp; - if (addr > fmc->memlen) - return -ESPIPE; /* Illegal seek */ - if (copy_from_user(&val, buf, count)) - return -EFAULT; - fmc_writel(fmc, val, addr); - *offp += count; - return count; -} - -static const struct file_operations fc_fops = { - .owner = THIS_MODULE, - .open = fc_open, - .release = fc_release, - .llseek = generic_file_llseek, - .read = fc_read, - .write = fc_write, -}; - - -/* Device part .. */ -static int fc_probe(struct fmc_device *fmc); -static int fc_remove(struct fmc_device *fmc); - -static struct fmc_driver fc_drv = { - .version = FMC_VERSION, - .driver.name = KBUILD_MODNAME, - .probe = fc_probe, - .remove = fc_remove, - /* no table: we want to match everything */ -}; - -/* We accept the generic busid parameter */ -FMC_PARAM_BUSID(fc_drv); - -/* probe and remove must allocate and release a misc device */ -static int fc_probe(struct fmc_device *fmc) -{ - int ret; - int index = 0; - - struct fc_instance *fc; - - index = fmc_validate(fmc, &fc_drv); - if (index < 0) - return -EINVAL; /* not our device: invalid */ - - /* Create a char device: we want to create it anew */ - fc = kzalloc(sizeof(*fc), GFP_KERNEL); - if (!fc) - return -ENOMEM; - fc->fmc = fmc; - fc->misc.minor = MISC_DYNAMIC_MINOR; - fc->misc.fops = &fc_fops; - fc->misc.name = kstrdup(dev_name(&fmc->dev), GFP_KERNEL); - - ret = misc_register(&fc->misc); - if (ret < 0) - goto out; - spin_lock(&fc_lock); - list_add(&fc->list, &fc_devices); - spin_unlock(&fc_lock); - dev_info(&fc->fmc->dev, "Created misc device \"%s\"\n", - fc->misc.name); - return 0; - -out: - kfree(fc->misc.name); - kfree(fc); - return ret; -} - -static int fc_remove(struct fmc_device *fmc) -{ - struct fc_instance *fc; - - list_for_each_entry(fc, &fc_devices, list) - if (fc->fmc == fmc) - break; - if (fc->fmc != fmc) { - dev_err(&fmc->dev, "remove called but not found\n"); - return -ENODEV; - } - - spin_lock(&fc_lock); - list_del(&fc->list); - spin_unlock(&fc_lock); - misc_deregister(&fc->misc); - kfree(fc->misc.name); - kfree(fc); - - return 0; -} - - -static int fc_init(void) -{ - int ret; - - ret = fmc_driver_register(&fc_drv); - return ret; -} - -static void fc_exit(void) -{ - fmc_driver_unregister(&fc_drv); -} - -module_init(fc_init); -module_exit(fc_exit); - -MODULE_LICENSE("GPL"); diff --git a/drivers/fmc/fmc-core.c b/drivers/fmc/fmc-core.c deleted file mode 100644 index 573f5471f680..000000000000 --- a/drivers/fmc/fmc-core.c +++ /dev/null @@ -1,388 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2012 CERN (www.cern.ch) - * Author: Alessandro Rubini - * - * This work is part of the White Rabbit project, a research effort led - * by CERN, the European Institute for Nuclear Research. - */ -#include -#include -#include -#include -#include -#include -#include - -#include "fmc-private.h" - -static int fmc_check_version(unsigned long version, const char *name) -{ - if (__FMC_MAJOR(version) != FMC_MAJOR) { - pr_err("%s: \"%s\" has wrong major (has %li, expected %i)\n", - __func__, name, __FMC_MAJOR(version), FMC_MAJOR); - return -EINVAL; - } - - if (__FMC_MINOR(version) != FMC_MINOR) - pr_info("%s: \"%s\" has wrong minor (has %li, expected %i)\n", - __func__, name, __FMC_MINOR(version), FMC_MINOR); - return 0; -} - -static int fmc_uevent(struct device *dev, struct kobj_uevent_env *env) -{ - /* struct fmc_device *fdev = to_fmc_device(dev); */ - - /* FIXME: The MODALIAS */ - add_uevent_var(env, "MODALIAS=%s", "fmc"); - return 0; -} - -static int fmc_probe(struct device *dev) -{ - struct fmc_driver *fdrv = to_fmc_driver(dev->driver); - struct fmc_device *fdev = to_fmc_device(dev); - - return fdrv->probe(fdev); -} - -static int fmc_remove(struct device *dev) -{ - struct fmc_driver *fdrv = to_fmc_driver(dev->driver); - struct fmc_device *fdev = to_fmc_device(dev); - - return fdrv->remove(fdev); -} - -static void fmc_shutdown(struct device *dev) -{ - /* not implemented but mandatory */ -} - -static struct bus_type fmc_bus_type = { - .name = "fmc", - .match = fmc_match, - .uevent = fmc_uevent, - .probe = fmc_probe, - .remove = fmc_remove, - .shutdown = fmc_shutdown, -}; - -static void fmc_release(struct device *dev) -{ - struct fmc_device *fmc = container_of(dev, struct fmc_device, dev); - - kfree(fmc); -} - -/* - * The eeprom is exported in sysfs, through a binary attribute - */ - -static ssize_t fmc_read_eeprom(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t off, size_t count) -{ - struct device *dev; - struct fmc_device *fmc; - int eelen; - - dev = container_of(kobj, struct device, kobj); - fmc = container_of(dev, struct fmc_device, dev); - eelen = fmc->eeprom_len; - if (off > eelen) - return -ESPIPE; - if (off == eelen) - return 0; /* EOF */ - if (off + count > eelen) - count = eelen - off; - memcpy(buf, fmc->eeprom + off, count); - return count; -} - -static ssize_t fmc_write_eeprom(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t off, size_t count) -{ - struct device *dev; - struct fmc_device *fmc; - - dev = container_of(kobj, struct device, kobj); - fmc = container_of(dev, struct fmc_device, dev); - return fmc->op->write_ee(fmc, off, buf, count); -} - -static struct bin_attribute fmc_eeprom_attr = { - .attr = { .name = "eeprom", .mode = S_IRUGO | S_IWUSR, }, - .size = 8192, /* more or less standard */ - .read = fmc_read_eeprom, - .write = fmc_write_eeprom, -}; - -int fmc_irq_request(struct fmc_device *fmc, irq_handler_t h, - char *name, int flags) -{ - if (fmc->op->irq_request) - return fmc->op->irq_request(fmc, h, name, flags); - return -EPERM; -} -EXPORT_SYMBOL(fmc_irq_request); - -void fmc_irq_free(struct fmc_device *fmc) -{ - if (fmc->op->irq_free) - fmc->op->irq_free(fmc); -} -EXPORT_SYMBOL(fmc_irq_free); - -void fmc_irq_ack(struct fmc_device *fmc) -{ - if (likely(fmc->op->irq_ack)) - fmc->op->irq_ack(fmc); -} -EXPORT_SYMBOL(fmc_irq_ack); - -int fmc_validate(struct fmc_device *fmc, struct fmc_driver *drv) -{ - if (fmc->op->validate) - return fmc->op->validate(fmc, drv); - return -EPERM; -} -EXPORT_SYMBOL(fmc_validate); - -int fmc_gpio_config(struct fmc_device *fmc, struct fmc_gpio *gpio, int ngpio) -{ - if (fmc->op->gpio_config) - return fmc->op->gpio_config(fmc, gpio, ngpio); - return -EPERM; -} -EXPORT_SYMBOL(fmc_gpio_config); - -int fmc_read_ee(struct fmc_device *fmc, int pos, void *d, int l) -{ - if (fmc->op->read_ee) - return fmc->op->read_ee(fmc, pos, d, l); - return -EPERM; -} -EXPORT_SYMBOL(fmc_read_ee); - -int fmc_write_ee(struct fmc_device *fmc, int pos, const void *d, int l) -{ - if (fmc->op->write_ee) - return fmc->op->write_ee(fmc, pos, d, l); - return -EPERM; -} -EXPORT_SYMBOL(fmc_write_ee); - -/* - * Functions for client modules follow - */ - -int fmc_driver_register(struct fmc_driver *drv) -{ - if (fmc_check_version(drv->version, drv->driver.name)) - return -EINVAL; - drv->driver.bus = &fmc_bus_type; - return driver_register(&drv->driver); -} -EXPORT_SYMBOL(fmc_driver_register); - -void fmc_driver_unregister(struct fmc_driver *drv) -{ - driver_unregister(&drv->driver); -} -EXPORT_SYMBOL(fmc_driver_unregister); - -/* - * When a device set is registered, all eeproms must be read - * and all FRUs must be parsed - */ -int fmc_device_register_n_gw(struct fmc_device **devs, int n, - struct fmc_gateware *gw) -{ - struct fmc_device *fmc, **devarray; - uint32_t device_id; - int i, ret = 0; - - if (n < 1) - return 0; - - /* Check the version of the first data structure (function prints) */ - if (fmc_check_version(devs[0]->version, devs[0]->carrier_name)) - return -EINVAL; - - devarray = kmemdup(devs, n * sizeof(*devs), GFP_KERNEL); - if (!devarray) - return -ENOMEM; - - /* Make all other checks before continuing, for all devices */ - for (i = 0; i < n; i++) { - fmc = devarray[i]; - if (!fmc->hwdev) { - pr_err("%s: device nr. %i has no hwdev pointer\n", - __func__, i); - ret = -EINVAL; - break; - } - if (fmc->flags & FMC_DEVICE_NO_MEZZANINE) { - dev_info(fmc->hwdev, "absent mezzanine in slot %d\n", - fmc->slot_id); - continue; - } - if (!fmc->eeprom) { - dev_err(fmc->hwdev, "no eeprom provided for slot %i\n", - fmc->slot_id); - ret = -EINVAL; - } - if (!fmc->eeprom_addr) { - dev_err(fmc->hwdev, "no eeprom_addr for slot %i\n", - fmc->slot_id); - ret = -EINVAL; - } - if (!fmc->carrier_name || !fmc->carrier_data || - !fmc->device_id) { - dev_err(fmc->hwdev, - "device nr %i: carrier name, " - "data or dev_id not set\n", i); - ret = -EINVAL; - } - if (ret) - break; - - } - if (ret) { - kfree(devarray); - return ret; - } - - /* Validation is ok. Now init and register the devices */ - for (i = 0; i < n; i++) { - fmc = devarray[i]; - - fmc->nr_slots = n; /* each slot must know how many are there */ - fmc->devarray = devarray; - - device_initialize(&fmc->dev); - fmc->dev.release = fmc_release; - fmc->dev.parent = fmc->hwdev; - - /* Fill the identification stuff (may fail) */ - fmc_fill_id_info(fmc); - - fmc->dev.bus = &fmc_bus_type; - - /* Name from mezzanine info or carrier info. Or 0,1,2.. */ - device_id = fmc->device_id; - if (!fmc->mezzanine_name) - dev_set_name(&fmc->dev, "fmc-%04x", device_id); - else - dev_set_name(&fmc->dev, "%s-%04x", fmc->mezzanine_name, - device_id); - - if (gw) { - /* - * The carrier already know the bitstream to load - * for this set of FMC mezzanines. - */ - ret = fmc->op->reprogram_raw(fmc, NULL, - gw->bitstream, gw->len); - if (ret) { - dev_warn(fmc->hwdev, - "Invalid gateware for FMC mezzanine\n"); - goto out; - } - } - - ret = device_add(&fmc->dev); - if (ret < 0) { - dev_err(fmc->hwdev, "Slot %i: Failed in registering " - "\"%s\"\n", fmc->slot_id, fmc->dev.kobj.name); - goto out; - } - ret = sysfs_create_bin_file(&fmc->dev.kobj, &fmc_eeprom_attr); - if (ret < 0) { - dev_err(&fmc->dev, "Failed in registering eeprom\n"); - goto out1; - } - /* This device went well, give information to the user */ - fmc_dump_eeprom(fmc); - fmc_debug_init(fmc); - } - return 0; - -out1: - device_del(&fmc->dev); -out: - kfree(devarray); - for (i--; i >= 0; i--) { - fmc_debug_exit(devs[i]); - sysfs_remove_bin_file(&devs[i]->dev.kobj, &fmc_eeprom_attr); - device_del(&devs[i]->dev); - fmc_free_id_info(devs[i]); - put_device(&devs[i]->dev); - } - return ret; - -} -EXPORT_SYMBOL(fmc_device_register_n_gw); - -int fmc_device_register_n(struct fmc_device **devs, int n) -{ - return fmc_device_register_n_gw(devs, n, NULL); -} -EXPORT_SYMBOL(fmc_device_register_n); - -int fmc_device_register_gw(struct fmc_device *fmc, struct fmc_gateware *gw) -{ - return fmc_device_register_n_gw(&fmc, 1, gw); -} -EXPORT_SYMBOL(fmc_device_register_gw); - -int fmc_device_register(struct fmc_device *fmc) -{ - return fmc_device_register_n(&fmc, 1); -} -EXPORT_SYMBOL(fmc_device_register); - -void fmc_device_unregister_n(struct fmc_device **devs, int n) -{ - int i; - - if (n < 1) - return; - - /* Free devarray first, not used by the later loop */ - kfree(devs[0]->devarray); - - for (i = 0; i < n; i++) { - fmc_debug_exit(devs[i]); - sysfs_remove_bin_file(&devs[i]->dev.kobj, &fmc_eeprom_attr); - device_del(&devs[i]->dev); - fmc_free_id_info(devs[i]); - put_device(&devs[i]->dev); - } -} -EXPORT_SYMBOL(fmc_device_unregister_n); - -void fmc_device_unregister(struct fmc_device *fmc) -{ - fmc_device_unregister_n(&fmc, 1); -} -EXPORT_SYMBOL(fmc_device_unregister); - -/* Init and exit are trivial */ -static int fmc_init(void) -{ - return bus_register(&fmc_bus_type); -} - -static void fmc_exit(void) -{ - bus_unregister(&fmc_bus_type); -} - -module_init(fmc_init); -module_exit(fmc_exit); - -MODULE_LICENSE("GPL"); diff --git a/drivers/fmc/fmc-debug.c b/drivers/fmc/fmc-debug.c deleted file mode 100644 index 1734c7cf0e76..000000000000 --- a/drivers/fmc/fmc-debug.c +++ /dev/null @@ -1,172 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2015 CERN (www.cern.ch) - * Author: Federico Vaga - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#define FMC_DBG_SDB_DUMP "dump_sdb" - -static char *__strip_trailing_space(char *buf, char *str, int len) -{ - int i = len - 1; - - memcpy(buf, str, len); - buf[len] = '\0'; - while (i >= 0 && buf[i] == ' ') - buf[i--] = '\0'; - return buf; -} - -#define __sdb_string(buf, field) ({ \ - BUILD_BUG_ON(sizeof(buf) < sizeof(field)); \ - __strip_trailing_space(buf, (void *)(field), sizeof(field)); \ - }) - -/** - * We do not check seq_printf() errors because we want to see things in any case - */ -static void fmc_sdb_dump_recursive(struct fmc_device *fmc, struct seq_file *s, - const struct sdb_array *arr) -{ - unsigned long base = arr->baseaddr; - int i, j, n = arr->len, level = arr->level; - char tmp[64]; - - for (i = 0; i < n; i++) { - union sdb_record *r; - struct sdb_product *p; - struct sdb_component *c; - - r = &arr->record[i]; - c = &r->dev.sdb_component; - p = &c->product; - - for (j = 0; j < level; j++) - seq_printf(s, " "); - switch (r->empty.record_type) { - case sdb_type_interconnect: - seq_printf(s, "%08llx:%08x %.19s\n", - __be64_to_cpu(p->vendor_id), - __be32_to_cpu(p->device_id), - p->name); - break; - case sdb_type_device: - seq_printf(s, "%08llx:%08x %.19s (%08llx-%08llx)\n", - __be64_to_cpu(p->vendor_id), - __be32_to_cpu(p->device_id), - p->name, - __be64_to_cpu(c->addr_first) + base, - __be64_to_cpu(c->addr_last) + base); - break; - case sdb_type_bridge: - seq_printf(s, "%08llx:%08x %.19s (bridge: %08llx)\n", - __be64_to_cpu(p->vendor_id), - __be32_to_cpu(p->device_id), - p->name, - __be64_to_cpu(c->addr_first) + base); - if (IS_ERR(arr->subtree[i])) { - seq_printf(s, "SDB: (bridge error %li)\n", - PTR_ERR(arr->subtree[i])); - break; - } - fmc_sdb_dump_recursive(fmc, s, arr->subtree[i]); - break; - case sdb_type_integration: - seq_printf(s, "integration\n"); - break; - case sdb_type_repo_url: - seq_printf(s, "Synthesis repository: %s\n", - __sdb_string(tmp, r->repo_url.repo_url)); - break; - case sdb_type_synthesis: - seq_printf(s, "Bitstream '%s' ", - __sdb_string(tmp, r->synthesis.syn_name)); - seq_printf(s, "synthesized %08x by %s ", - __be32_to_cpu(r->synthesis.date), - __sdb_string(tmp, r->synthesis.user_name)); - seq_printf(s, "(%s version %x), ", - __sdb_string(tmp, r->synthesis.tool_name), - __be32_to_cpu(r->synthesis.tool_version)); - seq_printf(s, "commit %pm\n", - r->synthesis.commit_id); - break; - case sdb_type_empty: - seq_printf(s, "empty\n"); - break; - default: - seq_printf(s, "UNKNOWN TYPE 0x%02x\n", - r->empty.record_type); - break; - } - } -} - -static int fmc_sdb_dump(struct seq_file *s, void *offset) -{ - struct fmc_device *fmc = s->private; - - if (!fmc->sdb) { - seq_printf(s, "no SDB information\n"); - return 0; - } - - seq_printf(s, "FMC: %s (%s), slot %i, device %s\n", dev_name(fmc->hwdev), - fmc->carrier_name, fmc->slot_id, dev_name(&fmc->dev)); - /* Dump SDB information */ - fmc_sdb_dump_recursive(fmc, s, fmc->sdb); - - return 0; -} - - -static int fmc_sdb_dump_open(struct inode *inode, struct file *file) -{ - struct fmc_device *fmc = inode->i_private; - - return single_open(file, fmc_sdb_dump, fmc); -} - - -const struct file_operations fmc_dbgfs_sdb_dump = { - .owner = THIS_MODULE, - .open = fmc_sdb_dump_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -int fmc_debug_init(struct fmc_device *fmc) -{ - fmc->dbg_dir = debugfs_create_dir(dev_name(&fmc->dev), NULL); - if (IS_ERR_OR_NULL(fmc->dbg_dir)) { - pr_err("FMC: Cannot create debugfs\n"); - return PTR_ERR(fmc->dbg_dir); - } - - fmc->dbg_sdb_dump = debugfs_create_file(FMC_DBG_SDB_DUMP, 0444, - fmc->dbg_dir, fmc, - &fmc_dbgfs_sdb_dump); - if (IS_ERR_OR_NULL(fmc->dbg_sdb_dump)) - pr_err("FMC: Cannot create debugfs file %s\n", - FMC_DBG_SDB_DUMP); - - return 0; -} - -void fmc_debug_exit(struct fmc_device *fmc) -{ - if (fmc->dbg_dir) - debugfs_remove_recursive(fmc->dbg_dir); -} diff --git a/drivers/fmc/fmc-dump.c b/drivers/fmc/fmc-dump.c deleted file mode 100644 index 6c81dbde1d16..000000000000 --- a/drivers/fmc/fmc-dump.c +++ /dev/null @@ -1,58 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2013 CERN (www.cern.ch) - * Author: Alessandro Rubini - * - * This work is part of the White Rabbit project, a research effort led - * by CERN, the European Institute for Nuclear Research. - */ -#include -#include -#include -#include -#include - -static int fmc_must_dump_eeprom; -module_param_named(dump_eeprom, fmc_must_dump_eeprom, int, 0644); - -#define LINELEN 16 - -/* Dumping 8k takes oh so much: avoid duplicate lines */ -static const uint8_t *dump_line(int addr, const uint8_t *line, - const uint8_t *prev) -{ - int i; - - if (!prev || memcmp(line, prev, LINELEN)) { - pr_info("%04x: ", addr); - for (i = 0; i < LINELEN; ) { - printk(KERN_CONT "%02x", line[i]); - i++; - printk(i & 3 ? " " : i & (LINELEN - 1) ? " " : "\n"); - } - return line; - } - /* repeated line */ - if (line == prev + LINELEN) - pr_info("[...]\n"); - return prev; -} - -void fmc_dump_eeprom(const struct fmc_device *fmc) -{ - const uint8_t *line, *prev; - int i; - - if (!fmc_must_dump_eeprom) - return; - - pr_info("FMC: %s (%s), slot %i, device %s\n", dev_name(fmc->hwdev), - fmc->carrier_name, fmc->slot_id, dev_name(&fmc->dev)); - pr_info("FMC: dumping eeprom 0x%x (%i) bytes\n", fmc->eeprom_len, - fmc->eeprom_len); - - line = fmc->eeprom; - prev = NULL; - for (i = 0; i < fmc->eeprom_len; i += LINELEN, line += LINELEN) - prev = dump_line(i, line, prev); -} diff --git a/drivers/fmc/fmc-fakedev.c b/drivers/fmc/fmc-fakedev.c deleted file mode 100644 index 941d0930969a..000000000000 --- a/drivers/fmc/fmc-fakedev.c +++ /dev/null @@ -1,355 +0,0 @@ -/* - * Copyright (C) 2012 CERN (www.cern.ch) - * Author: Alessandro Rubini - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * The software is provided "as is"; the copyright holders disclaim - * all warranties and liabilities, to the extent permitted by - * applicable law. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define FF_EEPROM_SIZE 8192 /* The standard eeprom size */ -#define FF_MAX_MEZZANINES 4 /* Fakes a multi-mezzanine carrier */ - -/* The user can pass up to 4 names of eeprom images to load */ -static char *ff_eeprom[FF_MAX_MEZZANINES]; -static int ff_nr_eeprom; -module_param_array_named(eeprom, ff_eeprom, charp, &ff_nr_eeprom, 0444); - -/* The user can ask for a multi-mezzanine carrier, with the default eeprom */ -static int ff_nr_dev = 1; -module_param_named(ndev, ff_nr_dev, int, 0444); - - -/* Lazily, don't support the "standard" module parameters */ - -/* - * Eeprom built from these commands: - - ../fru-generator -v fake-vendor -n fake-design-for-testing \ - -s 01234 -p none > IPMI-FRU - - gensdbfs . ../fake-eeprom.bin -*/ -static char ff_eeimg[FF_MAX_MEZZANINES][FF_EEPROM_SIZE] = { - { - 0x01, 0x00, 0x00, 0x01, 0x00, 0x0c, 0x00, 0xf2, 0x01, 0x0b, 0x00, 0xb2, - 0x86, 0x87, 0xcb, 0x66, 0x61, 0x6b, 0x65, 0x2d, 0x76, 0x65, 0x6e, 0x64, - 0x6f, 0x72, 0xd7, 0x66, 0x61, 0x6b, 0x65, 0x2d, 0x64, 0x65, 0x73, 0x69, - 0x67, 0x6e, 0x2d, 0x66, 0x6f, 0x72, 0x2d, 0x74, 0x65, 0x73, 0x74, 0x69, - 0x6e, 0x67, 0xc5, 0x30, 0x31, 0x32, 0x33, 0x34, 0xc4, 0x6e, 0x6f, 0x6e, - 0x65, 0xda, 0x32, 0x30, 0x31, 0x32, 0x2d, 0x31, 0x31, 0x2d, 0x31, 0x39, - 0x20, 0x32, 0x32, 0x3a, 0x34, 0x32, 0x3a, 0x33, 0x30, 0x2e, 0x30, 0x37, - 0x34, 0x30, 0x35, 0x35, 0xc1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x87, - 0x02, 0x02, 0x0d, 0xf7, 0xf8, 0x02, 0xb0, 0x04, 0x74, 0x04, 0xec, 0x04, - 0x00, 0x00, 0x00, 0x00, 0xe8, 0x03, 0x02, 0x02, 0x0d, 0x5c, 0x93, 0x01, - 0x4a, 0x01, 0x39, 0x01, 0x5a, 0x01, 0x00, 0x00, 0x00, 0x00, 0xb8, 0x0b, - 0x02, 0x02, 0x0d, 0x63, 0x8c, 0x00, 0xfa, 0x00, 0xed, 0x00, 0x06, 0x01, - 0x00, 0x00, 0x00, 0x00, 0xa0, 0x0f, 0x01, 0x02, 0x0d, 0xfb, 0xf5, 0x05, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x02, 0x0d, 0xfc, 0xf4, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x0d, 0xfd, 0xf3, 0x03, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xfa, 0x82, 0x0b, 0xea, 0x8f, 0xa2, 0x12, 0x00, 0x00, 0x1e, 0x44, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x53, 0x44, 0x42, 0x2d, 0x00, 0x03, 0x01, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x01, 0xc4, 0x46, 0x69, 0x6c, 0x65, 0x44, 0x61, 0x74, 0x61, - 0x2e, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, - 0x2e, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xc0, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xc4, 0x46, 0x69, 0x6c, 0x65, - 0x44, 0x61, 0x74, 0x61, 0x6e, 0x61, 0x6d, 0x65, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x6e, 0x61, 0x6d, 0x65, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdf, - 0x46, 0x69, 0x6c, 0x65, 0x44, 0x61, 0x74, 0x61, 0x49, 0x50, 0x4d, 0x49, - 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x49, 0x50, 0x4d, 0x49, - 0x2d, 0x46, 0x52, 0x55, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x01, 0x66, 0x61, 0x6b, 0x65, 0x0a, - }, -}; - -struct ff_dev { - struct fmc_device *fmc[FF_MAX_MEZZANINES]; - struct device dev; -}; - -static struct ff_dev *ff_current_dev; /* We have 1 carrier, 1 slot */ - -static int ff_reprogram(struct fmc_device *fmc, struct fmc_driver *drv, - char *gw) -{ - const struct firmware *fw; - int ret; - - if (!gw) { - /* program golden: success */ - fmc->flags &= ~FMC_DEVICE_HAS_CUSTOM; - fmc->flags |= FMC_DEVICE_HAS_GOLDEN; - return 0; - } - - dev_info(&fmc->dev, "reprogramming with %s\n", gw); - ret = request_firmware(&fw, gw, &fmc->dev); - if (ret < 0) { - dev_warn(&fmc->dev, "request firmware \"%s\": error %i\n", - gw, ret); - goto out; - } - fmc->flags &= ~FMC_DEVICE_HAS_GOLDEN; - fmc->flags |= FMC_DEVICE_HAS_CUSTOM; - -out: - release_firmware(fw); - return ret; -} - -static int ff_irq_request(struct fmc_device *fmc, irq_handler_t handler, - char *name, int flags) -{ - return -EOPNOTSUPP; -} - -/* FIXME: should also have some fake FMC GPIO mapping */ - - -/* - * This work function is called when we changed the eeprom. It removes the - * current fmc device and registers a new one, with different identifiers. - */ -static struct ff_dev *ff_dev_create(void); /* defined later */ - -static void ff_work_fn(struct work_struct *work) -{ - struct ff_dev *ff = ff_current_dev; - int ret; - - fmc_device_unregister_n(ff->fmc, ff_nr_dev); - device_unregister(&ff->dev); - ff_current_dev = NULL; - - ff = ff_dev_create(); - if (IS_ERR(ff)) { - pr_warning("%s: can't re-create FMC devices\n", __func__); - return; - } - ret = fmc_device_register_n(ff->fmc, ff_nr_dev); - if (ret < 0) { - dev_warn(&ff->dev, "can't re-register FMC devices\n"); - device_unregister(&ff->dev); - return; - } - - ff_current_dev = ff; -} - -static DECLARE_DELAYED_WORK(ff_work, ff_work_fn); - - -/* low-level i2c */ -static int ff_eeprom_read(struct fmc_device *fmc, uint32_t offset, - void *buf, size_t size) -{ - if (offset > FF_EEPROM_SIZE) - return -EINVAL; - if (offset + size > FF_EEPROM_SIZE) - size = FF_EEPROM_SIZE - offset; - memcpy(buf, fmc->eeprom + offset, size); - return size; -} - -static int ff_eeprom_write(struct fmc_device *fmc, uint32_t offset, - const void *buf, size_t size) -{ - if (offset > FF_EEPROM_SIZE) - return -EINVAL; - if (offset + size > FF_EEPROM_SIZE) - size = FF_EEPROM_SIZE - offset; - dev_info(&fmc->dev, "write_eeprom: offset %i, size %zi\n", - (int)offset, size); - memcpy(fmc->eeprom + offset, buf, size); - schedule_delayed_work(&ff_work, HZ * 2); /* remove, replug, in 2s */ - return size; -} - -/* i2c operations for fmc */ -static int ff_read_ee(struct fmc_device *fmc, int pos, void *data, int len) -{ - if (!(fmc->flags & FMC_DEVICE_HAS_GOLDEN)) - return -EOPNOTSUPP; - return ff_eeprom_read(fmc, pos, data, len); -} - -static int ff_write_ee(struct fmc_device *fmc, int pos, - const void *data, int len) -{ - if (!(fmc->flags & FMC_DEVICE_HAS_GOLDEN)) - return -EOPNOTSUPP; - return ff_eeprom_write(fmc, pos, data, len); -} - -/* readl and writel do not do anything. Don't waste RAM with "base" */ -static uint32_t ff_readl(struct fmc_device *fmc, int offset) -{ - return 0; -} - -static void ff_writel(struct fmc_device *fmc, uint32_t value, int offset) -{ - return; -} - -/* validate is useful so fmc-write-eeprom will not reprogram every 2 seconds */ -static int ff_validate(struct fmc_device *fmc, struct fmc_driver *drv) -{ - int i; - - if (!drv->busid_n) - return 0; /* everyhing is valid */ - for (i = 0; i < drv->busid_n; i++) - if (drv->busid_val[i] == fmc->device_id) - return i; - return -ENOENT; -} - - - -static struct fmc_operations ff_fmc_operations = { - .read32 = ff_readl, - .write32 = ff_writel, - .reprogram = ff_reprogram, - .irq_request = ff_irq_request, - .read_ee = ff_read_ee, - .write_ee = ff_write_ee, - .validate = ff_validate, -}; - -/* This device is kmalloced: release it */ -static void ff_dev_release(struct device *dev) -{ - struct ff_dev *ff = container_of(dev, struct ff_dev, dev); - kfree(ff); -} - -static struct fmc_device ff_template_fmc = { - .version = FMC_VERSION, - .owner = THIS_MODULE, - .carrier_name = "fake-fmc-carrier", - .device_id = 0xf001, /* fool */ - .eeprom_len = sizeof(ff_eeimg[0]), - .memlen = 0x1000, /* 4k, to show something */ - .op = &ff_fmc_operations, - .hwdev = NULL, /* filled at creation time */ - .flags = FMC_DEVICE_HAS_GOLDEN, -}; - -static struct ff_dev *ff_dev_create(void) -{ - struct ff_dev *ff; - struct fmc_device *fmc; - int i, ret; - - ff = kzalloc(sizeof(*ff), GFP_KERNEL); - if (!ff) - return ERR_PTR(-ENOMEM); - dev_set_name(&ff->dev, "fake-fmc-carrier"); - ff->dev.release = ff_dev_release; - - ret = device_register(&ff->dev); - if (ret < 0) { - put_device(&ff->dev); - return ERR_PTR(ret); - } - - /* Create fmc structures that refer to this new "hw" device */ - for (i = 0; i < ff_nr_dev; i++) { - fmc = kmemdup(&ff_template_fmc, sizeof(ff_template_fmc), - GFP_KERNEL); - fmc->hwdev = &ff->dev; - fmc->carrier_data = ff; - fmc->nr_slots = ff_nr_dev; - /* the following fields are different for each slot */ - fmc->eeprom = ff_eeimg[i]; - fmc->eeprom_addr = 0x50 + 2 * i; - fmc->slot_id = i; - ff->fmc[i] = fmc; - /* increment the identifier, each must be different */ - ff_template_fmc.device_id++; - } - return ff; -} - -/* init and exit */ -static int ff_init(void) -{ - struct ff_dev *ff; - const struct firmware *fw; - int i, len, ret = 0; - - /* Replicate the default eeprom for the max number of mezzanines */ - for (i = 1; i < FF_MAX_MEZZANINES; i++) - memcpy(ff_eeimg[i], ff_eeimg[0], sizeof(ff_eeimg[0])); - - if (ff_nr_eeprom > ff_nr_dev) - ff_nr_dev = ff_nr_eeprom; - - ff = ff_dev_create(); - if (IS_ERR(ff)) - return PTR_ERR(ff); - - /* If the user passed "eeprom=" as a parameter, fetch them */ - for (i = 0; i < ff_nr_eeprom; i++) { - if (!strlen(ff_eeprom[i])) - continue; - ret = request_firmware(&fw, ff_eeprom[i], &ff->dev); - if (ret < 0) { - dev_err(&ff->dev, "Mezzanine %i: can't load \"%s\" " - "(error %i)\n", i, ff_eeprom[i], -ret); - } else { - len = min_t(size_t, fw->size, (size_t)FF_EEPROM_SIZE); - memcpy(ff_eeimg[i], fw->data, len); - release_firmware(fw); - dev_info(&ff->dev, "Mezzanine %i: eeprom \"%s\"\n", i, - ff_eeprom[i]); - } - } - - ret = fmc_device_register_n(ff->fmc, ff_nr_dev); - if (ret) { - device_unregister(&ff->dev); - return ret; - } - ff_current_dev = ff; - return ret; -} - -static void ff_exit(void) -{ - if (ff_current_dev) { - fmc_device_unregister_n(ff_current_dev->fmc, ff_nr_dev); - device_unregister(&ff_current_dev->dev); - } - cancel_delayed_work_sync(&ff_work); -} - -module_init(ff_init); -module_exit(ff_exit); - -MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/fmc/fmc-match.c b/drivers/fmc/fmc-match.c deleted file mode 100644 index 995bd6041a67..000000000000 --- a/drivers/fmc/fmc-match.c +++ /dev/null @@ -1,113 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2012 CERN (www.cern.ch) - * Author: Alessandro Rubini - * - * This work is part of the White Rabbit project, a research effort led - * by CERN, the European Institute for Nuclear Research. - */ -#include -#include -#include -#include - -/* The fru parser is both user and kernel capable: it needs alloc */ -void *fru_alloc(size_t size) -{ - return kzalloc(size, GFP_KERNEL); -} - -/* The actual match function */ -int fmc_match(struct device *dev, struct device_driver *drv) -{ - struct fmc_driver *fdrv = to_fmc_driver(drv); - struct fmc_device *fdev = to_fmc_device(dev); - struct fmc_fru_id *fid; - int i, matched = 0; - - /* This currently only matches the EEPROM (FRU id) */ - fid = fdrv->id_table.fru_id; - if (!fid) { - dev_warn(&fdev->dev, "Driver has no ID: matches all\n"); - matched = 1; - } else { - if (!fdev->id.manufacturer || !fdev->id.product_name) - return 0; /* the device has no FRU information */ - for (i = 0; i < fdrv->id_table.fru_id_nr; i++, fid++) { - if (fid->manufacturer && - strcmp(fid->manufacturer, fdev->id.manufacturer)) - continue; - if (fid->product_name && - strcmp(fid->product_name, fdev->id.product_name)) - continue; - matched = 1; - break; - } - } - - /* FIXME: match SDB contents */ - return matched; -} - -/* This function creates ID info for a newly registered device */ -int fmc_fill_id_info(struct fmc_device *fmc) -{ - struct fru_common_header *h; - struct fru_board_info_area *bia; - int ret, allocated = 0; - - /* If we know the eeprom length, try to read it off the device */ - if (fmc->eeprom_len && !fmc->eeprom) { - fmc->eeprom = kzalloc(fmc->eeprom_len, GFP_KERNEL); - if (!fmc->eeprom) - return -ENOMEM; - allocated = 1; - ret = fmc_read_ee(fmc, 0, fmc->eeprom, fmc->eeprom_len); - if (ret < 0) - goto out; - } - - /* If no eeprom, continue with other matches */ - if (!fmc->eeprom) - return 0; - - dev_info(fmc->hwdev, "mezzanine %i\n", fmc->slot_id); /* header */ - - /* So we have the eeprom: parse the FRU part (if any) */ - h = (void *)fmc->eeprom; - if (h->format != 1) { - pr_info(" EEPROM has no FRU information\n"); - goto out; - } - if (!fru_header_cksum_ok(h)) { - pr_info(" FRU: wrong header checksum\n"); - goto out; - } - bia = fru_get_board_area(h); - if (!fru_bia_cksum_ok(bia)) { - pr_info(" FRU: wrong board area checksum\n"); - goto out; - } - fmc->id.manufacturer = fru_get_board_manufacturer(h); - fmc->id.product_name = fru_get_product_name(h); - pr_info(" Manufacturer: %s\n", fmc->id.manufacturer); - pr_info(" Product name: %s\n", fmc->id.product_name); - - /* Create the short name (FIXME: look in sdb as well) */ - fmc->mezzanine_name = kstrdup(fmc->id.product_name, GFP_KERNEL); - -out: - if (allocated) { - kfree(fmc->eeprom); - fmc->eeprom = NULL; - } - return 0; /* no error: let other identification work */ -} - -/* Some ID data is allocated using fru_alloc() above, so release it */ -void fmc_free_id_info(struct fmc_device *fmc) -{ - kfree(fmc->mezzanine_name); - kfree(fmc->id.manufacturer); - kfree(fmc->id.product_name); -} diff --git a/drivers/fmc/fmc-private.h b/drivers/fmc/fmc-private.h deleted file mode 100644 index 93cb8030f764..000000000000 --- a/drivers/fmc/fmc-private.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2015 CERN (www.cern.ch) - * Author: Federico Vaga - */ - -extern int fmc_debug_init(struct fmc_device *fmc); -extern void fmc_debug_exit(struct fmc_device *fmc); diff --git a/drivers/fmc/fmc-sdb.c b/drivers/fmc/fmc-sdb.c deleted file mode 100644 index 14758db1a5fb..000000000000 --- a/drivers/fmc/fmc-sdb.c +++ /dev/null @@ -1,219 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2012 CERN (www.cern.ch) - * Author: Alessandro Rubini - * - * This work is part of the White Rabbit project, a research effort led - * by CERN, the European Institute for Nuclear Research. - */ -#include -#include -#include -#include -#include -#include -#include - -static uint32_t __sdb_rd(struct fmc_device *fmc, unsigned long address, - int convert) -{ - uint32_t res = fmc_readl(fmc, address); - if (convert) - return __be32_to_cpu(res); - return res; -} - -static struct sdb_array *__fmc_scan_sdb_tree(struct fmc_device *fmc, - unsigned long sdb_addr, - unsigned long reg_base, int level) -{ - uint32_t onew; - int i, j, n, convert = 0; - struct sdb_array *arr, *sub; - - onew = fmc_readl(fmc, sdb_addr); - if (onew == SDB_MAGIC) { - /* Uh! If we are little-endian, we must convert */ - if (SDB_MAGIC != __be32_to_cpu(SDB_MAGIC)) - convert = 1; - } else if (onew == __be32_to_cpu(SDB_MAGIC)) { - /* ok, don't convert */ - } else { - return ERR_PTR(-ENOENT); - } - /* So, the magic was there: get the count from offset 4*/ - onew = __sdb_rd(fmc, sdb_addr + 4, convert); - n = __be16_to_cpu(*(uint16_t *)&onew); - arr = kzalloc(sizeof(*arr), GFP_KERNEL); - if (!arr) - return ERR_PTR(-ENOMEM); - arr->record = kcalloc(n, sizeof(arr->record[0]), GFP_KERNEL); - arr->subtree = kcalloc(n, sizeof(arr->subtree[0]), GFP_KERNEL); - if (!arr->record || !arr->subtree) { - kfree(arr->record); - kfree(arr->subtree); - kfree(arr); - return ERR_PTR(-ENOMEM); - } - - arr->len = n; - arr->level = level; - arr->fmc = fmc; - for (i = 0; i < n; i++) { - union sdb_record *r; - - for (j = 0; j < sizeof(arr->record[0]); j += 4) { - *(uint32_t *)((void *)(arr->record + i) + j) = - __sdb_rd(fmc, sdb_addr + (i * 64) + j, convert); - } - r = &arr->record[i]; - arr->subtree[i] = ERR_PTR(-ENODEV); - if (r->empty.record_type == sdb_type_bridge) { - struct sdb_component *c = &r->bridge.sdb_component; - uint64_t subaddr = __be64_to_cpu(r->bridge.sdb_child); - uint64_t newbase = __be64_to_cpu(c->addr_first); - - subaddr += reg_base; - newbase += reg_base; - sub = __fmc_scan_sdb_tree(fmc, subaddr, newbase, - level + 1); - arr->subtree[i] = sub; /* may be error */ - if (IS_ERR(sub)) - continue; - sub->parent = arr; - sub->baseaddr = newbase; - } - } - return arr; -} - -int fmc_scan_sdb_tree(struct fmc_device *fmc, unsigned long address) -{ - struct sdb_array *ret; - if (fmc->sdb) - return -EBUSY; - ret = __fmc_scan_sdb_tree(fmc, address, 0 /* regs */, 0); - if (IS_ERR(ret)) - return PTR_ERR(ret); - fmc->sdb = ret; - return 0; -} -EXPORT_SYMBOL(fmc_scan_sdb_tree); - -static void __fmc_sdb_free(struct sdb_array *arr) -{ - int i, n; - - if (!arr) - return; - n = arr->len; - for (i = 0; i < n; i++) { - if (IS_ERR(arr->subtree[i])) - continue; - __fmc_sdb_free(arr->subtree[i]); - } - kfree(arr->record); - kfree(arr->subtree); - kfree(arr); -} - -int fmc_free_sdb_tree(struct fmc_device *fmc) -{ - __fmc_sdb_free(fmc->sdb); - fmc->sdb = NULL; - return 0; -} -EXPORT_SYMBOL(fmc_free_sdb_tree); - -/* This helper calls reprogram and inizialized sdb as well */ -int fmc_reprogram_raw(struct fmc_device *fmc, struct fmc_driver *d, - void *gw, unsigned long len, int sdb_entry) -{ - int ret; - - ret = fmc->op->reprogram_raw(fmc, d, gw, len); - if (ret < 0) - return ret; - if (sdb_entry < 0) - return ret; - - /* We are required to find SDB at a given offset */ - ret = fmc_scan_sdb_tree(fmc, sdb_entry); - if (ret < 0) { - dev_err(&fmc->dev, "Can't find SDB at address 0x%x\n", - sdb_entry); - return -ENODEV; - } - - return 0; -} -EXPORT_SYMBOL(fmc_reprogram_raw); - -/* This helper calls reprogram and inizialized sdb as well */ -int fmc_reprogram(struct fmc_device *fmc, struct fmc_driver *d, char *gw, - int sdb_entry) -{ - int ret; - - ret = fmc->op->reprogram(fmc, d, gw); - if (ret < 0) - return ret; - if (sdb_entry < 0) - return ret; - - /* We are required to find SDB at a given offset */ - ret = fmc_scan_sdb_tree(fmc, sdb_entry); - if (ret < 0) { - dev_err(&fmc->dev, "Can't find SDB at address 0x%x\n", - sdb_entry); - return -ENODEV; - } - - return 0; -} -EXPORT_SYMBOL(fmc_reprogram); - -void fmc_show_sdb_tree(const struct fmc_device *fmc) -{ - pr_err("%s: not supported anymore, use debugfs to dump SDB\n", - __func__); -} -EXPORT_SYMBOL(fmc_show_sdb_tree); - -signed long fmc_find_sdb_device(struct sdb_array *tree, - uint64_t vid, uint32_t did, unsigned long *sz) -{ - signed long res = -ENODEV; - union sdb_record *r; - struct sdb_product *p; - struct sdb_component *c; - int i, n = tree->len; - uint64_t last, first; - - /* FIXME: what if the first interconnect is not at zero? */ - for (i = 0; i < n; i++) { - r = &tree->record[i]; - c = &r->dev.sdb_component; - p = &c->product; - - if (!IS_ERR(tree->subtree[i])) - res = fmc_find_sdb_device(tree->subtree[i], - vid, did, sz); - if (res >= 0) - return res + tree->baseaddr; - if (r->empty.record_type != sdb_type_device) - continue; - if (__be64_to_cpu(p->vendor_id) != vid) - continue; - if (__be32_to_cpu(p->device_id) != did) - continue; - /* found */ - last = __be64_to_cpu(c->addr_last); - first = __be64_to_cpu(c->addr_first); - if (sz) - *sz = (typeof(*sz))(last + 1 - first); - return first + tree->baseaddr; - } - return res; -} -EXPORT_SYMBOL(fmc_find_sdb_device); diff --git a/drivers/fmc/fmc-trivial.c b/drivers/fmc/fmc-trivial.c deleted file mode 100644 index b99dbc7ee203..000000000000 --- a/drivers/fmc/fmc-trivial.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (C) 2012 CERN (www.cern.ch) - * Author: Alessandro Rubini - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * The software is provided "as is"; the copyright holders disclaim - * all warranties and liabilities, to the extent permitted by - * applicable law. - */ - -/* A trivial fmc driver that can load a gateware file and reports interrupts */ -#include -#include -#include -#include - -static struct fmc_driver t_drv; /* initialized later */ - -static irqreturn_t t_handler(int irq, void *dev_id) -{ - struct fmc_device *fmc = dev_id; - - fmc_irq_ack(fmc); - dev_info(&fmc->dev, "received irq %i\n", irq); - return IRQ_HANDLED; -} - -static struct fmc_gpio t_gpio[] = { - { - .gpio = FMC_GPIO_IRQ(0), - .mode = GPIOF_DIR_IN, - .irqmode = IRQF_TRIGGER_RISING, - }, { - .gpio = FMC_GPIO_IRQ(1), - .mode = GPIOF_DIR_IN, - .irqmode = IRQF_TRIGGER_RISING, - } -}; - -static int t_probe(struct fmc_device *fmc) -{ - int ret; - int index = 0; - - index = fmc_validate(fmc, &t_drv); - if (index < 0) - return -EINVAL; /* not our device: invalid */ - - ret = fmc_irq_request(fmc, t_handler, "fmc-trivial", IRQF_SHARED); - if (ret < 0) - return ret; - /* ignore error code of call below, we really don't care */ - fmc_gpio_config(fmc, t_gpio, ARRAY_SIZE(t_gpio)); - - ret = fmc_reprogram(fmc, &t_drv, "", 0); - if (ret == -EPERM) /* programming not supported */ - ret = 0; - if (ret < 0) - fmc_irq_free(fmc); - - /* FIXME: reprogram LM32 too */ - return ret; -} - -static int t_remove(struct fmc_device *fmc) -{ - fmc_irq_free(fmc); - return 0; -} - -static struct fmc_driver t_drv = { - .version = FMC_VERSION, - .driver.name = KBUILD_MODNAME, - .probe = t_probe, - .remove = t_remove, - /* no table, as the current match just matches everything */ -}; - - /* We accept the generic parameters */ -FMC_PARAM_BUSID(t_drv); -FMC_PARAM_GATEWARE(t_drv); - -static int t_init(void) -{ - int ret; - - ret = fmc_driver_register(&t_drv); - return ret; -} - -static void t_exit(void) -{ - fmc_driver_unregister(&t_drv); -} - -module_init(t_init); -module_exit(t_exit); - -MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/fmc/fmc-write-eeprom.c b/drivers/fmc/fmc-write-eeprom.c deleted file mode 100644 index 1c7826e3f526..000000000000 --- a/drivers/fmc/fmc-write-eeprom.c +++ /dev/null @@ -1,175 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2012 CERN (www.cern.ch) - * Author: Alessandro Rubini - * - * This work is part of the White Rabbit project, a research effort led - * by CERN, the European Institute for Nuclear Research. - */ -#include -#include -#include -#include -#include -#include - -/* - * This module uses the firmware loader to program the whole or part - * of the FMC eeprom. The meat is in the _run functions. However, no - * default file name is provided, to avoid accidental mishaps. Also, - * you must pass the busid argument - */ -static struct fmc_driver fwe_drv; - -FMC_PARAM_BUSID(fwe_drv); - -/* The "file=" is like the generic "gateware=" used elsewhere */ -static char *fwe_file[FMC_MAX_CARDS]; -static int fwe_file_n; -module_param_array_named(file, fwe_file, charp, &fwe_file_n, 0444); - -static int fwe_run_tlv(struct fmc_device *fmc, const struct firmware *fw, - int write) -{ - const uint8_t *p = fw->data; - int len = fw->size; - uint16_t thislen, thisaddr; - int err; - - /* format is: 'w' addr16 len16 data... */ - while (len > 5) { - thisaddr = get_unaligned_le16(p+1); - thislen = get_unaligned_le16(p+3); - if (p[0] != 'w' || thislen + 5 > len) { - dev_err(&fmc->dev, "invalid tlv at offset %ti\n", - p - fw->data); - return -EINVAL; - } - err = 0; - if (write) { - dev_info(&fmc->dev, "write %i bytes at 0x%04x\n", - thislen, thisaddr); - err = fmc_write_ee(fmc, thisaddr, p + 5, thislen); - } - if (err < 0) { - dev_err(&fmc->dev, "write failure @0x%04x\n", - thisaddr); - return err; - } - p += 5 + thislen; - len -= 5 + thislen; - } - if (write) - dev_info(&fmc->dev, "write_eeprom: success\n"); - return 0; -} - -static int fwe_run_bin(struct fmc_device *fmc, const struct firmware *fw) -{ - int ret; - - dev_info(&fmc->dev, "programming %zi bytes\n", fw->size); - ret = fmc_write_ee(fmc, 0, (void *)fw->data, fw->size); - if (ret < 0) { - dev_info(&fmc->dev, "write_eeprom: error %i\n", ret); - return ret; - } - dev_info(&fmc->dev, "write_eeprom: success\n"); - return 0; -} - -static int fwe_run(struct fmc_device *fmc, const struct firmware *fw, char *s) -{ - char *last4 = s + strlen(s) - 4; - int err; - - if (!strcmp(last4, ".bin")) - return fwe_run_bin(fmc, fw); - if (!strcmp(last4, ".tlv")) { - err = fwe_run_tlv(fmc, fw, 0); - if (!err) - err = fwe_run_tlv(fmc, fw, 1); - return err; - } - dev_err(&fmc->dev, "invalid file name \"%s\"\n", s); - return -EINVAL; -} - -/* - * Programming is done at probe time. Morever, only those listed with - * busid= are programmed. - * card is probed for, only one is programmed. Unfortunately, it's - * difficult to know in advance when probing the first card if others - * are there. - */ -static int fwe_probe(struct fmc_device *fmc) -{ - int err, index = 0; - const struct firmware *fw; - struct device *dev = &fmc->dev; - char *s; - - if (!fwe_drv.busid_n) { - dev_err(dev, "%s: no busid passed, refusing all cards\n", - KBUILD_MODNAME); - return -ENODEV; - } - - index = fmc_validate(fmc, &fwe_drv); - if (index < 0) { - pr_err("%s: refusing device \"%s\"\n", KBUILD_MODNAME, - dev_name(dev)); - return -ENODEV; - } - if (index >= fwe_file_n) { - pr_err("%s: no filename for device index %i\n", - KBUILD_MODNAME, index); - return -ENODEV; - } - s = fwe_file[index]; - if (!s) { - pr_err("%s: no filename for \"%s\" not programming\n", - KBUILD_MODNAME, dev_name(dev)); - return -ENOENT; - } - err = request_firmware(&fw, s, dev); - if (err < 0) { - dev_err(&fmc->dev, "request firmware \"%s\": error %i\n", - s, err); - return err; - } - fwe_run(fmc, fw, s); - release_firmware(fw); - return 0; -} - -static int fwe_remove(struct fmc_device *fmc) -{ - return 0; -} - -static struct fmc_driver fwe_drv = { - .version = FMC_VERSION, - .driver.name = KBUILD_MODNAME, - .probe = fwe_probe, - .remove = fwe_remove, - /* no table, as the current match just matches everything */ -}; - -static int fwe_init(void) -{ - int ret; - - ret = fmc_driver_register(&fwe_drv); - return ret; -} - -static void fwe_exit(void) -{ - fmc_driver_unregister(&fwe_drv); -} - -module_init(fwe_init); -module_exit(fwe_exit); - -MODULE_LICENSE("GPL"); diff --git a/drivers/fmc/fru-parse.c b/drivers/fmc/fru-parse.c deleted file mode 100644 index f551b81f4fd9..000000000000 --- a/drivers/fmc/fru-parse.c +++ /dev/null @@ -1,80 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2012 CERN (www.cern.ch) - * Author: Alessandro Rubini - * - * This work is part of the White Rabbit project, a research effort led - * by CERN, the European Institute for Nuclear Research. - */ -#include - -/* Some internal helpers */ -static struct fru_type_length * -__fru_get_board_tl(struct fru_common_header *header, int nr) -{ - struct fru_board_info_area *bia; - struct fru_type_length *tl; - - bia = fru_get_board_area(header); - tl = bia->tl; - while (nr > 0 && !fru_is_eof(tl)) { - tl = fru_next_tl(tl); - nr--; - } - if (fru_is_eof(tl)) - return NULL; - return tl; -} - -static char *__fru_alloc_get_tl(struct fru_common_header *header, int nr) -{ - struct fru_type_length *tl; - char *res; - - tl = __fru_get_board_tl(header, nr); - if (!tl) - return NULL; - - res = fru_alloc(fru_strlen(tl) + 1); - if (!res) - return NULL; - return fru_strcpy(res, tl); -} - -/* Public checksum verifiers */ -int fru_header_cksum_ok(struct fru_common_header *header) -{ - uint8_t *ptr = (void *)header; - int i, sum; - - for (i = sum = 0; i < sizeof(*header); i++) - sum += ptr[i]; - return (sum & 0xff) == 0; -} -int fru_bia_cksum_ok(struct fru_board_info_area *bia) -{ - uint8_t *ptr = (void *)bia; - int i, sum; - - for (i = sum = 0; i < 8 * bia->area_len; i++) - sum += ptr[i]; - return (sum & 0xff) == 0; -} - -/* Get various stuff, trivial */ -char *fru_get_board_manufacturer(struct fru_common_header *header) -{ - return __fru_alloc_get_tl(header, 0); -} -char *fru_get_product_name(struct fru_common_header *header) -{ - return __fru_alloc_get_tl(header, 1); -} -char *fru_get_serial_number(struct fru_common_header *header) -{ - return __fru_alloc_get_tl(header, 2); -} -char *fru_get_part_number(struct fru_common_header *header) -{ - return __fru_alloc_get_tl(header, 3); -} diff --git a/include/linux/fmc-sdb.h b/include/linux/fmc-sdb.h deleted file mode 100644 index bec899f0867c..000000000000 --- a/include/linux/fmc-sdb.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This file is separate from sdb.h, because I want that one to remain - * unchanged (as far as possible) from the official sdb distribution - * - * This file and associated functionality are a playground for me to - * understand stuff which will later be implemented in more generic places. - */ -#include - -/* This is the union of all currently defined types */ -union sdb_record { - struct sdb_interconnect ic; - struct sdb_device dev; - struct sdb_bridge bridge; - struct sdb_integration integr; - struct sdb_empty empty; - struct sdb_synthesis synthesis; - struct sdb_repo_url repo_url; -}; - -struct fmc_device; - -/* Every sdb table is turned into this structure */ -struct sdb_array { - int len; - int level; - unsigned long baseaddr; - struct fmc_device *fmc; /* the device that hosts it */ - struct sdb_array *parent; /* NULL at root */ - union sdb_record *record; /* copies of the struct */ - struct sdb_array **subtree; /* only valid for bridge items */ -}; - -extern int fmc_scan_sdb_tree(struct fmc_device *fmc, unsigned long address); -extern void fmc_show_sdb_tree(const struct fmc_device *fmc); -extern signed long fmc_find_sdb_device(struct sdb_array *tree, uint64_t vendor, - uint32_t device, unsigned long *sz); -extern int fmc_free_sdb_tree(struct fmc_device *fmc); diff --git a/include/linux/fmc.h b/include/linux/fmc.h deleted file mode 100644 index 8661a46a676f..000000000000 --- a/include/linux/fmc.h +++ /dev/null @@ -1,271 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2012 CERN (www.cern.ch) - * Author: Alessandro Rubini - * - * This work is part of the White Rabbit project, a research effort led - * by CERN, the European Institute for Nuclear Research. - */ -#ifndef __LINUX_FMC_H__ -#define __LINUX_FMC_H__ -#include -#include -#include -#include -#include -#include - -struct fmc_device; -struct fmc_driver; - -/* - * This bus abstraction is developed separately from drivers, so we need - * to check the version of the data structures we receive. - */ - -#define FMC_MAJOR 3 -#define FMC_MINOR 0 -#define FMC_VERSION ((FMC_MAJOR << 16) | FMC_MINOR) -#define __FMC_MAJOR(x) ((x) >> 16) -#define __FMC_MINOR(x) ((x) & 0xffff) - -/* - * The device identification, as defined by the IPMI FRU (Field Replaceable - * Unit) includes four different strings to describe the device. Here we - * only match the "Board Manufacturer" and the "Board Product Name", - * ignoring the "Board Serial Number" and "Board Part Number". All 4 are - * expected to be strings, so they are treated as zero-terminated C strings. - * Unspecified string (NULL) means "any", so if both are unspecified this - * is a catch-all driver. So null entries are allowed and we use array - * and length. This is unlike pci and usb that use null-terminated arrays - */ -struct fmc_fru_id { - char *manufacturer; - char *product_name; -}; - -/* - * If the FPGA is already programmed (think Etherbone or the second - * SVEC slot), we can match on SDB devices in the memory image. This - * match uses an array of devices that must all be present, and the - * match is based on vendor and device only. Further checks are expected - * to happen in the probe function. Zero means "any" and catch-all is allowed. - */ -struct fmc_sdb_one_id { - uint64_t vendor; - uint32_t device; -}; -struct fmc_sdb_id { - struct fmc_sdb_one_id *cores; - int cores_nr; -}; - -struct fmc_device_id { - struct fmc_fru_id *fru_id; - int fru_id_nr; - struct fmc_sdb_id *sdb_id; - int sdb_id_nr; -}; - -/* This sizes the module_param_array used by generic module parameters */ -#define FMC_MAX_CARDS 32 - -/* The driver is a pretty simple thing */ -struct fmc_driver { - unsigned long version; - struct device_driver driver; - int (*probe)(struct fmc_device *); - int (*remove)(struct fmc_device *); - const struct fmc_device_id id_table; - /* What follows is for generic module parameters */ - int busid_n; - int busid_val[FMC_MAX_CARDS]; - int gw_n; - char *gw_val[FMC_MAX_CARDS]; -}; -#define to_fmc_driver(x) container_of((x), struct fmc_driver, driver) - -/* These are the generic parameters, that drivers may instantiate */ -#define FMC_PARAM_BUSID(_d) \ - module_param_array_named(busid, _d.busid_val, int, &_d.busid_n, 0444) -#define FMC_PARAM_GATEWARE(_d) \ - module_param_array_named(gateware, _d.gw_val, charp, &_d.gw_n, 0444) - -/* - * Drivers may need to configure gpio pins in the carrier. To read input - * (a very uncommon operation, and definitely not in the hot paths), just - * configure one gpio only and get 0 or 1 as retval of the config method - */ -struct fmc_gpio { - char *carrier_name; /* name or NULL for virtual pins */ - int gpio; - int _gpio; /* internal use by the carrier */ - int mode; /* GPIOF_DIR_OUT etc */ - int irqmode; /* IRQF_TRIGGER_LOW and so on */ -}; - -/* The numbering of gpio pins allows access to raw pins or virtual roles */ -#define FMC_GPIO_RAW(x) (x) /* 4096 of them */ -#define __FMC_GPIO_IS_RAW(x) ((x) < 0x1000) -#define FMC_GPIO_IRQ(x) ((x) + 0x1000) /* 256 of them */ -#define FMC_GPIO_LED(x) ((x) + 0x1100) /* 256 of them */ -#define FMC_GPIO_KEY(x) ((x) + 0x1200) /* 256 of them */ -#define FMC_GPIO_TP(x) ((x) + 0x1300) /* 256 of them */ -#define FMC_GPIO_USER(x) ((x) + 0x1400) /* 256 of them */ -/* We may add SCL and SDA, or other roles if the need arises */ - -/* - * These are similar to the legacy Linux GPIO defines from - * but in fact FMC has its own GPIO handling and is not using the Linux - * GPIO subsystem. - */ -#define GPIOF_DIR_OUT (0 << 0) -#define GPIOF_DIR_IN (1 << 0) -#define GPIOF_INIT_LOW (0 << 1) -#define GPIOF_INIT_HIGH (1 << 1) - -/* - * The operations are offered by each carrier and should make driver - * design completely independent of the carrier. Named GPIO pins may be - * the exception. - */ -struct fmc_operations { - uint32_t (*read32)(struct fmc_device *fmc, int offset); - void (*write32)(struct fmc_device *fmc, uint32_t value, int offset); - int (*validate)(struct fmc_device *fmc, struct fmc_driver *drv); - int (*reprogram_raw)(struct fmc_device *f, struct fmc_driver *d, - void *gw, unsigned long len); - int (*reprogram)(struct fmc_device *f, struct fmc_driver *d, char *gw); - int (*irq_request)(struct fmc_device *fmc, irq_handler_t h, - char *name, int flags); - void (*irq_ack)(struct fmc_device *fmc); - int (*irq_free)(struct fmc_device *fmc); - int (*gpio_config)(struct fmc_device *fmc, struct fmc_gpio *gpio, - int ngpio); - int (*read_ee)(struct fmc_device *fmc, int pos, void *d, int l); - int (*write_ee)(struct fmc_device *fmc, int pos, const void *d, int l); -}; - -/* Prefer this helper rather than calling of fmc->reprogram directly */ -int fmc_reprogram_raw(struct fmc_device *fmc, struct fmc_driver *d, - void *gw, unsigned long len, int sdb_entry); -extern int fmc_reprogram(struct fmc_device *f, struct fmc_driver *d, char *gw, - int sdb_entry); - -/* - * The device reports all information needed to access hw. - * - * If we have eeprom_len and not contents, the core reads it. - * Then, parsing of identifiers is done by the core which fills fmc_fru_id.. - * Similarly a device that must be matched based on SDB cores must - * fill the entry point and the core will scan the bus (FIXME: sdb match) - */ -struct fmc_device { - unsigned long version; - unsigned long flags; - struct module *owner; /* char device must pin it */ - struct fmc_fru_id id; /* for EEPROM-based match */ - struct fmc_operations *op; /* carrier-provided */ - int irq; /* according to host bus. 0 == none */ - int eeprom_len; /* Usually 8kB, may be less */ - int eeprom_addr; /* 0x50, 0x52 etc */ - uint8_t *eeprom; /* Full contents or leading part */ - char *carrier_name; /* "SPEC" or similar, for special use */ - void *carrier_data; /* "struct spec *" or equivalent */ - __iomem void *fpga_base; /* May be NULL (Etherbone) */ - __iomem void *slot_base; /* Set by the driver */ - struct fmc_device **devarray; /* Allocated by the bus */ - int slot_id; /* Index in the slot array */ - int nr_slots; /* Number of slots in this carrier */ - unsigned long memlen; /* Used for the char device */ - struct device dev; /* For Linux use */ - struct device *hwdev; /* The underlying hardware device */ - unsigned long sdbfs_entry; - struct sdb_array *sdb; - uint32_t device_id; /* Filled by the device */ - char *mezzanine_name; /* Defaults to ``fmc'' */ - void *mezzanine_data; - - struct dentry *dbg_dir; - struct dentry *dbg_sdb_dump; -}; -#define to_fmc_device(x) container_of((x), struct fmc_device, dev) - -#define FMC_DEVICE_HAS_GOLDEN 1 -#define FMC_DEVICE_HAS_CUSTOM 2 -#define FMC_DEVICE_NO_MEZZANINE 4 -#define FMC_DEVICE_MATCH_SDB 8 /* fmc-core must scan sdb in fpga */ - -/* - * If fpga_base can be used, the carrier offers no readl/writel methods, and - * this expands to a single, fast, I/O access. - */ -static inline uint32_t fmc_readl(struct fmc_device *fmc, int offset) -{ - if (unlikely(fmc->op->read32)) - return fmc->op->read32(fmc, offset); - return readl(fmc->fpga_base + offset); -} -static inline void fmc_writel(struct fmc_device *fmc, uint32_t val, int off) -{ - if (unlikely(fmc->op->write32)) - fmc->op->write32(fmc, val, off); - else - writel(val, fmc->fpga_base + off); -} - -/* pci-like naming */ -static inline void *fmc_get_drvdata(const struct fmc_device *fmc) -{ - return dev_get_drvdata(&fmc->dev); -} - -static inline void fmc_set_drvdata(struct fmc_device *fmc, void *data) -{ - dev_set_drvdata(&fmc->dev, data); -} - -struct fmc_gateware { - void *bitstream; - unsigned long len; -}; - -/* The 5 access points */ -extern int fmc_driver_register(struct fmc_driver *drv); -extern void fmc_driver_unregister(struct fmc_driver *drv); -extern int fmc_device_register(struct fmc_device *tdev); -extern int fmc_device_register_gw(struct fmc_device *tdev, - struct fmc_gateware *gw); -extern void fmc_device_unregister(struct fmc_device *tdev); - -/* Three more for device sets, all driven by the same FPGA */ -extern int fmc_device_register_n(struct fmc_device **devs, int n); -extern int fmc_device_register_n_gw(struct fmc_device **devs, int n, - struct fmc_gateware *gw); -extern void fmc_device_unregister_n(struct fmc_device **devs, int n); - -/* Internal cross-calls between files; not exported to other modules */ -extern int fmc_match(struct device *dev, struct device_driver *drv); -extern int fmc_fill_id_info(struct fmc_device *fmc); -extern void fmc_free_id_info(struct fmc_device *fmc); -extern void fmc_dump_eeprom(const struct fmc_device *fmc); - -/* helpers for FMC operations */ -extern int fmc_irq_request(struct fmc_device *fmc, irq_handler_t h, - char *name, int flags); -extern void fmc_irq_free(struct fmc_device *fmc); -extern void fmc_irq_ack(struct fmc_device *fmc); -extern int fmc_validate(struct fmc_device *fmc, struct fmc_driver *drv); -extern int fmc_gpio_config(struct fmc_device *fmc, struct fmc_gpio *gpio, - int ngpio); -extern int fmc_read_ee(struct fmc_device *fmc, int pos, void *d, int l); -extern int fmc_write_ee(struct fmc_device *fmc, int pos, const void *d, int l); - -/* helpers for FMC operations */ -extern int fmc_irq_request(struct fmc_device *fmc, irq_handler_t h, - char *name, int flags); -extern void fmc_irq_free(struct fmc_device *fmc); -extern void fmc_irq_ack(struct fmc_device *fmc); -extern int fmc_validate(struct fmc_device *fmc, struct fmc_driver *drv); - -#endif /* __LINUX_FMC_H__ */ -- cgit v1.2.3 From 9a5ed0bac86edce4097abf7595a7de050b2f87fa Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 12 Jun 2019 09:42:22 +0200 Subject: regulator: wm831x: Convert to use GPIO descriptors This converts the Wolfson Micro WM831x DCDC converter to use a GPIO descriptor for the GPIO driving the DVS pin. There is just one (non-DT) machine in the kernel using this, and that is the Wolfson Micro (now Cirrus) Cragganmore 6410 so we patch this board to pass a descriptor table and fix up the driver accordingly. Cc: Charles Keepax Cc: Richard Fitzgerald Cc: patches@opensource.cirrus.com Signed-off-by: Linus Walleij Acked-by: Charles Keepax Signed-off-by: Mark Brown --- arch/arm/mach-s3c64xx/mach-crag6410.c | 21 ++++++++++++++++++++- drivers/regulator/wm831x-dcdc.c | 29 +++++++++++++---------------- include/linux/mfd/wm831x/pdata.h | 1 - 3 files changed, 33 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-s3c64xx/mach-crag6410.c b/arch/arm/mach-s3c64xx/mach-crag6410.c index 379424d72ae7..8ec6a4f5eb05 100644 --- a/arch/arm/mach-s3c64xx/mach-crag6410.c +++ b/arch/arm/mach-s3c64xx/mach-crag6410.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -398,7 +399,6 @@ static struct pca953x_platform_data crag6410_pca_data = { /* VDDARM is controlled by DVS1 connected to GPK(0) */ static struct wm831x_buckv_pdata vddarm_pdata = { .dvs_control_src = 1, - .dvs_gpio = S3C64XX_GPK(0), }; static struct regulator_consumer_supply vddarm_consumers[] = { @@ -596,6 +596,24 @@ static struct wm831x_pdata crag_pmic_pdata = { .touch = &touch_pdata, }; +/* + * VDDARM is eventually ending up as a regulator hanging on the MFD cell device + * "wm831x-buckv.1" spawn from drivers/mfd/wm831x-core.c. + * + * From the note on the platform data we can see that this is clearly DVS1 + * and assigned as dcdc1 resource to the MFD core which sets .id of the cell + * spawning the DVS1 platform device to 1, then the cell platform device + * name is calculated from 10*instance + id resulting in the device name + * "wm831x-buckv.11" + */ +static struct gpiod_lookup_table crag_pmic_gpiod_table = { + .dev_id = "wm831x-buckv.11", + .table = { + GPIO_LOOKUP("GPIOK", 0, "dvs", GPIO_ACTIVE_HIGH), + { }, + }, +}; + static struct i2c_board_info i2c_devs0[] = { { I2C_BOARD_INFO("24c08", 0x50), }, { I2C_BOARD_INFO("tca6408", 0x20), @@ -836,6 +854,7 @@ static void __init crag6410_machine_init(void) s3c_fb_set_platdata(&crag6410_lcd_pdata); dwc2_hsotg_set_platdata(&crag6410_hsotg_pdata); + gpiod_add_lookup_table(&crag_pmic_gpiod_table); i2c_register_board_info(0, i2c_devs0, ARRAY_SIZE(i2c_devs0)); i2c_register_board_info(1, i2c_devs1, ARRAY_SIZE(i2c_devs1)); diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c index b422eef97b77..018dbbd96771 100644 --- a/drivers/regulator/wm831x-dcdc.c +++ b/drivers/regulator/wm831x-dcdc.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include @@ -50,7 +50,7 @@ struct wm831x_dcdc { int base; struct wm831x *wm831x; struct regulator_dev *regulator; - int dvs_gpio; + struct gpio_desc *dvs_gpiod; int dvs_gpio_state; int on_vsel; int dvs_vsel; @@ -217,7 +217,7 @@ static int wm831x_buckv_set_dvs(struct regulator_dev *rdev, int state) return 0; dcdc->dvs_gpio_state = state; - gpio_set_value(dcdc->dvs_gpio, state); + gpiod_set_value(dcdc->dvs_gpiod, state); /* Should wait for DVS state change to be asserted if we have * a GPIO for it, for now assume the device is configured @@ -237,10 +237,10 @@ static int wm831x_buckv_set_voltage_sel(struct regulator_dev *rdev, int ret; /* If this value is already set then do a GPIO update if we can */ - if (dcdc->dvs_gpio && dcdc->on_vsel == vsel) + if (dcdc->dvs_gpiod && dcdc->on_vsel == vsel) return wm831x_buckv_set_dvs(rdev, 0); - if (dcdc->dvs_gpio && dcdc->dvs_vsel == vsel) + if (dcdc->dvs_gpiod && dcdc->dvs_vsel == vsel) return wm831x_buckv_set_dvs(rdev, 1); /* Always set the ON status to the minimum voltage */ @@ -249,7 +249,7 @@ static int wm831x_buckv_set_voltage_sel(struct regulator_dev *rdev, return ret; dcdc->on_vsel = vsel; - if (!dcdc->dvs_gpio) + if (!dcdc->dvs_gpiod) return ret; /* Kick the voltage transition now */ @@ -296,7 +296,7 @@ static int wm831x_buckv_get_voltage_sel(struct regulator_dev *rdev) { struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev); - if (dcdc->dvs_gpio && dcdc->dvs_gpio_state) + if (dcdc->dvs_gpiod && dcdc->dvs_gpio_state) return dcdc->dvs_vsel; else return dcdc->on_vsel; @@ -337,7 +337,7 @@ static void wm831x_buckv_dvs_init(struct platform_device *pdev, int ret; u16 ctrl; - if (!pdata || !pdata->dvs_gpio) + if (!pdata) return; /* gpiolib won't let us read the GPIO status so pick the higher @@ -345,17 +345,14 @@ static void wm831x_buckv_dvs_init(struct platform_device *pdev, */ dcdc->dvs_gpio_state = pdata->dvs_init_state; - ret = devm_gpio_request_one(&pdev->dev, pdata->dvs_gpio, - dcdc->dvs_gpio_state ? GPIOF_INIT_HIGH : 0, - "DCDC DVS"); - if (ret < 0) { - dev_err(wm831x->dev, "Failed to get %s DVS GPIO: %d\n", - dcdc->name, ret); + dcdc->dvs_gpiod = devm_gpiod_get(&pdev->dev, "dvs", + dcdc->dvs_gpio_state ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW); + if (IS_ERR(dcdc->dvs_gpiod)) { + dev_err(wm831x->dev, "Failed to get %s DVS GPIO: %ld\n", + dcdc->name, PTR_ERR(dcdc->dvs_gpiod)); return; } - dcdc->dvs_gpio = pdata->dvs_gpio; - switch (pdata->dvs_control_src) { case 1: ctrl = 2 << WM831X_DC1_DVS_SRC_SHIFT; diff --git a/include/linux/mfd/wm831x/pdata.h b/include/linux/mfd/wm831x/pdata.h index dcc9631b3052..1b8bb36e13b8 100644 --- a/include/linux/mfd/wm831x/pdata.h +++ b/include/linux/mfd/wm831x/pdata.h @@ -52,7 +52,6 @@ struct wm831x_battery_pdata { * I2C or SPI buses. */ struct wm831x_buckv_pdata { - int dvs_gpio; /** CPU GPIO to use for DVS switching */ int dvs_control_src; /** Hardware DVS source to use (1 or 2) */ int dvs_init_state; /** DVS state to expect on startup */ int dvs_state_gpio; /** CPU GPIO to use for monitoring status */ -- cgit v1.2.3 From 5740671e596bdc3986a5391997de194300970201 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 12 Jun 2019 14:28:30 +0100 Subject: dma-fence/reservation: Markup rcu protected access for DEBUG_MUTEXES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark the access to reservation_object.fence as being protected to silence sparse. Signed-off-by: Chris Wilson Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20190612132830.31221-1-chris@chris-wilson.co.uk --- include/linux/reservation.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/reservation.h b/include/linux/reservation.h index ee750765cc94..644a22dbe53b 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -216,8 +216,12 @@ reservation_object_unlock(struct reservation_object *obj) { #ifdef CONFIG_DEBUG_MUTEXES /* Test shared fence slot reservation */ - if (obj->fence) - obj->fence->shared_max = obj->fence->shared_count; + if (rcu_access_pointer(obj->fence)) { + struct reservation_object_list *fence = + reservation_object_get_list(obj); + + fence->shared_max = fence->shared_count; + } #endif ww_mutex_unlock(&obj->lock); } -- cgit v1.2.3 From ddde3c18b70061cc09b84a52624909349c212822 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 28 May 2019 11:02:35 +0200 Subject: vt: More locking checks I honestly have no idea what the subtle differences between con_is_visible, con_is_fg (internal to vt.c) and con_is_bound are. But it looks like both vc->vc_display_fg and con_driver_map are protected by the console_lock, so probably better if we hold that when checking this. To do that I had to deinline the con_is_visible function. Signed-off-by: Daniel Vetter Reviewed-by: Sam Ravnborg Acked-by: Greg Kroah-Hartman Reviewed-by: Maarten Lankhorst Cc: Greg Kroah-Hartman Cc: Nicolas Pitre Cc: Martin Hostettler Cc: Adam Borowski Cc: Daniel Vetter Cc: Mikulas Patocka Link: https://patchwork.freedesktop.org/patch/msgid/20190528090304.9388-5-daniel.vetter@ffwll.ch --- drivers/tty/vt/vt.c | 16 ++++++++++++++++ include/linux/console_struct.h | 5 +---- 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 30d29a04bfee..ec92f36ab5c4 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -3822,6 +3822,8 @@ int con_is_bound(const struct consw *csw) { int i, bound = 0; + WARN_CONSOLE_UNLOCKED(); + for (i = 0; i < MAX_NR_CONSOLES; i++) { if (con_driver_map[i] == csw) { bound = 1; @@ -3833,6 +3835,20 @@ int con_is_bound(const struct consw *csw) } EXPORT_SYMBOL(con_is_bound); +/** + * con_is_visible - checks whether the current console is visible + * @vc: virtual console + * + * RETURNS: zero if not visible, nonzero if visible + */ +bool con_is_visible(const struct vc_data *vc) +{ + WARN_CONSOLE_UNLOCKED(); + + return *vc->vc_display_fg == vc; +} +EXPORT_SYMBOL(con_is_visible); + /** * con_debug_enter - prepare the console for the kernel debugger * @sw: console driver diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index ed798e114663..24d4c16e3ae0 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -168,9 +168,6 @@ extern void vc_SAK(struct work_struct *work); #define CUR_DEFAULT CUR_UNDERLINE -static inline bool con_is_visible(const struct vc_data *vc) -{ - return *vc->vc_display_fg == vc; -} +bool con_is_visible(const struct vc_data *vc); #endif /* _LINUX_CONSOLE_STRUCT_H */ -- cgit v1.2.3 From 97b67986f1451c772b488d597310f95c14547cce Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 28 May 2019 11:02:41 +0200 Subject: fbcon: call fbcon_fb_(un)registered directly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With commit 6104c37094e729f3d4ce65797002112735d49cd1 Author: Daniel Vetter Date: Tue Aug 1 17:32:07 2017 +0200 fbcon: Make fbcon a built-time depency for fbdev we have a static dependency between fbcon and fbdev, and we can replace the indirection through the notifier chain with a function call. v2: Sam Ravnborg noticed that mach-pxa/am200epd.c has a notifier too, and listens to this. ... Looking at the code it seems to wait for some fb to show up, so that it can get the framebuffer base address from the fb_info struct. I suspect his is some firmware fbdev. Then it uses that information to let the real fbdev driver (metronomefb.c by the looks) get at the framebuffer memory. This doesn't looke like it's easy to fix (except by deleting the entire thing, seems untouched since 2008, we might be able to get away with that), so let's just stuff a few #ifdef into fb.h and fbmem.c and cry over them for a bit. Signed-off-by: Daniel Vetter Reviewed-by: Sam Ravnborg Reviewed-by: Maarten Lankhorst Cc: Bartlomiej Zolnierkiewicz Cc: Daniel Vetter Cc: Hans de Goede Cc: Greg Kroah-Hartman Cc: "Noralf Trønnes" Cc: Yisheng Xie Cc: Peter Rosin Cc: "Michał Mirosław" Cc: Thomas Zimmermann Cc: Mikulas Patocka Cc: linux-fbdev@vger.kernel.org Cc: Daniel Mack Cc: Haojian Zhuang Cc: Robert Jarzmik Cc: Konstantin Khorenko Cc: Prarit Bhargava Cc: Gerd Hoffmann Cc: Steve Sakoman Cc: Steve Sakoman Cc: linux-arm-kernel@lists.infradead.org Link: https://patchwork.freedesktop.org/patch/msgid/20190528090304.9388-11-daniel.vetter@ffwll.ch --- arch/arm/mach-pxa/am200epd.c | 13 +++++++++++-- drivers/video/fbdev/core/fbcon.c | 14 +++----------- drivers/video/fbdev/core/fbmem.c | 24 +++++++++++++++++------- include/linux/fb.h | 7 +++++-- include/linux/fbcon.h | 4 ++++ 5 files changed, 40 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-pxa/am200epd.c b/arch/arm/mach-pxa/am200epd.c index 50e18ed37fa6..cac0bb09db14 100644 --- a/arch/arm/mach-pxa/am200epd.c +++ b/arch/arm/mach-pxa/am200epd.c @@ -347,8 +347,17 @@ int __init am200_init(void) { int ret; - /* before anything else, we request notification for any fb - * creation events */ + /* + * Before anything else, we request notification for any fb + * creation events. + * + * FIXME: This is terrible and needs to be nuked. The notifier is used + * to get at the fb base address from the boot splash fb driver, which + * is then passed to metronomefb. Instaed of metronomfb or this board + * support file here figuring this out on their own. + * + * See also the #ifdef in fbmem.c. + */ fb_register_client(&am200_fb_notif); pxa2xx_mfp_config(ARRAY_AND_SIZE(am200_pin_config)); diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 9c14eab77d99..c12fc98035e0 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -3119,14 +3119,14 @@ static int fbcon_fb_unbind(int idx) } /* called with console_lock held */ -static int fbcon_fb_unregistered(struct fb_info *info) +void fbcon_fb_unregistered(struct fb_info *info) { int i, idx; WARN_CONSOLE_UNLOCKED(); if (deferred_takeover) - return 0; + return; idx = info->node; for (i = first_fb_vc; i <= last_fb_vc; i++) { @@ -3155,8 +3155,6 @@ static int fbcon_fb_unregistered(struct fb_info *info) if (!num_registered_fb) do_unregister_con_driver(&fb_con); - - return 0; } /* called with console_lock held */ @@ -3215,7 +3213,7 @@ static inline void fbcon_select_primary(struct fb_info *info) #endif /* CONFIG_FRAMEBUFFER_DETECT_PRIMARY */ /* called with console_lock held */ -static int fbcon_fb_registered(struct fb_info *info) +int fbcon_fb_registered(struct fb_info *info) { int ret = 0, i, idx; @@ -3359,12 +3357,6 @@ static int fbcon_event_notify(struct notifier_block *self, idx = info->node; ret = fbcon_fb_unbind(idx); break; - case FB_EVENT_FB_REGISTERED: - ret = fbcon_fb_registered(info); - break; - case FB_EVENT_FB_UNREGISTERED: - ret = fbcon_fb_unregistered(info); - break; case FB_EVENT_SET_CONSOLE_MAP: /* called with console lock held */ con2fb = event->data; diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 8ba674ffb3c9..bed7698ad18a 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1660,7 +1660,6 @@ MODULE_PARM_DESC(lockless_register_fb, static int do_register_framebuffer(struct fb_info *fb_info) { int i, ret; - struct fb_event event; struct fb_videomode mode; if (fb_check_foreignness(fb_info)) @@ -1723,7 +1722,14 @@ static int do_register_framebuffer(struct fb_info *fb_info) fb_add_videomode(&mode, &fb_info->modelist); registered_fb[i] = fb_info; - event.info = fb_info; +#ifdef CONFIG_GUMSTIX_AM200EPD + { + struct fb_event event; + event.info = fb_info; + fb_notifier_call_chain(FB_EVENT_FB_REGISTERED, &event); + } +#endif + if (!lockless_register_fb) console_lock(); else @@ -1732,9 +1738,8 @@ static int do_register_framebuffer(struct fb_info *fb_info) ret = -ENODEV; goto unlock_console; } - ret = 0; - fb_notifier_call_chain(FB_EVENT_FB_REGISTERED, &event); + ret = fbcon_fb_registered(fb_info); unlock_fb_info(fb_info); unlock_console: if (!lockless_register_fb) @@ -1771,7 +1776,6 @@ static int __unlink_framebuffer(struct fb_info *fb_info); static int do_unregister_framebuffer(struct fb_info *fb_info) { - struct fb_event event; int ret; ret = unbind_console(fb_info); @@ -1789,9 +1793,15 @@ static int do_unregister_framebuffer(struct fb_info *fb_info) registered_fb[fb_info->node] = NULL; num_registered_fb--; fb_cleanup_device(fb_info); - event.info = fb_info; +#ifdef CONFIG_GUMSTIX_AM200EPD + { + struct fb_event event; + event.info = fb_info; + fb_notifier_call_chain(FB_EVENT_FB_UNREGISTERED, &event); + } +#endif console_lock(); - fb_notifier_call_chain(FB_EVENT_FB_UNREGISTERED, &event); + fbcon_fb_unregistered(fb_info); console_unlock(); /* this may free fb info */ diff --git a/include/linux/fb.h b/include/linux/fb.h index f52ef0ad6781..288175fafaf6 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -136,10 +136,13 @@ struct fb_cursor_user { #define FB_EVENT_RESUME 0x03 /* An entry from the modelist was removed */ #define FB_EVENT_MODE_DELETE 0x04 -/* A driver registered itself */ + +#ifdef CONFIG_GUMSTIX_AM200EPD +/* only used by mach-pxa/am200epd.c */ #define FB_EVENT_FB_REGISTERED 0x05 -/* A driver unregistered itself */ #define FB_EVENT_FB_UNREGISTERED 0x06 +#endif + /* CONSOLE-SPECIFIC: get console to framebuffer mapping */ #define FB_EVENT_GET_CONSOLE_MAP 0x07 /* CONSOLE-SPECIFIC: set console to framebuffer mapping */ diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index f68a7db14165..94a71e9e1257 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -4,9 +4,13 @@ #ifdef CONFIG_FRAMEBUFFER_CONSOLE void __init fb_console_init(void); void __exit fb_console_exit(void); +int fbcon_fb_registered(struct fb_info *info); +void fbcon_fb_unregistered(struct fb_info *info); #else static inline void fb_console_init(void) {} static inline void fb_console_exit(void) {} +static inline int fbcon_fb_registered(struct fb_info *info) { return 0; } +static inline void fbcon_fb_unregistered(struct fb_info *info) {} #endif #endif /* _LINUX_FBCON_H */ -- cgit v1.2.3 From cf4a3ae4ef3399179166a464af1d6b172225bef4 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 28 May 2019 11:02:47 +0200 Subject: fbdev: lock_fb_info cannot fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ever since commit c47747fde931c02455683bd00ea43eaa62f35b0e Author: Linus Torvalds Date: Wed May 11 14:58:34 2011 -0700 fbmem: make read/write/ioctl use the frame buffer at open time fbdev has gained proper refcounting for the fbinfo attached to any open files, which means that the backing driver (stored in fb_info->fbops) cannot untimely disappear anymore. The only thing that can happen is that the entire device just outright disappears and gets unregistered, but file_fb_info does check for that. Except that it's racy - it only checks once at the start of a file_ops, there's no guarantee that the underlying fbdev won't untimely disappear. Aside: A proper way to fix that race is probably to replicate the srcu trickery we've rolled out in drm. But given that this race has existed since forever it's probably not one we need to fix right away. do_unregister_framebuffer also nowhere clears fb_info->fbops, hence the check in lock_fb_info can't possible catch a disappearing fbdev later on. Long story short: Ever since the above commit the fb_info->fbops checks have essentially become dead code. Remove this all. Aside from the file_ops callbacks, and stuff called from there there's only register/unregister code left. If that goes wrong a driver managed to register/unregister a device instance twice or in the wrong order. That's just a driver bug. v2: - fb_mmap had an open-coded version of the fbinfo->fops check, because it doesn't need the fbinfo->lock. Delete that too. - Use the wrapper function in fb_open/release now, since no difference anymore. Signed-off-by: Daniel Vetter Reviewed-by: Sam Ravnborg Reviewed-by: Maarten Lankhorst Cc: Bartlomiej Zolnierkiewicz Cc: Daniel Vetter Cc: Hans de Goede Cc: Yisheng Xie Cc: Sergey Senozhatsky Cc: "Noralf Trønnes" Cc: Peter Rosin Cc: "Michał Mirosław" Cc: Mikulas Patocka Cc: "Gustavo A. R. Silva" Cc: linux-fbdev@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20190528090304.9388-17-daniel.vetter@ffwll.ch --- drivers/video/fbdev/core/fbcmap.c | 6 +--- drivers/video/fbdev/core/fbcon.c | 3 +- drivers/video/fbdev/core/fbmem.c | 73 +++++++++------------------------------ include/linux/fb.h | 5 ++- 4 files changed, 23 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/fbdev/core/fbcmap.c b/drivers/video/fbdev/core/fbcmap.c index 2811c4afde01..e5ae33c1a8e8 100644 --- a/drivers/video/fbdev/core/fbcmap.c +++ b/drivers/video/fbdev/core/fbcmap.c @@ -285,11 +285,7 @@ int fb_set_user_cmap(struct fb_cmap_user *cmap, struct fb_info *info) goto out; } umap.start = cmap->start; - if (!lock_fb_info(info)) { - rc = -ENODEV; - goto out; - } - + lock_fb_info(info); rc = fb_set_cmap(&umap, info); unlock_fb_info(info); out: diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index c12fc98035e0..f7f3eb0f1893 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2364,8 +2364,7 @@ static void fbcon_generic_blank(struct vc_data *vc, struct fb_info *info, } - if (!lock_fb_info(info)) - return; + lock_fb_info(info); event.info = info; event.data = ␣ fb_notifier_call_chain(FB_EVENT_CONBLANK, &event); diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index bed7698ad18a..d73762324ca2 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -80,17 +80,6 @@ static void put_fb_info(struct fb_info *fb_info) fb_info->fbops->fb_destroy(fb_info); } -int lock_fb_info(struct fb_info *info) -{ - mutex_lock(&info->lock); - if (!info->fbops) { - mutex_unlock(&info->lock); - return 0; - } - return 1; -} -EXPORT_SYMBOL(lock_fb_info); - /* * Helpers */ @@ -1121,8 +1110,7 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, switch (cmd) { case FBIOGET_VSCREENINFO: - if (!lock_fb_info(info)) - return -ENODEV; + lock_fb_info(info); var = info->var; unlock_fb_info(info); @@ -1132,10 +1120,7 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, if (copy_from_user(&var, argp, sizeof(var))) return -EFAULT; console_lock(); - if (!lock_fb_info(info)) { - console_unlock(); - return -ENODEV; - } + lock_fb_info(info); info->flags |= FBINFO_MISC_USEREVENT; ret = fb_set_var(info, &var); info->flags &= ~FBINFO_MISC_USEREVENT; @@ -1145,8 +1130,7 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, ret = -EFAULT; break; case FBIOGET_FSCREENINFO: - if (!lock_fb_info(info)) - return -ENODEV; + lock_fb_info(info); fix = info->fix; if (info->flags & FBINFO_HIDE_SMEM_START) fix.smem_start = 0; @@ -1162,8 +1146,7 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, case FBIOGETCMAP: if (copy_from_user(&cmap, argp, sizeof(cmap))) return -EFAULT; - if (!lock_fb_info(info)) - return -ENODEV; + lock_fb_info(info); cmap_from = info->cmap; unlock_fb_info(info); ret = fb_cmap_to_user(&cmap_from, &cmap); @@ -1172,10 +1155,7 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, if (copy_from_user(&var, argp, sizeof(var))) return -EFAULT; console_lock(); - if (!lock_fb_info(info)) { - console_unlock(); - return -ENODEV; - } + lock_fb_info(info); ret = fb_pan_display(info, &var); unlock_fb_info(info); console_unlock(); @@ -1192,8 +1172,7 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, return -EINVAL; con2fb.framebuffer = -1; event.data = &con2fb; - if (!lock_fb_info(info)) - return -ENODEV; + lock_fb_info(info); event.info = info; fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP, &event); unlock_fb_info(info); @@ -1214,10 +1193,7 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, } event.data = &con2fb; console_lock(); - if (!lock_fb_info(info)) { - console_unlock(); - return -ENODEV; - } + lock_fb_info(info); event.info = info; ret = fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP, &event); unlock_fb_info(info); @@ -1225,10 +1201,7 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, break; case FBIOBLANK: console_lock(); - if (!lock_fb_info(info)) { - console_unlock(); - return -ENODEV; - } + lock_fb_info(info); info->flags |= FBINFO_MISC_USEREVENT; ret = fb_blank(info, arg); info->flags &= ~FBINFO_MISC_USEREVENT; @@ -1236,8 +1209,7 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, console_unlock(); break; default: - if (!lock_fb_info(info)) - return -ENODEV; + lock_fb_info(info); fb = info->fbops; if (fb->fb_ioctl) ret = fb->fb_ioctl(info, cmd, arg); @@ -1357,8 +1329,7 @@ static int fb_get_fscreeninfo(struct fb_info *info, unsigned int cmd, { struct fb_fix_screeninfo fix; - if (!lock_fb_info(info)) - return -ENODEV; + lock_fb_info(info); fix = info->fix; if (info->flags & FBINFO_HIDE_SMEM_START) fix.smem_start = 0; @@ -1418,8 +1389,6 @@ fb_mmap(struct file *file, struct vm_area_struct * vma) if (!info) return -ENODEV; fb = info->fbops; - if (!fb) - return -ENODEV; mutex_lock(&info->mm_lock); if (fb->fb_mmap) { int res; @@ -1483,7 +1452,7 @@ __releases(&info->lock) if (IS_ERR(info)) return PTR_ERR(info); - mutex_lock(&info->lock); + lock_fb_info(info); if (!try_module_get(info->fbops->owner)) { res = -ENODEV; goto out; @@ -1499,7 +1468,7 @@ __releases(&info->lock) fb_deferred_io_open(info, inode, file); #endif out: - mutex_unlock(&info->lock); + unlock_fb_info(info); if (res) put_fb_info(info); return res; @@ -1512,11 +1481,11 @@ __releases(&info->lock) { struct fb_info * const info = file->private_data; - mutex_lock(&info->lock); + lock_fb_info(info); if (info->fbops->fb_release) info->fbops->fb_release(info,1); module_put(info->fbops->owner); - mutex_unlock(&info->lock); + unlock_fb_info(info); put_fb_info(info); return 0; } @@ -1734,14 +1703,10 @@ static int do_register_framebuffer(struct fb_info *fb_info) console_lock(); else atomic_inc(&ignore_console_lock_warning); - if (!lock_fb_info(fb_info)) { - ret = -ENODEV; - goto unlock_console; - } - + lock_fb_info(fb_info); ret = fbcon_fb_registered(fb_info); unlock_fb_info(fb_info); -unlock_console: + if (!lockless_register_fb) console_unlock(); else @@ -1759,11 +1724,7 @@ static int unbind_console(struct fb_info *fb_info) return -EINVAL; console_lock(); - if (!lock_fb_info(fb_info)) { - console_unlock(); - return -ENODEV; - } - + lock_fb_info(fb_info); event.info = fb_info; ret = fb_notifier_call_chain(FB_EVENT_FB_UNBIND, &event); unlock_fb_info(fb_info); diff --git a/include/linux/fb.h b/include/linux/fb.h index 288175fafaf6..aa8f18163151 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -663,7 +663,10 @@ extern struct class *fb_class; for (i = 0; i < FB_MAX; i++) \ if (!registered_fb[i]) {} else -extern int lock_fb_info(struct fb_info *info); +static inline void lock_fb_info(struct fb_info *info) +{ + mutex_lock(&info->lock); +} static inline void unlock_fb_info(struct fb_info *info) { -- cgit v1.2.3 From 0e0f3250d4402d60f4571d076ab27d5af049853e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 28 May 2019 11:02:48 +0200 Subject: fbcon: call fbcon_fb_bind directly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also remove the error return value. That's all errors for either driver bugs (trying to unbind something that isn't bound), or errors of the new driver that will take over. There's nothing the outgoing driver can do about this anyway, so switch over to void. Signed-off-by: Daniel Vetter Reviewed-by: Sam Ravnborg Reviewed-by: Maarten Lankhorst Cc: Bartlomiej Zolnierkiewicz Cc: Daniel Vetter Cc: Hans de Goede Cc: Sergey Senozhatsky Cc: Peter Rosin Cc: Kees Cook Cc: Konstantin Khorenko Cc: Yisheng Xie Cc: "Michał Mirosław" Cc: Mikulas Patocka Cc: Thomas Zimmermann Cc: linux-fbdev@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20190528090304.9388-18-daniel.vetter@ffwll.ch --- drivers/video/fbdev/core/fbcon.c | 24 +++++++----------------- drivers/video/fbdev/core/fbmem.c | 7 ++----- include/linux/fb.h | 2 -- include/linux/fbcon.h | 2 ++ 4 files changed, 11 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index f7f3eb0f1893..d1e37afa6f80 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -3046,7 +3046,7 @@ static int fbcon_mode_deleted(struct fb_info *info, } #ifdef CONFIG_VT_HW_CONSOLE_BINDING -static int fbcon_unbind(void) +static void fbcon_unbind(void) { int ret; @@ -3055,25 +3055,21 @@ static int fbcon_unbind(void) if (!ret) fbcon_has_console_bind = 0; - - return ret; } #else -static inline int fbcon_unbind(void) -{ - return -EINVAL; -} +static inline void fbcon_unbind(void) {} #endif /* CONFIG_VT_HW_CONSOLE_BINDING */ /* called with console_lock held */ -static int fbcon_fb_unbind(int idx) +void fbcon_fb_unbind(struct fb_info *info) { int i, new_idx = -1, ret = 0; + int idx = info->node; WARN_CONSOLE_UNLOCKED(); if (!fbcon_has_console_bind) - return 0; + return; for (i = first_fb_vc; i <= last_fb_vc; i++) { if (con2fb_map[i] != idx && @@ -3106,15 +3102,13 @@ static int fbcon_fb_unbind(int idx) idx, 0); if (ret) { con2fb_map[i] = idx; - return ret; + return; } } } } - ret = fbcon_unbind(); + fbcon_unbind(); } - - return ret; } /* called with console_lock held */ @@ -3352,10 +3346,6 @@ static int fbcon_event_notify(struct notifier_block *self, mode = event->data; ret = fbcon_mode_deleted(info, mode); break; - case FB_EVENT_FB_UNBIND: - idx = info->node; - ret = fbcon_fb_unbind(idx); - break; case FB_EVENT_SET_CONSOLE_MAP: /* called with console lock held */ con2fb = event->data; diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index d73762324ca2..f3fc2e5b193c 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1716,8 +1716,6 @@ static int do_register_framebuffer(struct fb_info *fb_info) static int unbind_console(struct fb_info *fb_info) { - struct fb_event event; - int ret; int i = fb_info->node; if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info) @@ -1725,12 +1723,11 @@ static int unbind_console(struct fb_info *fb_info) console_lock(); lock_fb_info(fb_info); - event.info = fb_info; - ret = fb_notifier_call_chain(FB_EVENT_FB_UNBIND, &event); + fbcon_fb_unbind(fb_info); unlock_fb_info(fb_info); console_unlock(); - return ret; + return 0; } static int __unlink_framebuffer(struct fb_info *fb_info); diff --git a/include/linux/fb.h b/include/linux/fb.h index aa8f18163151..b6ce041d9e13 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -158,8 +158,6 @@ struct fb_cursor_user { #define FB_EVENT_CONBLANK 0x0C /* Get drawing requirements */ #define FB_EVENT_GET_REQ 0x0D -/* Unbind from the console if possible */ -#define FB_EVENT_FB_UNBIND 0x0E /* CONSOLE-SPECIFIC: remap all consoles to new fb - for vga_switcheroo */ #define FB_EVENT_REMAP_ALL_CONSOLE 0x0F /* A hardware display blank early change occurred */ diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index 94a71e9e1257..38d44fdb6d14 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -6,11 +6,13 @@ void __init fb_console_init(void); void __exit fb_console_exit(void); int fbcon_fb_registered(struct fb_info *info); void fbcon_fb_unregistered(struct fb_info *info); +void fbcon_fb_unbind(struct fb_info *info); #else static inline void fb_console_init(void) {} static inline void fb_console_exit(void) {} static inline int fbcon_fb_registered(struct fb_info *info) { return 0; } static inline void fbcon_fb_unregistered(struct fb_info *info) {} +static inline void fbcon_fb_unbind(struct fb_info *info) {} #endif #endif /* _LINUX_FBCON_H */ -- cgit v1.2.3 From deb00d2785bedd379caa7aaf18c1ffb824580b9d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 28 May 2019 11:02:49 +0200 Subject: fbdev: make unregister/unlink functions not fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Except for driver bugs (which we'll catch with a WARN_ON) this is only to report failures of the new driver taking over the console. There's nothing the outgoing driver can do about that, and no one ever bothered to actually look at these return values. So remove them all. v2: fixup unregister_framebuffer in savagefb, fbtft, ivtvfb, and neofb drivers, reported by kbuild. Signed-off-by: Daniel Vetter Reviewed-by: Sam Ravnborg Reviewed-by: Maarten Lankhorst Cc: Bartlomiej Zolnierkiewicz Cc: Daniel Vetter Cc: "Michał Mirosław" Cc: Peter Rosin Cc: Hans de Goede Cc: Mikulas Patocka Cc: linux-fbdev@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20190528090304.9388-19-daniel.vetter@ffwll.ch --- drivers/media/pci/ivtv/ivtvfb.c | 6 +-- drivers/staging/fbtft/fbtft-core.c | 4 +- drivers/video/fbdev/core/fbmem.c | 73 +++++++++------------------- drivers/video/fbdev/neofb.c | 9 +--- drivers/video/fbdev/savage/savagefb_driver.c | 9 +--- include/linux/fb.h | 4 +- 6 files changed, 31 insertions(+), 74 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/pci/ivtv/ivtvfb.c b/drivers/media/pci/ivtv/ivtvfb.c index 66be490ec563..299ff032f528 100644 --- a/drivers/media/pci/ivtv/ivtvfb.c +++ b/drivers/media/pci/ivtv/ivtvfb.c @@ -1246,11 +1246,7 @@ static int ivtvfb_callback_cleanup(struct device *dev, void *p) struct osd_info *oi = itv->osd_info; if (itv->v4l2_cap & V4L2_CAP_VIDEO_OUTPUT) { - if (unregister_framebuffer(&itv->osd_info->ivtvfb_info)) { - IVTVFB_WARN("Framebuffer %d is in use, cannot unload\n", - itv->instance); - return 0; - } + unregister_framebuffer(&itv->osd_info->ivtvfb_info); IVTVFB_INFO("Unregister framebuffer %d\n", itv->instance); itv->ivtvfb_restore = NULL; ivtvfb_blank(FB_BLANK_VSYNC_SUSPEND, &oi->ivtvfb_info); diff --git a/drivers/staging/fbtft/fbtft-core.c b/drivers/staging/fbtft/fbtft-core.c index 9b07badf4c6c..7cbc1bdd2d8a 100644 --- a/drivers/staging/fbtft/fbtft-core.c +++ b/drivers/staging/fbtft/fbtft-core.c @@ -891,7 +891,9 @@ int fbtft_unregister_framebuffer(struct fb_info *fb_info) if (par->fbtftops.unregister_backlight) par->fbtftops.unregister_backlight(par); fbtft_sysfs_exit(par); - return unregister_framebuffer(fb_info); + unregister_framebuffer(fb_info); + + return 0; } EXPORT_SYMBOL(fbtft_unregister_framebuffer); diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index f3fc2e5b193c..f3bcad30d3ba 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1590,13 +1590,13 @@ static bool fb_do_apertures_overlap(struct apertures_struct *gena, return false; } -static int do_unregister_framebuffer(struct fb_info *fb_info); +static void do_unregister_framebuffer(struct fb_info *fb_info); #define VGA_FB_PHYS 0xA0000 -static int do_remove_conflicting_framebuffers(struct apertures_struct *a, - const char *name, bool primary) +static void do_remove_conflicting_framebuffers(struct apertures_struct *a, + const char *name, bool primary) { - int i, ret; + int i; /* check all firmware fbs and kick off if the base addr overlaps */ for_each_registered_fb(i) { @@ -1612,13 +1612,9 @@ static int do_remove_conflicting_framebuffers(struct apertures_struct *a, printk(KERN_INFO "fb%d: switching to %s from %s\n", i, name, registered_fb[i]->fix.id); - ret = do_unregister_framebuffer(registered_fb[i]); - if (ret) - return ret; + do_unregister_framebuffer(registered_fb[i]); } } - - return 0; } static bool lockless_register_fb; @@ -1634,11 +1630,9 @@ static int do_register_framebuffer(struct fb_info *fb_info) if (fb_check_foreignness(fb_info)) return -ENOSYS; - ret = do_remove_conflicting_framebuffers(fb_info->apertures, - fb_info->fix.id, - fb_is_primary_device(fb_info)); - if (ret) - return ret; + do_remove_conflicting_framebuffers(fb_info->apertures, + fb_info->fix.id, + fb_is_primary_device(fb_info)); if (num_registered_fb == FB_MAX) return -ENXIO; @@ -1714,32 +1708,25 @@ static int do_register_framebuffer(struct fb_info *fb_info) return ret; } -static int unbind_console(struct fb_info *fb_info) +static void unbind_console(struct fb_info *fb_info) { int i = fb_info->node; - if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info) - return -EINVAL; + if (WARN_ON(i < 0 || i >= FB_MAX || registered_fb[i] != fb_info)) + return; console_lock(); lock_fb_info(fb_info); fbcon_fb_unbind(fb_info); unlock_fb_info(fb_info); console_unlock(); - - return 0; } -static int __unlink_framebuffer(struct fb_info *fb_info); +static void __unlink_framebuffer(struct fb_info *fb_info); -static int do_unregister_framebuffer(struct fb_info *fb_info) +static void do_unregister_framebuffer(struct fb_info *fb_info) { - int ret; - - ret = unbind_console(fb_info); - - if (ret) - return -EINVAL; + unbind_console(fb_info); pm_vt_switch_unregister(fb_info->dev); @@ -1764,36 +1751,27 @@ static int do_unregister_framebuffer(struct fb_info *fb_info) /* this may free fb info */ put_fb_info(fb_info); - return 0; } -static int __unlink_framebuffer(struct fb_info *fb_info) +static void __unlink_framebuffer(struct fb_info *fb_info) { int i; i = fb_info->node; - if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info) - return -EINVAL; + if (WARN_ON(i < 0 || i >= FB_MAX || registered_fb[i] != fb_info)) + return; if (fb_info->dev) { device_destroy(fb_class, MKDEV(FB_MAJOR, i)); fb_info->dev = NULL; } - - return 0; } -int unlink_framebuffer(struct fb_info *fb_info) +void unlink_framebuffer(struct fb_info *fb_info) { - int ret; - - ret = __unlink_framebuffer(fb_info); - if (ret) - return ret; + __unlink_framebuffer(fb_info); unbind_console(fb_info); - - return 0; } EXPORT_SYMBOL(unlink_framebuffer); @@ -1810,7 +1788,6 @@ EXPORT_SYMBOL(unlink_framebuffer); int remove_conflicting_framebuffers(struct apertures_struct *a, const char *name, bool primary) { - int ret; bool do_free = false; if (!a) { @@ -1824,13 +1801,13 @@ int remove_conflicting_framebuffers(struct apertures_struct *a, } mutex_lock(®istration_lock); - ret = do_remove_conflicting_framebuffers(a, name, primary); + do_remove_conflicting_framebuffers(a, name, primary); mutex_unlock(®istration_lock); if (do_free) kfree(a); - return ret; + return 0; } EXPORT_SYMBOL(remove_conflicting_framebuffers); @@ -1927,16 +1904,12 @@ EXPORT_SYMBOL(register_framebuffer); * that the driver implements fb_open() and fb_release() to * check that no processes are using the device. */ -int +void unregister_framebuffer(struct fb_info *fb_info) { - int ret; - mutex_lock(®istration_lock); - ret = do_unregister_framebuffer(fb_info); + do_unregister_framebuffer(fb_info); mutex_unlock(®istration_lock); - - return ret; } EXPORT_SYMBOL(unregister_framebuffer); diff --git a/drivers/video/fbdev/neofb.c b/drivers/video/fbdev/neofb.c index 5d3a444083f7..b770946a0920 100644 --- a/drivers/video/fbdev/neofb.c +++ b/drivers/video/fbdev/neofb.c @@ -2122,14 +2122,7 @@ static void neofb_remove(struct pci_dev *dev) DBG("neofb_remove"); if (info) { - /* - * If unregister_framebuffer fails, then - * we will be leaving hooks that could cause - * oopsen laying around. - */ - if (unregister_framebuffer(info)) - printk(KERN_WARNING - "neofb: danger danger! Oopsen imminent!\n"); + unregister_framebuffer(info); neo_unmap_video(info); fb_destroy_modedb(info->monspecs.modedb); diff --git a/drivers/video/fbdev/savage/savagefb_driver.c b/drivers/video/fbdev/savage/savagefb_driver.c index 47b78f0138c3..512789f5f884 100644 --- a/drivers/video/fbdev/savage/savagefb_driver.c +++ b/drivers/video/fbdev/savage/savagefb_driver.c @@ -2333,14 +2333,7 @@ static void savagefb_remove(struct pci_dev *dev) DBG("savagefb_remove"); if (info) { - /* - * If unregister_framebuffer fails, then - * we will be leaving hooks that could cause - * oopsen laying around. - */ - if (unregister_framebuffer(info)) - printk(KERN_WARNING "savagefb: danger danger! " - "Oopsen imminent!\n"); + unregister_framebuffer(info); #ifdef CONFIG_FB_SAVAGE_I2C savagefb_delete_i2c_busses(info); diff --git a/include/linux/fb.h b/include/linux/fb.h index b6ce041d9e13..b90cf7d56bd8 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -634,8 +634,8 @@ extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf, /* drivers/video/fbmem.c */ extern int register_framebuffer(struct fb_info *fb_info); -extern int unregister_framebuffer(struct fb_info *fb_info); -extern int unlink_framebuffer(struct fb_info *fb_info); +extern void unregister_framebuffer(struct fb_info *fb_info); +extern void unlink_framebuffer(struct fb_info *fb_info); extern int remove_conflicting_pci_framebuffers(struct pci_dev *pdev, int res_id, const char *name); extern int remove_conflicting_framebuffers(struct apertures_struct *a, -- cgit v1.2.3 From 50c5056356340c8b5be90440d2f32fec8c47a7c3 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 28 May 2019 11:02:52 +0200 Subject: fbdev: directly call fbcon_suspended/resumed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the sh_mobile notifier removed we can just directly call the fbcon code here. v2: Remove now unused local variable. v3: fixup !CONFIG_FRAMEBUFFER_CONSOLE, noticed by kbuild Signed-off-by: Daniel Vetter Reviewed-by: Sam Ravnborg Reviewed-by: Maarten Lankhorst Cc: Bartlomiej Zolnierkiewicz Cc: Daniel Vetter Cc: Hans de Goede Cc: Greg Kroah-Hartman Cc: Prarit Bhargava Cc: Kees Cook Cc: Konstantin Khorenko Cc: Yisheng Xie Cc: "Michał Mirosław" Cc: Peter Rosin Cc: Mikulas Patocka Cc: linux-fbdev@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20190528090304.9388-22-daniel.vetter@ffwll.ch --- drivers/video/fbdev/core/fbcon.c | 10 ++-------- drivers/video/fbdev/core/fbmem.c | 7 ++----- include/linux/fb.h | 8 -------- include/linux/fbcon.h | 4 ++++ 4 files changed, 8 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index d1e37afa6f80..9994111f2563 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2919,7 +2919,7 @@ static int fbcon_set_origin(struct vc_data *vc) return 0; } -static void fbcon_suspended(struct fb_info *info) +void fbcon_suspended(struct fb_info *info) { struct vc_data *vc = NULL; struct fbcon_ops *ops = info->fbcon_par; @@ -2932,7 +2932,7 @@ static void fbcon_suspended(struct fb_info *info) fbcon_cursor(vc, CM_ERASE); } -static void fbcon_resumed(struct fb_info *info) +void fbcon_resumed(struct fb_info *info) { struct vc_data *vc; struct fbcon_ops *ops = info->fbcon_par; @@ -3330,12 +3330,6 @@ static int fbcon_event_notify(struct notifier_block *self, int idx, ret = 0; switch(action) { - case FB_EVENT_SUSPEND: - fbcon_suspended(info); - break; - case FB_EVENT_RESUME: - fbcon_resumed(info); - break; case FB_EVENT_MODE_CHANGE: fbcon_modechanged(info); break; diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index bee45e9405b8..73269dedcd45 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1917,17 +1917,14 @@ EXPORT_SYMBOL(unregister_framebuffer); */ void fb_set_suspend(struct fb_info *info, int state) { - struct fb_event event; - WARN_CONSOLE_UNLOCKED(); - event.info = info; if (state) { - fb_notifier_call_chain(FB_EVENT_SUSPEND, &event); + fbcon_suspended(info); info->state = FBINFO_STATE_SUSPENDED; } else { info->state = FBINFO_STATE_RUNNING; - fb_notifier_call_chain(FB_EVENT_RESUME, &event); + fbcon_resumed(info); } } EXPORT_SYMBOL(fb_set_suspend); diff --git a/include/linux/fb.h b/include/linux/fb.h index b90cf7d56bd8..794b386415b7 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -126,14 +126,6 @@ struct fb_cursor_user { /* The resolution of the passed in fb_info about to change */ #define FB_EVENT_MODE_CHANGE 0x01 -/* The display on this fb_info is being suspended, no access to the - * framebuffer is allowed any more after that call returns - */ -#define FB_EVENT_SUSPEND 0x02 -/* The display on this fb_info was resumed, you can restore the display - * if you own it - */ -#define FB_EVENT_RESUME 0x03 /* An entry from the modelist was removed */ #define FB_EVENT_MODE_DELETE 0x04 diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index 38d44fdb6d14..790c42ec7b5d 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -7,12 +7,16 @@ void __exit fb_console_exit(void); int fbcon_fb_registered(struct fb_info *info); void fbcon_fb_unregistered(struct fb_info *info); void fbcon_fb_unbind(struct fb_info *info); +void fbcon_suspended(struct fb_info *info); +void fbcon_resumed(struct fb_info *info); #else static inline void fb_console_init(void) {} static inline void fb_console_exit(void) {} static inline int fbcon_fb_registered(struct fb_info *info) { return 0; } static inline void fbcon_fb_unregistered(struct fb_info *info) {} static inline void fbcon_fb_unbind(struct fb_info *info) {} +static inline void fbcon_suspended(struct fb_info *info) {} +static inline void fbcon_resumed(struct fb_info *info) {} #endif #endif /* _LINUX_FBCON_H */ -- cgit v1.2.3 From 13ff178ccd6d3b8074c542a911300b79c4eec255 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 28 May 2019 11:02:53 +0200 Subject: fbcon: Call fbcon_mode_deleted/new_modelist directly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'm not entirely clear on what new_modelist actually does, it seems exclusively for a sysfs interface. Which in the end does amount to a normal fb_set_par to check the mode, but then takes a different path in both fbmem.c and fbcon.c. I have no idea why these 2 paths are different, but then I also don't really want to find out. So just do the simple conversion to a direct function call. v2: static inline for the dummy versions, I forgot. Signed-off-by: Daniel Vetter Reviewed-by: Sam Ravnborg Reviewed-by: Maarten Lankhorst Cc: Bartlomiej Zolnierkiewicz Cc: Daniel Vetter Cc: Hans de Goede Cc: Mikulas Patocka Cc: Sergey Senozhatsky Cc: Kees Cook Cc: Peter Rosin Cc: Yisheng Xie Cc: "Michał Mirosław" Cc: linux-fbdev@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20190528090304.9388-23-daniel.vetter@ffwll.ch --- drivers/video/fbdev/core/fbcon.c | 14 +++----------- drivers/video/fbdev/core/fbmem.c | 22 +++++++--------------- include/linux/fb.h | 5 ----- include/linux/fbcon.h | 6 ++++++ 4 files changed, 16 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 9994111f2563..24bd6cd270d5 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -3019,8 +3019,8 @@ static void fbcon_set_all_vcs(struct fb_info *info) fbcon_modechanged(info); } -static int fbcon_mode_deleted(struct fb_info *info, - struct fb_videomode *mode) +int fbcon_mode_deleted(struct fb_info *info, + struct fb_videomode *mode) { struct fb_info *fb_info; struct fbcon_display *p; @@ -3262,7 +3262,7 @@ static void fbcon_fb_blanked(struct fb_info *info, int blank) ops->blank_state = blank; } -static void fbcon_new_modelist(struct fb_info *info) +void fbcon_new_modelist(struct fb_info *info) { int i; struct vc_data *vc; @@ -3324,7 +3324,6 @@ static int fbcon_event_notify(struct notifier_block *self, { struct fb_event *event = data; struct fb_info *info = event->info; - struct fb_videomode *mode; struct fb_con2fbmap *con2fb; struct fb_blit_caps *caps; int idx, ret = 0; @@ -3336,10 +3335,6 @@ static int fbcon_event_notify(struct notifier_block *self, case FB_EVENT_MODE_CHANGE_ALL: fbcon_set_all_vcs(info); break; - case FB_EVENT_MODE_DELETE: - mode = event->data; - ret = fbcon_mode_deleted(info, mode); - break; case FB_EVENT_SET_CONSOLE_MAP: /* called with console lock held */ con2fb = event->data; @@ -3353,9 +3348,6 @@ static int fbcon_event_notify(struct notifier_block *self, case FB_EVENT_BLANK: fbcon_fb_blanked(info, *(int *)event->data); break; - case FB_EVENT_NEW_MODELIST: - fbcon_new_modelist(info); - break; case FB_EVENT_GET_REQ: caps = event->data; fbcon_get_requirement(info, caps); diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 73269dedcd45..cbdd141e7695 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -966,16 +966,11 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) /* make sure we don't delete the videomode of current var */ ret = fb_mode_is_equal(&mode1, &mode2); - if (!ret) { - struct fb_event event; - - event.info = info; - event.data = &mode1; - ret = fb_notifier_call_chain(FB_EVENT_MODE_DELETE, &event); - } + if (!ret) + fbcon_mode_deleted(info, &mode1); if (!ret) - fb_delete_videomode(&mode1, &info->modelist); + fb_delete_videomode(&mode1, &info->modelist); ret = (ret) ? -EINVAL : 0; @@ -1992,7 +1987,6 @@ subsys_initcall(fbmem_init); int fb_new_modelist(struct fb_info *info) { - struct fb_event event; struct fb_var_screeninfo var = info->var; struct list_head *pos, *n; struct fb_modelist *modelist; @@ -2012,14 +2006,12 @@ int fb_new_modelist(struct fb_info *info) } } - err = 1; + if (list_empty(&info->modelist)) + return 1; - if (!list_empty(&info->modelist)) { - event.info = info; - err = fb_notifier_call_chain(FB_EVENT_NEW_MODELIST, &event); - } + fbcon_new_modelist(info); - return err; + return 0; } MODULE_LICENSE("GPL"); diff --git a/include/linux/fb.h b/include/linux/fb.h index 794b386415b7..7a788ed8c7b5 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -126,8 +126,6 @@ struct fb_cursor_user { /* The resolution of the passed in fb_info about to change */ #define FB_EVENT_MODE_CHANGE 0x01 -/* An entry from the modelist was removed */ -#define FB_EVENT_MODE_DELETE 0x04 #ifdef CONFIG_GUMSTIX_AM200EPD /* only used by mach-pxa/am200epd.c */ @@ -142,9 +140,6 @@ struct fb_cursor_user { /* A hardware display blank change occurred */ #define FB_EVENT_BLANK 0x09 /* Private modelist is to be replaced */ -#define FB_EVENT_NEW_MODELIST 0x0A -/* The resolution of the passed in fb_info about to change and - all vc's should be changed */ #define FB_EVENT_MODE_CHANGE_ALL 0x0B /* A software display blank change occurred */ #define FB_EVENT_CONBLANK 0x0C diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index 790c42ec7b5d..c139834342f5 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -9,6 +9,9 @@ void fbcon_fb_unregistered(struct fb_info *info); void fbcon_fb_unbind(struct fb_info *info); void fbcon_suspended(struct fb_info *info); void fbcon_resumed(struct fb_info *info); +int fbcon_mode_deleted(struct fb_info *info, + struct fb_videomode *mode); +void fbcon_new_modelist(struct fb_info *info); #else static inline void fb_console_init(void) {} static inline void fb_console_exit(void) {} @@ -17,6 +20,9 @@ static inline void fbcon_fb_unregistered(struct fb_info *info) {} static inline void fbcon_fb_unbind(struct fb_info *info) {} static inline void fbcon_suspended(struct fb_info *info) {} static inline void fbcon_resumed(struct fb_info *info) {} +static inline int fbcon_mode_deleted(struct fb_info *info, + struct fb_videomode *mode) { return 0; } +static inline void fbcon_new_modelist(struct fb_info *info) {} #endif #endif /* _LINUX_FBCON_H */ -- cgit v1.2.3 From 0526c2239ad8ceef98652fe8e059044c24c62ea7 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 28 May 2019 11:02:54 +0200 Subject: fbdev: Call fbcon_get_requirement directly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pretty simple case really. v2: Forgot to remove a break; v3: Add static inline to the dummy versions. Signed-off-by: Daniel Vetter Reviewed-by: Sam Ravnborg Reviewed-by: Maarten Lankhorst Cc: Bartlomiej Zolnierkiewicz Cc: Daniel Vetter Cc: Hans de Goede Cc: "Steven Rostedt (VMware)" Cc: Prarit Bhargava Cc: Kees Cook Cc: Yisheng Xie Cc: "Michał Mirosław" Cc: Peter Rosin Cc: Mikulas Patocka Cc: linux-fbdev@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20190528090304.9388-24-daniel.vetter@ffwll.ch --- drivers/video/fbdev/core/fbcon.c | 9 ++------- drivers/video/fbdev/core/fbmem.c | 5 +---- include/linux/fb.h | 2 -- include/linux/fbcon.h | 4 ++++ 4 files changed, 7 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 24bd6cd270d5..ee0bed9571aa 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -3283,8 +3283,8 @@ void fbcon_new_modelist(struct fb_info *info) } } -static void fbcon_get_requirement(struct fb_info *info, - struct fb_blit_caps *caps) +void fbcon_get_requirement(struct fb_info *info, + struct fb_blit_caps *caps) { struct vc_data *vc; struct fbcon_display *p; @@ -3325,7 +3325,6 @@ static int fbcon_event_notify(struct notifier_block *self, struct fb_event *event = data; struct fb_info *info = event->info; struct fb_con2fbmap *con2fb; - struct fb_blit_caps *caps; int idx, ret = 0; switch(action) { @@ -3348,10 +3347,6 @@ static int fbcon_event_notify(struct notifier_block *self, case FB_EVENT_BLANK: fbcon_fb_blanked(info, *(int *)event->data); break; - case FB_EVENT_GET_REQ: - caps = event->data; - fbcon_get_requirement(info, caps); - break; case FB_EVENT_REMAP_ALL_CONSOLE: idx = info->node; fbcon_remap_all(idx); diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index cbdd141e7695..ddc0c16b8bbf 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -932,16 +932,13 @@ EXPORT_SYMBOL(fb_pan_display); static int fb_check_caps(struct fb_info *info, struct fb_var_screeninfo *var, u32 activate) { - struct fb_event event; struct fb_blit_caps caps, fbcaps; int err = 0; memset(&caps, 0, sizeof(caps)); memset(&fbcaps, 0, sizeof(fbcaps)); caps.flags = (activate & FB_ACTIVATE_ALL) ? 1 : 0; - event.info = info; - event.data = ∩︀ - fb_notifier_call_chain(FB_EVENT_GET_REQ, &event); + fbcon_get_requirement(info, &caps); info->fbops->fb_get_caps(info, &fbcaps, var); if (((fbcaps.x ^ caps.x) & caps.x) || diff --git a/include/linux/fb.h b/include/linux/fb.h index 7a788ed8c7b5..0d86aa31bf8d 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -143,8 +143,6 @@ struct fb_cursor_user { #define FB_EVENT_MODE_CHANGE_ALL 0x0B /* A software display blank change occurred */ #define FB_EVENT_CONBLANK 0x0C -/* Get drawing requirements */ -#define FB_EVENT_GET_REQ 0x0D /* CONSOLE-SPECIFIC: remap all consoles to new fb - for vga_switcheroo */ #define FB_EVENT_REMAP_ALL_CONSOLE 0x0F /* A hardware display blank early change occurred */ diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index c139834342f5..305e4f2eddac 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -12,6 +12,8 @@ void fbcon_resumed(struct fb_info *info); int fbcon_mode_deleted(struct fb_info *info, struct fb_videomode *mode); void fbcon_new_modelist(struct fb_info *info); +void fbcon_get_requirement(struct fb_info *info, + struct fb_blit_caps *caps); #else static inline void fb_console_init(void) {} static inline void fb_console_exit(void) {} @@ -23,6 +25,8 @@ static inline void fbcon_resumed(struct fb_info *info) {} static inline int fbcon_mode_deleted(struct fb_info *info, struct fb_videomode *mode) { return 0; } static inline void fbcon_new_modelist(struct fb_info *info) {} +static inline void fbcon_get_requirement(struct fb_info *info, + struct fb_blit_caps *caps) {} #endif #endif /* _LINUX_FBCON_H */ -- cgit v1.2.3 From 7a625549ea8c14be70bc7cfaf30215401bba6da0 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 28 May 2019 11:02:55 +0200 Subject: Revert "backlight/fbcon: Add FB_EVENT_CONBLANK" This reverts commit 994efacdf9a087b52f71e620b58dfa526b0cf928. The justification is that if hw blanking fails (i.e. fbops->fb_blank) fails, then we still want to shut down the backlight. Which is exactly _not_ what fb_blank() does and so rather inconsistent if we end up with different behaviour between fbcon and direct fbdev usage. Given that the entire notifier maze is getting in the way anyway I figured it's simplest to revert this not well justified commit. v2: Add static inline to the dummy version. Cc: Richard Purdie Signed-off-by: Daniel Vetter Reviewed-by: Sam Ravnborg Reviewed-by: Maarten Lankhorst Acked-by: Daniel Thompson Cc: Lee Jones Cc: Daniel Thompson Cc: Jingoo Han Cc: Bartlomiej Zolnierkiewicz Cc: Daniel Vetter Cc: Hans de Goede Cc: Yisheng Xie Cc: linux-fbdev@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20190528090304.9388-25-daniel.vetter@ffwll.ch --- drivers/video/backlight/backlight.c | 2 +- drivers/video/fbdev/core/fbcon.c | 14 +------------- drivers/video/fbdev/core/fbmem.c | 1 + include/linux/fb.h | 4 +--- include/linux/fbcon.h | 2 ++ 5 files changed, 6 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c index 1ef8b6fd62ac..5dc07106a59e 100644 --- a/drivers/video/backlight/backlight.c +++ b/drivers/video/backlight/backlight.c @@ -47,7 +47,7 @@ static int fb_notifier_callback(struct notifier_block *self, int fb_blank = 0; /* If we aren't interested in this event, skip it immediately ... */ - if (event != FB_EVENT_BLANK && event != FB_EVENT_CONBLANK) + if (event != FB_EVENT_BLANK) return 0; bd = container_of(self, struct backlight_device, fb_notif); diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index ee0bed9571aa..be179b47d1c6 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2350,8 +2350,6 @@ static int fbcon_switch(struct vc_data *vc) static void fbcon_generic_blank(struct vc_data *vc, struct fb_info *info, int blank) { - struct fb_event event; - if (blank) { unsigned short charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff; @@ -2362,13 +2360,6 @@ static void fbcon_generic_blank(struct vc_data *vc, struct fb_info *info, fbcon_clear(vc, 0, 0, vc->vc_rows, vc->vc_cols); vc->vc_video_erase_char = oldc; } - - - lock_fb_info(info); - event.info = info; - event.data = ␣ - fb_notifier_call_chain(FB_EVENT_CONBLANK, &event); - unlock_fb_info(info); } static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch) @@ -3240,7 +3231,7 @@ int fbcon_fb_registered(struct fb_info *info) return ret; } -static void fbcon_fb_blanked(struct fb_info *info, int blank) +void fbcon_fb_blanked(struct fb_info *info, int blank) { struct fbcon_ops *ops = info->fbcon_par; struct vc_data *vc; @@ -3344,9 +3335,6 @@ static int fbcon_event_notify(struct notifier_block *self, con2fb = event->data; con2fb->framebuffer = con2fb_map[con2fb->console - 1]; break; - case FB_EVENT_BLANK: - fbcon_fb_blanked(info, *(int *)event->data); - break; case FB_EVENT_REMAP_ALL_CONSOLE: idx = info->node; fbcon_remap_all(idx); diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index ddc0c16b8bbf..9366fbe99a58 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1068,6 +1068,7 @@ fb_blank(struct fb_info *info, int blank) event.data = ␣ early_ret = fb_notifier_call_chain(FB_EARLY_EVENT_BLANK, &event); + fbcon_fb_blanked(info, blank); if (info->fbops->fb_blank) ret = info->fbops->fb_blank(blank, info); diff --git a/include/linux/fb.h b/include/linux/fb.h index 0d86aa31bf8d..1e66fac3124f 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -137,12 +137,10 @@ struct fb_cursor_user { #define FB_EVENT_GET_CONSOLE_MAP 0x07 /* CONSOLE-SPECIFIC: set console to framebuffer mapping */ #define FB_EVENT_SET_CONSOLE_MAP 0x08 -/* A hardware display blank change occurred */ +/* A display blank is requested */ #define FB_EVENT_BLANK 0x09 /* Private modelist is to be replaced */ #define FB_EVENT_MODE_CHANGE_ALL 0x0B -/* A software display blank change occurred */ -#define FB_EVENT_CONBLANK 0x0C /* CONSOLE-SPECIFIC: remap all consoles to new fb - for vga_switcheroo */ #define FB_EVENT_REMAP_ALL_CONSOLE 0x0F /* A hardware display blank early change occurred */ diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index 305e4f2eddac..d67d7ec51ef9 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -14,6 +14,7 @@ int fbcon_mode_deleted(struct fb_info *info, void fbcon_new_modelist(struct fb_info *info); void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps); +void fbcon_fb_blanked(struct fb_info *info, int blank); #else static inline void fb_console_init(void) {} static inline void fb_console_exit(void) {} @@ -27,6 +28,7 @@ static inline int fbcon_mode_deleted(struct fb_info *info, static inline void fbcon_new_modelist(struct fb_info *info) {} static inline void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps) {} +static inline void fbcon_fb_blanked(struct fb_info *info, int blank) {} #endif #endif /* _LINUX_FBCON_H */ -- cgit v1.2.3 From 9e1467002630065ed86c65ea28bfc9194fff6f0e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 28 May 2019 11:02:59 +0200 Subject: fbcon: replace FB_EVENT_MODE_CHANGE/_ALL with direct calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create a new wrapper function for this, feels like there's some refactoring room here between the two modes. v2: backlight notifier is also interested in the mode change event, it calls lcd->set_mode, of which there are 3 implementations. Thanks to Maarten for spotting this. So we keep that. We can ditch the differentiation between mode change and all mode changes (because backlight notifier doesn't care), and we can drop the FBINFO_MISC_USEREVENT stuff too, because that's just to prevent recursion between fbmem.c and fbcon.c. While at it flatten the control flow a bit. v3: Need to add a static inline to the dummy function. v4: Add missing #include to sh_mob (Sam). Cc: Sam Ravnborg Signed-off-by: Daniel Vetter Reviewed-by: Sam Ravnborg Reviewed-by: Maarten Lankhorst Acked-by: Daniel Thompson Cc: Maarten Lankhorst Cc: Lee Jones Cc: Daniel Thompson Cc: Jingoo Han Cc: Bartlomiej Zolnierkiewicz Cc: Daniel Vetter Cc: Hans de Goede Cc: Yisheng Xie Cc: "Michał Mirosław" Cc: Peter Rosin Cc: Mikulas Patocka Cc: linux-fbdev@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20190528090304.9388-29-daniel.vetter@ffwll.ch --- drivers/video/backlight/lcd.c | 1 - drivers/video/fbdev/core/fbcon.c | 15 +++++++++------ drivers/video/fbdev/core/fbmem.c | 21 ++++++++++----------- drivers/video/fbdev/sh_mobile_lcdcfb.c | 12 ++---------- include/linux/fb.h | 2 -- include/linux/fbcon.h | 2 ++ 6 files changed, 23 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c index 151b18776add..ecdda06989d0 100644 --- a/drivers/video/backlight/lcd.c +++ b/drivers/video/backlight/lcd.c @@ -34,7 +34,6 @@ static int fb_notifier_callback(struct notifier_block *self, switch (event) { case FB_EVENT_BLANK: case FB_EVENT_MODE_CHANGE: - case FB_EVENT_MODE_CHANGE_ALL: case FB_EARLY_EVENT_BLANK: case FB_R_EARLY_EVENT_BLANK: break; diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 4afbc7d8c68c..1837985e1ffb 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -3009,6 +3009,15 @@ static void fbcon_set_all_vcs(struct fb_info *info) fbcon_modechanged(info); } + +void fbcon_update_vcs(struct fb_info *info, bool all) +{ + if (all) + fbcon_set_all_vcs(info); + else + fbcon_modechanged(info); +} + int fbcon_mode_deleted(struct fb_info *info, struct fb_videomode *mode) { @@ -3318,12 +3327,6 @@ static int fbcon_event_notify(struct notifier_block *self, int idx, ret = 0; switch(action) { - case FB_EVENT_MODE_CHANGE: - fbcon_modechanged(info); - break; - case FB_EVENT_MODE_CHANGE_ALL: - fbcon_set_all_vcs(info); - break; case FB_EVENT_SET_CONSOLE_MAP: /* called with console lock held */ con2fb = event->data; diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 96805fe85332..dd1a708df1a7 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -957,6 +957,7 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) u32 activate; struct fb_var_screeninfo old_var; struct fb_videomode mode; + struct fb_event event; if (var->activate & FB_ACTIVATE_INV_MODE) { struct fb_videomode mode1, mode2; @@ -1039,19 +1040,17 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) !list_empty(&info->modelist)) ret = fb_add_videomode(&mode, &info->modelist); - if (!ret && (flags & FBINFO_MISC_USEREVENT)) { - struct fb_event event; - int evnt = (activate & FB_ACTIVATE_ALL) ? - FB_EVENT_MODE_CHANGE_ALL : - FB_EVENT_MODE_CHANGE; + if (ret) + return ret; - info->flags &= ~FBINFO_MISC_USEREVENT; - event.info = info; - event.data = &mode; - fb_notifier_call_chain(evnt, &event); - } + event.info = info; + event.data = &mode; + fb_notifier_call_chain(FB_EVENT_MODE_CHANGE, &event); - return ret; + if (flags & FBINFO_MISC_USEREVENT) + fbcon_update_vcs(info, activate & FB_ACTIVATE_ALL); + + return 0; } EXPORT_SYMBOL(fb_set_var); diff --git a/drivers/video/fbdev/sh_mobile_lcdcfb.c b/drivers/video/fbdev/sh_mobile_lcdcfb.c index 015a02a29d37..b8454424910d 100644 --- a/drivers/video/fbdev/sh_mobile_lcdcfb.c +++ b/drivers/video/fbdev/sh_mobile_lcdcfb.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -1757,8 +1758,6 @@ static void sh_mobile_fb_reconfig(struct fb_info *info) struct sh_mobile_lcdc_chan *ch = info->par; struct fb_var_screeninfo var; struct fb_videomode mode; - struct fb_event event; - int evnt = FB_EVENT_MODE_CHANGE_ALL; if (ch->use_count > 1 || (ch->use_count == 1 && !info->fbcon_par)) /* More framebuffer users are active */ @@ -1780,14 +1779,7 @@ static void sh_mobile_fb_reconfig(struct fb_info *info) /* Couldn't reconfigure, hopefully, can continue as before */ return; - /* - * fb_set_var() calls the notifier change internally, only if - * FBINFO_MISC_USEREVENT flag is set. Since we do not want to fake a - * user event, we have to call the chain ourselves. - */ - event.info = info; - event.data = &ch->display.mode; - fb_notifier_call_chain(evnt, &event); + fbcon_update_vcs(info, true); } /* diff --git a/include/linux/fb.h b/include/linux/fb.h index 1e66fac3124f..f9c212f9b661 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -139,8 +139,6 @@ struct fb_cursor_user { #define FB_EVENT_SET_CONSOLE_MAP 0x08 /* A display blank is requested */ #define FB_EVENT_BLANK 0x09 -/* Private modelist is to be replaced */ -#define FB_EVENT_MODE_CHANGE_ALL 0x0B /* CONSOLE-SPECIFIC: remap all consoles to new fb - for vga_switcheroo */ #define FB_EVENT_REMAP_ALL_CONSOLE 0x0F /* A hardware display blank early change occurred */ diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index d67d7ec51ef9..de31eeb22c97 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -15,6 +15,7 @@ void fbcon_new_modelist(struct fb_info *info); void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps); void fbcon_fb_blanked(struct fb_info *info, int blank); +void fbcon_update_vcs(struct fb_info *info, bool all); #else static inline void fb_console_init(void) {} static inline void fb_console_exit(void) {} @@ -29,6 +30,7 @@ static inline void fbcon_new_modelist(struct fb_info *info) {} static inline void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps) {} static inline void fbcon_fb_blanked(struct fb_info *info, int blank) {} +static inline void fbcon_update_vcs(struct fb_info *info, bool all) {} #endif #endif /* _LINUX_FBCON_H */ -- cgit v1.2.3 From 1cd51b5d200dec292577a4656803d8aeff54ad51 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 28 May 2019 11:03:00 +0200 Subject: vgaswitcheroo: call fbcon_remap_all directly While at it, clean up the interface a bit and push the console locking into fbcon.c. v2: Remove now outdated comment (Lukas). v3: Forgot to add static inline to the dummy function. Acked-by: Lukas Wunner Signed-off-by: Daniel Vetter Reviewed-by: Sam Ravnborg Reviewed-by: Maarten Lankhorst Cc: Lukas Wunner Cc: David Airlie Cc: Daniel Vetter Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Sean Paul Cc: Bartlomiej Zolnierkiewicz Cc: Hans de Goede Cc: Yisheng Xie Cc: linux-fbdev@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20190528090304.9388-30-daniel.vetter@ffwll.ch --- drivers/gpu/vga/vga_switcheroo.c | 11 +++-------- drivers/video/fbdev/core/fbcon.c | 14 +++++--------- include/linux/fb.h | 2 -- include/linux/fbcon.h | 2 ++ 4 files changed, 10 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c index a132c37d7334..65d7541c413a 100644 --- a/drivers/gpu/vga/vga_switcheroo.c +++ b/drivers/gpu/vga/vga_switcheroo.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -736,14 +737,8 @@ static int vga_switchto_stage2(struct vga_switcheroo_client *new_client) if (!active->driver_power_control) set_audio_state(active->id, VGA_SWITCHEROO_OFF); - if (new_client->fb_info) { - struct fb_event event; - - console_lock(); - event.info = new_client->fb_info; - fb_notifier_call_chain(FB_EVENT_REMAP_ALL_CONSOLE, &event); - console_unlock(); - } + if (new_client->fb_info) + fbcon_remap_all(new_client->fb_info); mutex_lock(&vgasr_priv.mux_hw_lock); ret = vgasr_priv.handler->switchto(new_client->id); diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 1837985e1ffb..44779a4371ee 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -3149,17 +3149,16 @@ void fbcon_fb_unregistered(struct fb_info *info) do_unregister_con_driver(&fb_con); } -/* called with console_lock held */ -static void fbcon_remap_all(int idx) +void fbcon_remap_all(struct fb_info *info) { - int i; - - WARN_CONSOLE_UNLOCKED(); + int i, idx = info->node; + console_lock(); if (deferred_takeover) { for (i = first_fb_vc; i <= last_fb_vc; i++) con2fb_map_boot[i] = idx; fbcon_map_override(); + console_unlock(); return; } @@ -3172,6 +3171,7 @@ static void fbcon_remap_all(int idx) first_fb_vc + 1, last_fb_vc + 1); info_idx = idx; } + console_unlock(); } #ifdef CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY @@ -3337,10 +3337,6 @@ static int fbcon_event_notify(struct notifier_block *self, con2fb = event->data; con2fb->framebuffer = con2fb_map[con2fb->console - 1]; break; - case FB_EVENT_REMAP_ALL_CONSOLE: - idx = info->node; - fbcon_remap_all(idx); - break; } return ret; } diff --git a/include/linux/fb.h b/include/linux/fb.h index f9c212f9b661..25e4b885f5b3 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -139,8 +139,6 @@ struct fb_cursor_user { #define FB_EVENT_SET_CONSOLE_MAP 0x08 /* A display blank is requested */ #define FB_EVENT_BLANK 0x09 -/* CONSOLE-SPECIFIC: remap all consoles to new fb - for vga_switcheroo */ -#define FB_EVENT_REMAP_ALL_CONSOLE 0x0F /* A hardware display blank early change occurred */ #define FB_EARLY_EVENT_BLANK 0x10 /* A hardware display blank revert early change occurred */ diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index de31eeb22c97..69f900d289b2 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -16,6 +16,7 @@ void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps); void fbcon_fb_blanked(struct fb_info *info, int blank); void fbcon_update_vcs(struct fb_info *info, bool all); +void fbcon_remap_all(struct fb_info *info); #else static inline void fb_console_init(void) {} static inline void fb_console_exit(void) {} @@ -31,6 +32,7 @@ static inline void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps) {} static inline void fbcon_fb_blanked(struct fb_info *info, int blank) {} static inline void fbcon_update_vcs(struct fb_info *info, bool all) {} +static inline void fbcon_remap_all(struct fb_info *info) {} #endif #endif /* _LINUX_FBCON_H */ -- cgit v1.2.3 From fe2d70d6f6ff038c20705c34695bd34ac072af14 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 28 May 2019 11:03:01 +0200 Subject: fbcon: Call con2fb_map functions directly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These are actually fbcon ioctls which just happen to be exposed through /dev/fb*. They completely ignore which fb_info they're called on, and I think the userspace tool even hardcodes to /dev/fb0. Hence just forward the entire thing to fbcon.c wholesale. Note that this patch drops the fb_lock/unlock on the set side. Since the ioctl can operate on any fb (as passed in through con2fb.framebuffer) this is bogus. Also note that fbcon.c in general never calls fb_lock on anything, so this has been badly broken already. With this the last user of the fbcon notifier callback is gone, and we can garbage collect that too. v2: add missing uaccess.h include (alpha fails to compile otherwise), reported by kbuild. v3: Remember to also drop the #defines (Maarten) v4: Add the static inline to dummy functions. Signed-off-by: Daniel Vetter Reviewed-by: Sam Ravnborg Reviewed-by: Maarten Lankhorst Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Bartlomiej Zolnierkiewicz Cc: Hans de Goede Cc: Yisheng Xie Cc: "Michał Mirosław" Cc: Peter Rosin Cc: Mikulas Patocka Link: https://patchwork.freedesktop.org/patch/msgid/20190528090304.9388-31-daniel.vetter@ffwll.ch --- drivers/video/fbdev/core/fbcon.c | 59 ++++++++++++++++++++++++---------------- drivers/video/fbdev/core/fbmem.c | 34 ++--------------------- include/linux/fb.h | 4 --- include/linux/fbcon.h | 4 +++ 4 files changed, 42 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 44779a4371ee..31d6a4e54436 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -76,6 +76,7 @@ #include #include #include /* For counting font checksums */ +#include #include #include @@ -3318,29 +3319,47 @@ void fbcon_get_requirement(struct fb_info *info, } } -static int fbcon_event_notify(struct notifier_block *self, - unsigned long action, void *data) +int fbcon_set_con2fb_map_ioctl(void __user *argp) { - struct fb_event *event = data; - struct fb_info *info = event->info; - struct fb_con2fbmap *con2fb; - int idx, ret = 0; + struct fb_con2fbmap con2fb; + int ret; - switch(action) { - case FB_EVENT_SET_CONSOLE_MAP: - /* called with console lock held */ - con2fb = event->data; - ret = set_con2fb_map(con2fb->console - 1, - con2fb->framebuffer, 1); - break; - case FB_EVENT_GET_CONSOLE_MAP: - con2fb = event->data; - con2fb->framebuffer = con2fb_map[con2fb->console - 1]; - break; + if (copy_from_user(&con2fb, argp, sizeof(con2fb))) + return -EFAULT; + if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) + return -EINVAL; + if (con2fb.framebuffer >= FB_MAX) + return -EINVAL; + if (!registered_fb[con2fb.framebuffer]) + request_module("fb%d", con2fb.framebuffer); + if (!registered_fb[con2fb.framebuffer]) { + return -EINVAL; } + + console_lock(); + ret = set_con2fb_map(con2fb.console - 1, + con2fb.framebuffer, 1); + console_unlock(); + return ret; } +int fbcon_get_con2fb_map_ioctl(void __user *argp) +{ + struct fb_con2fbmap con2fb; + + if (copy_from_user(&con2fb, argp, sizeof(con2fb))) + return -EFAULT; + if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) + return -EINVAL; + + console_lock(); + con2fb.framebuffer = con2fb_map[con2fb.console - 1]; + console_unlock(); + + return copy_to_user(argp, &con2fb, sizeof(con2fb)) ? -EFAULT : 0; +} + /* * The console `switch' structure for the frame buffer based console */ @@ -3372,10 +3391,6 @@ static const struct consw fb_con = { .con_debug_leave = fbcon_debug_leave, }; -static struct notifier_block fbcon_event_notifier = { - .notifier_call = fbcon_event_notify, -}; - static ssize_t store_rotate(struct device *device, struct device_attribute *attr, const char *buf, size_t count) @@ -3648,7 +3663,6 @@ void __init fb_console_init(void) int i; console_lock(); - fb_register_client(&fbcon_event_notifier); fbcon_device = device_create(fb_class, NULL, MKDEV(0, 0), NULL, "fbcon"); @@ -3684,7 +3698,6 @@ static void __exit fbcon_deinit_device(void) void __exit fb_console_exit(void) { console_lock(); - fb_unregister_client(&fbcon_event_notifier); fbcon_deinit_device(); device_destroy(fb_class, MKDEV(0, 0)); fbcon_exit(); diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index dd1a708df1a7..64dd732021d8 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1092,10 +1092,8 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, struct fb_ops *fb; struct fb_var_screeninfo var; struct fb_fix_screeninfo fix; - struct fb_con2fbmap con2fb; struct fb_cmap cmap_from; struct fb_cmap_user cmap; - struct fb_event event; void __user *argp = (void __user *)arg; long ret = 0; @@ -1157,38 +1155,10 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, ret = -EINVAL; break; case FBIOGET_CON2FBMAP: - if (copy_from_user(&con2fb, argp, sizeof(con2fb))) - return -EFAULT; - if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) - return -EINVAL; - con2fb.framebuffer = -1; - event.data = &con2fb; - lock_fb_info(info); - event.info = info; - fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP, &event); - unlock_fb_info(info); - ret = copy_to_user(argp, &con2fb, sizeof(con2fb)) ? -EFAULT : 0; + ret = fbcon_get_con2fb_map_ioctl(argp); break; case FBIOPUT_CON2FBMAP: - if (copy_from_user(&con2fb, argp, sizeof(con2fb))) - return -EFAULT; - if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) - return -EINVAL; - if (con2fb.framebuffer >= FB_MAX) - return -EINVAL; - if (!registered_fb[con2fb.framebuffer]) - request_module("fb%d", con2fb.framebuffer); - if (!registered_fb[con2fb.framebuffer]) { - ret = -EINVAL; - break; - } - event.data = &con2fb; - console_lock(); - lock_fb_info(info); - event.info = info; - ret = fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP, &event); - unlock_fb_info(info); - console_unlock(); + ret = fbcon_set_con2fb_map_ioctl(argp); break; case FBIOBLANK: console_lock(); diff --git a/include/linux/fb.h b/include/linux/fb.h index 25e4b885f5b3..303771264644 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -133,10 +133,6 @@ struct fb_cursor_user { #define FB_EVENT_FB_UNREGISTERED 0x06 #endif -/* CONSOLE-SPECIFIC: get console to framebuffer mapping */ -#define FB_EVENT_GET_CONSOLE_MAP 0x07 -/* CONSOLE-SPECIFIC: set console to framebuffer mapping */ -#define FB_EVENT_SET_CONSOLE_MAP 0x08 /* A display blank is requested */ #define FB_EVENT_BLANK 0x09 /* A hardware display blank early change occurred */ diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index 69f900d289b2..ff5596dd30f8 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -17,6 +17,8 @@ void fbcon_get_requirement(struct fb_info *info, void fbcon_fb_blanked(struct fb_info *info, int blank); void fbcon_update_vcs(struct fb_info *info, bool all); void fbcon_remap_all(struct fb_info *info); +int fbcon_set_con2fb_map_ioctl(void __user *argp); +int fbcon_get_con2fb_map_ioctl(void __user *argp); #else static inline void fb_console_init(void) {} static inline void fb_console_exit(void) {} @@ -33,6 +35,8 @@ static inline void fbcon_get_requirement(struct fb_info *info, static inline void fbcon_fb_blanked(struct fb_info *info, int blank) {} static inline void fbcon_update_vcs(struct fb_info *info, bool all) {} static inline void fbcon_remap_all(struct fb_info *info) {} +static inline int fbcon_set_con2fb_map_ioctl(void __user *argp) { return 0; } +static inline int fbcon_get_con2fb_map_ioctl(void __user *argp) { return 0; } #endif #endif /* _LINUX_FBCON_H */ -- cgit v1.2.3 From a842fe1425cb20f457abd3f8ef98b468f83ca98b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Jun 2019 11:57:25 -0700 Subject: tcp: add optional per socket transmit delay Adding delays to TCP flows is crucial for studying behavior of TCP stacks, including congestion control modules. Linux offers netem module, but it has unpractical constraints : - Need root access to change qdisc - Hard to setup on egress if combined with non trivial qdisc like FQ - Single delay for all flows. EDT (Earliest Departure Time) adoption in TCP stack allows us to enable a per socket delay at a very small cost. Networking tools can now establish thousands of flows, each of them with a different delay, simulating real world conditions. This requires FQ packet scheduler or a EDT-enabled NIC. This patchs adds TCP_TX_DELAY socket option, to set a delay in usec units. unsigned int tx_delay = 10000; /* 10 msec */ setsockopt(fd, SOL_TCP, TCP_TX_DELAY, &tx_delay, sizeof(tx_delay)); Note that FQ packet scheduler limits might need some tweaking : man tc-fq PARAMETERS limit Hard limit on the real queue size. When this limit is reached, new packets are dropped. If the value is lowered, packets are dropped so that the new limit is met. Default is 10000 packets. flow_limit Hard limit on the maximum number of packets queued per flow. Default value is 100. Use of TCP_TX_DELAY option will increase number of skbs in FQ qdisc, so packets would be dropped if any of the previous limit is hit. Use of a jump label makes this support runtime-free, for hosts never using the option. Also note that TSQ (TCP Small Queues) limits are slightly changed with this patch : we need to account that skbs artificially delayed wont stop us providind more skbs to feed the pipe (netem uses skb_orphan_partial() for this purpose, but FQ can not use this trick) Because of that, using big delays might very well trigger old bugs in TSO auto defer logic and/or sndbuf limited detection. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 ++ include/net/tcp.h | 19 +++++++++++++++++++ include/uapi/linux/tcp.h | 3 +++ net/ipv4/tcp.c | 24 ++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 10 ++++++---- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/tcp_output.c | 23 ++++++++++++++++++++--- net/ipv6/tcp_ipv6.c | 1 + 8 files changed, 76 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 711361af9ce0..c23019a3b264 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -245,6 +245,7 @@ struct tcp_sock { syn_smc:1; /* SYN includes SMC */ u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ + u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ u64 tcp_wstamp_ns; /* departure time for next sent data packet */ u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ @@ -436,6 +437,7 @@ struct tcp_timewait_sock { u32 tw_last_oow_ack_time; int tw_ts_recent_stamp; + u32 tw_tx_delay; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *tw_md5_key; #endif diff --git a/include/net/tcp.h b/include/net/tcp.h index 204328b88412..49a178b8d5b2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2232,4 +2232,23 @@ void clean_acked_data_disable(struct inet_connection_sock *icsk); void clean_acked_data_flush(void); #endif +DECLARE_STATIC_KEY_FALSE(tcp_tx_delay_enabled); +static inline void tcp_add_tx_delay(struct sk_buff *skb, + const struct tcp_sock *tp) +{ + if (static_branch_unlikely(&tcp_tx_delay_enabled)) + skb->skb_mstamp_ns += (u64)tp->tcp_tx_delay * NSEC_PER_USEC; +} + +static inline void tcp_set_tx_time(struct sk_buff *skb, + const struct sock *sk) +{ + if (static_branch_unlikely(&tcp_tx_delay_enabled)) { + u32 delay = (sk->sk_state == TCP_TIME_WAIT) ? + tcp_twsk(sk)->tw_tx_delay : tcp_sk(sk)->tcp_tx_delay; + + skb->skb_mstamp_ns = tcp_clock_ns() + (u64)delay * NSEC_PER_USEC; + } +} + #endif /* _TCP_H */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index b521464ea962..b3564f85a762 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -127,6 +127,9 @@ enum { #define TCP_CM_INQ TCP_INQ +#define TCP_TX_DELAY 37 /* delay outgoing packets by XX usec */ + + #define TCP_REPAIR_ON 1 #define TCP_REPAIR_OFF 0 #define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bd0856ac680a..5542e3d778e6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2736,6 +2736,21 @@ static int tcp_repair_options_est(struct sock *sk, return 0; } +DEFINE_STATIC_KEY_FALSE(tcp_tx_delay_enabled); +EXPORT_SYMBOL(tcp_tx_delay_enabled); + +static void tcp_enable_tx_delay(void) +{ + if (!static_branch_unlikely(&tcp_tx_delay_enabled)) { + static int __tcp_tx_delay_enabled = 0; + + if (cmpxchg(&__tcp_tx_delay_enabled, 0, 1) == 0) { + static_branch_enable(&tcp_tx_delay_enabled); + pr_info("TCP_TX_DELAY enabled\n"); + } + } +} + /* * Socket option code for TCP. */ @@ -3087,6 +3102,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level, else tp->recvmsg_inq = val; break; + case TCP_TX_DELAY: + if (val) + tcp_enable_tx_delay(); + tp->tcp_tx_delay = val; + break; default: err = -ENOPROTOOPT; break; @@ -3546,6 +3566,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = tp->fastopen_no_cookie; break; + case TCP_TX_DELAY: + val = tp->tcp_tx_delay; + break; + case TCP_TIMESTAMP: val = tcp_time_stamp_raw() + tp->tsoffset; break; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f059fbd81a84..1b7e9e1fbd3b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -767,9 +767,11 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); local_bh_disable(); ctl_sk = this_cpu_read(*net->ipv4.tcp_sk); - if (sk) + if (sk) { ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_mark : sk->sk_mark; + tcp_set_tx_time(skb, sk); + } ip_send_unicast_reply(ctl_sk, skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, @@ -859,9 +861,9 @@ static void tcp_v4_send_ack(const struct sock *sk, arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); local_bh_disable(); ctl_sk = this_cpu_read(*net->ipv4.tcp_sk); - if (sk) - ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? - inet_twsk(sk)->tw_mark : sk->sk_mark; + ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? + inet_twsk(sk)->tw_mark : sk->sk_mark; + tcp_set_tx_time(skb, sk); ip_send_unicast_reply(ctl_sk, skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 11011e8386dc..8bcaf2586b68 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -274,7 +274,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; tcptw->tw_ts_offset = tp->tsoffset; tcptw->tw_last_oow_ack_time = 0; - + tcptw->tw_tx_delay = tp->tcp_tx_delay; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f429e856e263..d954ff9069e8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1153,6 +1153,8 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, memset(skb->cb, 0, max(sizeof(struct inet_skb_parm), sizeof(struct inet6_skb_parm))); + tcp_add_tx_delay(skb, tp); + err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); if (unlikely(err > 0)) { @@ -2234,6 +2236,18 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes); limit <<= factor; + if (static_branch_unlikely(&tcp_tx_delay_enabled) && + tcp_sk(sk)->tcp_tx_delay) { + u64 extra_bytes = (u64)sk->sk_pacing_rate * tcp_sk(sk)->tcp_tx_delay; + + /* TSQ is based on skb truesize sum (sk_wmem_alloc), so we + * approximate our needs assuming an ~100% skb->truesize overhead. + * USEC_PER_SEC is approximated by 2^20. + * do_div(extra_bytes, USEC_PER_SEC/2) is replaced by a right shift. + */ + extra_bytes >>= (20 - 1); + limit += extra_bytes; + } if (refcount_read(&sk->sk_wmem_alloc) > limit) { /* Always send skb if rtx queue is empty. * No need to wait for TX completion to call us back, @@ -3212,6 +3226,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, int tcp_header_size; struct tcphdr *th; int mss; + u64 now; skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); if (unlikely(!skb)) { @@ -3243,13 +3258,14 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, mss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); memset(&opts, 0, sizeof(opts)); + now = tcp_clock_ns(); #ifdef CONFIG_SYN_COOKIES if (unlikely(req->cookie_ts)) skb->skb_mstamp_ns = cookie_init_timestamp(req); else #endif { - skb->skb_mstamp_ns = tcp_clock_ns(); + skb->skb_mstamp_ns = now; if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */ tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb); } @@ -3292,8 +3308,9 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, rcu_read_unlock(); #endif - /* Do not fool tcpdump (if any), clean our debris */ - skb->tstamp = 0; + skb->skb_mstamp_ns = now; + tcp_add_tx_delay(skb, tp); + return skb; } EXPORT_SYMBOL(tcp_make_synack); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ad7039137a20..5606b2131b65 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -892,6 +892,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 } else { mark = sk->sk_mark; } + tcp_set_tx_time(buff, sk); } fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; fl6.fl6_dport = t1->dest; -- cgit v1.2.3 From d664c43958e0d9e0b34e23b6f8a8f4cf8ec61a2e Mon Sep 17 00:00:00 2001 From: Enrico Weigelt Date: Wed, 12 Jun 2019 23:59:36 +0200 Subject: gpio: Fix build warnings on undefined struct pinctrl_dev This fixes the warnings: * include/linux/gpio.h:254:11: warning: 'struct pinctrl_dev' declared inside parameter list will not be visible outside of this definition or declaration * include/linux/gpio/driver.h:602:11: warning: 'struct pinctrl_dev' declared inside parameter list will not be visible outside of this definition or declaration Fixes: 78b99577b393 ("pinctrl: remove unused pin_is_valid()") Reported-by: kbuild test robot Signed-off-by: Enrico Weigelt Signed-off-by: Linus Walleij --- include/linux/gpio.h | 1 + include/linux/gpio/driver.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 39745b8bdd65..40915b461f18 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -106,6 +106,7 @@ void devm_gpio_free(struct device *dev, unsigned int gpio); struct device; struct gpio_chip; +struct pinctrl_dev; static inline bool gpio_is_valid(int number) { diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index a1d273c96016..b58b27c11355 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -590,6 +590,8 @@ void gpiochip_remove_pin_ranges(struct gpio_chip *chip); #else +struct pinctrl_dev; + static inline int gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name, unsigned int gpio_offset, unsigned int pin_offset, -- cgit v1.2.3 From 68608b5e5063dd12942f1118286c6f595d0c4a05 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 10 Jun 2019 12:18:56 +0300 Subject: firmware: ti_sci: Add resource management APIs for ringacc, psi-l and udma Configuration of NAVSS resource, like rings, UDMAP channels, flows and PSI-L thread management need to be done via TISCI. Add the needed structures and functions for NAVSS resource configuration of the following: Rings from Ring Accelerator PSI-L thread management UDMAP tchan, rchan and rflow configuration. Signed-off-by: Peter Ujfalusi Reviewed-by: Lokesh Vutla Signed-off-by: Tero Kristo Signed-off-by: Santosh Shilimkar --- drivers/firmware/ti_sci.c | 488 ++++++++++++++++++++++++ drivers/firmware/ti_sci.h | 675 +++++++++++++++++++++++++++++++++ include/linux/soc/ti/ti_sci_protocol.h | 215 +++++++++++ 3 files changed, 1378 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c index 36ce11a67235..02fa196428d8 100644 --- a/drivers/firmware/ti_sci.c +++ b/drivers/firmware/ti_sci.c @@ -2004,6 +2004,481 @@ static int ti_sci_cmd_free_event_map(const struct ti_sci_handle *handle, ia_id, vint, global_event, vint_status_bit, 0); } +/** + * ti_sci_cmd_ring_config() - configure RA ring + * @handle: Pointer to TI SCI handle. + * @valid_params: Bitfield defining validity of ring configuration + * parameters + * @nav_id: Device ID of Navigator Subsystem from which the ring is + * allocated + * @index: Ring index + * @addr_lo: The ring base address lo 32 bits + * @addr_hi: The ring base address hi 32 bits + * @count: Number of ring elements + * @mode: The mode of the ring + * @size: The ring element size. + * @order_id: Specifies the ring's bus order ID + * + * Return: 0 if all went well, else returns appropriate error value. + * + * See @ti_sci_msg_rm_ring_cfg_req for more info. + */ +static int ti_sci_cmd_ring_config(const struct ti_sci_handle *handle, + u32 valid_params, u16 nav_id, u16 index, + u32 addr_lo, u32 addr_hi, u32 count, + u8 mode, u8 size, u8 order_id) +{ + struct ti_sci_msg_rm_ring_cfg_req *req; + struct ti_sci_msg_hdr *resp; + struct ti_sci_xfer *xfer; + struct ti_sci_info *info; + struct device *dev; + int ret = 0; + + if (IS_ERR_OR_NULL(handle)) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_RM_RING_CFG, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(info->dev, "RM_RA:Message config failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_rm_ring_cfg_req *)xfer->xfer_buf; + req->valid_params = valid_params; + req->nav_id = nav_id; + req->index = index; + req->addr_lo = addr_lo; + req->addr_hi = addr_hi; + req->count = count; + req->mode = mode; + req->size = size; + req->order_id = order_id; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(info->dev, "RM_RA:Mbox config send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_hdr *)xfer->xfer_buf; + ret = ti_sci_is_response_ack(resp) ? 0 : -ENODEV; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + dev_dbg(info->dev, "RM_RA:config ring %u ret:%d\n", index, ret); + return ret; +} + +/** + * ti_sci_cmd_ring_get_config() - get RA ring configuration + * @handle: Pointer to TI SCI handle. + * @nav_id: Device ID of Navigator Subsystem from which the ring is + * allocated + * @index: Ring index + * @addr_lo: Returns ring's base address lo 32 bits + * @addr_hi: Returns ring's base address hi 32 bits + * @count: Returns number of ring elements + * @mode: Returns mode of the ring + * @size: Returns ring element size + * @order_id: Returns ring's bus order ID + * + * Return: 0 if all went well, else returns appropriate error value. + * + * See @ti_sci_msg_rm_ring_get_cfg_req for more info. + */ +static int ti_sci_cmd_ring_get_config(const struct ti_sci_handle *handle, + u32 nav_id, u32 index, u8 *mode, + u32 *addr_lo, u32 *addr_hi, + u32 *count, u8 *size, u8 *order_id) +{ + struct ti_sci_msg_rm_ring_get_cfg_resp *resp; + struct ti_sci_msg_rm_ring_get_cfg_req *req; + struct ti_sci_xfer *xfer; + struct ti_sci_info *info; + struct device *dev; + int ret = 0; + + if (IS_ERR_OR_NULL(handle)) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_RM_RING_GET_CFG, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(info->dev, + "RM_RA:Message get config failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_rm_ring_get_cfg_req *)xfer->xfer_buf; + req->nav_id = nav_id; + req->index = index; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(info->dev, "RM_RA:Mbox get config send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_rm_ring_get_cfg_resp *)xfer->xfer_buf; + + if (!ti_sci_is_response_ack(resp)) { + ret = -ENODEV; + } else { + if (mode) + *mode = resp->mode; + if (addr_lo) + *addr_lo = resp->addr_lo; + if (addr_hi) + *addr_hi = resp->addr_hi; + if (count) + *count = resp->count; + if (size) + *size = resp->size; + if (order_id) + *order_id = resp->order_id; + }; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + dev_dbg(info->dev, "RM_RA:get config ring %u ret:%d\n", index, ret); + return ret; +} + +/** + * ti_sci_cmd_rm_psil_pair() - Pair PSI-L source to destination thread + * @handle: Pointer to TI SCI handle. + * @nav_id: Device ID of Navigator Subsystem which should be used for + * pairing + * @src_thread: Source PSI-L thread ID + * @dst_thread: Destination PSI-L thread ID + * + * Return: 0 if all went well, else returns appropriate error value. + */ +static int ti_sci_cmd_rm_psil_pair(const struct ti_sci_handle *handle, + u32 nav_id, u32 src_thread, u32 dst_thread) +{ + struct ti_sci_msg_psil_pair *req; + struct ti_sci_msg_hdr *resp; + struct ti_sci_xfer *xfer; + struct ti_sci_info *info; + struct device *dev; + int ret = 0; + + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (!handle) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_RM_PSIL_PAIR, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "RM_PSIL:Message reconfig failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_psil_pair *)xfer->xfer_buf; + req->nav_id = nav_id; + req->src_thread = src_thread; + req->dst_thread = dst_thread; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "RM_PSIL:Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_hdr *)xfer->xfer_buf; + ret = ti_sci_is_response_ack(resp) ? 0 : -EINVAL; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + +/** + * ti_sci_cmd_rm_psil_unpair() - Unpair PSI-L source from destination thread + * @handle: Pointer to TI SCI handle. + * @nav_id: Device ID of Navigator Subsystem which should be used for + * unpairing + * @src_thread: Source PSI-L thread ID + * @dst_thread: Destination PSI-L thread ID + * + * Return: 0 if all went well, else returns appropriate error value. + */ +static int ti_sci_cmd_rm_psil_unpair(const struct ti_sci_handle *handle, + u32 nav_id, u32 src_thread, u32 dst_thread) +{ + struct ti_sci_msg_psil_unpair *req; + struct ti_sci_msg_hdr *resp; + struct ti_sci_xfer *xfer; + struct ti_sci_info *info; + struct device *dev; + int ret = 0; + + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (!handle) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_RM_PSIL_UNPAIR, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "RM_PSIL:Message reconfig failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_psil_unpair *)xfer->xfer_buf; + req->nav_id = nav_id; + req->src_thread = src_thread; + req->dst_thread = dst_thread; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "RM_PSIL:Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_hdr *)xfer->xfer_buf; + ret = ti_sci_is_response_ack(resp) ? 0 : -EINVAL; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + +/** + * ti_sci_cmd_rm_udmap_tx_ch_cfg() - Configure a UDMAP TX channel + * @handle: Pointer to TI SCI handle. + * @params: Pointer to ti_sci_msg_rm_udmap_tx_ch_cfg TX channel config + * structure + * + * Return: 0 if all went well, else returns appropriate error value. + * + * See @ti_sci_msg_rm_udmap_tx_ch_cfg and @ti_sci_msg_rm_udmap_tx_ch_cfg_req for + * more info. + */ +static int ti_sci_cmd_rm_udmap_tx_ch_cfg(const struct ti_sci_handle *handle, + const struct ti_sci_msg_rm_udmap_tx_ch_cfg *params) +{ + struct ti_sci_msg_rm_udmap_tx_ch_cfg_req *req; + struct ti_sci_msg_hdr *resp; + struct ti_sci_xfer *xfer; + struct ti_sci_info *info; + struct device *dev; + int ret = 0; + + if (IS_ERR_OR_NULL(handle)) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TISCI_MSG_RM_UDMAP_TX_CH_CFG, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(info->dev, "Message TX_CH_CFG alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_rm_udmap_tx_ch_cfg_req *)xfer->xfer_buf; + req->valid_params = params->valid_params; + req->nav_id = params->nav_id; + req->index = params->index; + req->tx_pause_on_err = params->tx_pause_on_err; + req->tx_filt_einfo = params->tx_filt_einfo; + req->tx_filt_pswords = params->tx_filt_pswords; + req->tx_atype = params->tx_atype; + req->tx_chan_type = params->tx_chan_type; + req->tx_supr_tdpkt = params->tx_supr_tdpkt; + req->tx_fetch_size = params->tx_fetch_size; + req->tx_credit_count = params->tx_credit_count; + req->txcq_qnum = params->txcq_qnum; + req->tx_priority = params->tx_priority; + req->tx_qos = params->tx_qos; + req->tx_orderid = params->tx_orderid; + req->fdepth = params->fdepth; + req->tx_sched_priority = params->tx_sched_priority; + req->tx_burst_size = params->tx_burst_size; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(info->dev, "Mbox send TX_CH_CFG fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_hdr *)xfer->xfer_buf; + ret = ti_sci_is_response_ack(resp) ? 0 : -EINVAL; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + dev_dbg(info->dev, "TX_CH_CFG: chn %u ret:%u\n", params->index, ret); + return ret; +} + +/** + * ti_sci_cmd_rm_udmap_rx_ch_cfg() - Configure a UDMAP RX channel + * @handle: Pointer to TI SCI handle. + * @params: Pointer to ti_sci_msg_rm_udmap_rx_ch_cfg RX channel config + * structure + * + * Return: 0 if all went well, else returns appropriate error value. + * + * See @ti_sci_msg_rm_udmap_rx_ch_cfg and @ti_sci_msg_rm_udmap_rx_ch_cfg_req for + * more info. + */ +static int ti_sci_cmd_rm_udmap_rx_ch_cfg(const struct ti_sci_handle *handle, + const struct ti_sci_msg_rm_udmap_rx_ch_cfg *params) +{ + struct ti_sci_msg_rm_udmap_rx_ch_cfg_req *req; + struct ti_sci_msg_hdr *resp; + struct ti_sci_xfer *xfer; + struct ti_sci_info *info; + struct device *dev; + int ret = 0; + + if (IS_ERR_OR_NULL(handle)) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TISCI_MSG_RM_UDMAP_RX_CH_CFG, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(info->dev, "Message RX_CH_CFG alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_rm_udmap_rx_ch_cfg_req *)xfer->xfer_buf; + req->valid_params = params->valid_params; + req->nav_id = params->nav_id; + req->index = params->index; + req->rx_fetch_size = params->rx_fetch_size; + req->rxcq_qnum = params->rxcq_qnum; + req->rx_priority = params->rx_priority; + req->rx_qos = params->rx_qos; + req->rx_orderid = params->rx_orderid; + req->rx_sched_priority = params->rx_sched_priority; + req->flowid_start = params->flowid_start; + req->flowid_cnt = params->flowid_cnt; + req->rx_pause_on_err = params->rx_pause_on_err; + req->rx_atype = params->rx_atype; + req->rx_chan_type = params->rx_chan_type; + req->rx_ignore_short = params->rx_ignore_short; + req->rx_ignore_long = params->rx_ignore_long; + req->rx_burst_size = params->rx_burst_size; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(info->dev, "Mbox send RX_CH_CFG fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_hdr *)xfer->xfer_buf; + ret = ti_sci_is_response_ack(resp) ? 0 : -EINVAL; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + dev_dbg(info->dev, "RX_CH_CFG: chn %u ret:%d\n", params->index, ret); + return ret; +} + +/** + * ti_sci_cmd_rm_udmap_rx_flow_cfg() - Configure UDMAP RX FLOW + * @handle: Pointer to TI SCI handle. + * @params: Pointer to ti_sci_msg_rm_udmap_flow_cfg RX FLOW config + * structure + * + * Return: 0 if all went well, else returns appropriate error value. + * + * See @ti_sci_msg_rm_udmap_flow_cfg and @ti_sci_msg_rm_udmap_flow_cfg_req for + * more info. + */ +static int ti_sci_cmd_rm_udmap_rx_flow_cfg(const struct ti_sci_handle *handle, + const struct ti_sci_msg_rm_udmap_flow_cfg *params) +{ + struct ti_sci_msg_rm_udmap_flow_cfg_req *req; + struct ti_sci_msg_hdr *resp; + struct ti_sci_xfer *xfer; + struct ti_sci_info *info; + struct device *dev; + int ret = 0; + + if (IS_ERR_OR_NULL(handle)) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TISCI_MSG_RM_UDMAP_FLOW_CFG, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "RX_FL_CFG: Message alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_rm_udmap_flow_cfg_req *)xfer->xfer_buf; + req->valid_params = params->valid_params; + req->nav_id = params->nav_id; + req->flow_index = params->flow_index; + req->rx_einfo_present = params->rx_einfo_present; + req->rx_psinfo_present = params->rx_psinfo_present; + req->rx_error_handling = params->rx_error_handling; + req->rx_desc_type = params->rx_desc_type; + req->rx_sop_offset = params->rx_sop_offset; + req->rx_dest_qnum = params->rx_dest_qnum; + req->rx_src_tag_hi = params->rx_src_tag_hi; + req->rx_src_tag_lo = params->rx_src_tag_lo; + req->rx_dest_tag_hi = params->rx_dest_tag_hi; + req->rx_dest_tag_lo = params->rx_dest_tag_lo; + req->rx_src_tag_hi_sel = params->rx_src_tag_hi_sel; + req->rx_src_tag_lo_sel = params->rx_src_tag_lo_sel; + req->rx_dest_tag_hi_sel = params->rx_dest_tag_hi_sel; + req->rx_dest_tag_lo_sel = params->rx_dest_tag_lo_sel; + req->rx_fdq0_sz0_qnum = params->rx_fdq0_sz0_qnum; + req->rx_fdq1_qnum = params->rx_fdq1_qnum; + req->rx_fdq2_qnum = params->rx_fdq2_qnum; + req->rx_fdq3_qnum = params->rx_fdq3_qnum; + req->rx_ps_location = params->rx_ps_location; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "RX_FL_CFG: Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_hdr *)xfer->xfer_buf; + ret = ti_sci_is_response_ack(resp) ? 0 : -EINVAL; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + dev_dbg(info->dev, "RX_FL_CFG: %u ret:%d\n", params->flow_index, ret); + return ret; +} + /* * ti_sci_setup_ops() - Setup the operations structures * @info: pointer to TISCI pointer @@ -2016,6 +2491,9 @@ static void ti_sci_setup_ops(struct ti_sci_info *info) struct ti_sci_clk_ops *cops = &ops->clk_ops; struct ti_sci_rm_core_ops *rm_core_ops = &ops->rm_core_ops; struct ti_sci_rm_irq_ops *iops = &ops->rm_irq_ops; + struct ti_sci_rm_ringacc_ops *rops = &ops->rm_ring_ops; + struct ti_sci_rm_psil_ops *psilops = &ops->rm_psil_ops; + struct ti_sci_rm_udmap_ops *udmap_ops = &ops->rm_udmap_ops; core_ops->reboot_device = ti_sci_cmd_core_reboot; @@ -2055,6 +2533,16 @@ static void ti_sci_setup_ops(struct ti_sci_info *info) iops->set_event_map = ti_sci_cmd_set_event_map; iops->free_irq = ti_sci_cmd_free_irq; iops->free_event_map = ti_sci_cmd_free_event_map; + + rops->config = ti_sci_cmd_ring_config; + rops->get_config = ti_sci_cmd_ring_get_config; + + psilops->pair = ti_sci_cmd_rm_psil_pair; + psilops->unpair = ti_sci_cmd_rm_psil_unpair; + + udmap_ops->tx_ch_cfg = ti_sci_cmd_rm_udmap_tx_ch_cfg; + udmap_ops->rx_ch_cfg = ti_sci_cmd_rm_udmap_rx_ch_cfg; + udmap_ops->rx_flow_cfg = ti_sci_cmd_rm_udmap_rx_flow_cfg; } /** diff --git a/drivers/firmware/ti_sci.h b/drivers/firmware/ti_sci.h index 4983827151bf..2bb81ec7793c 100644 --- a/drivers/firmware/ti_sci.h +++ b/drivers/firmware/ti_sci.h @@ -42,6 +42,35 @@ #define TI_SCI_MSG_SET_IRQ 0x1000 #define TI_SCI_MSG_FREE_IRQ 0x1001 +/* NAVSS resource management */ +/* Ringacc requests */ +#define TI_SCI_MSG_RM_RING_ALLOCATE 0x1100 +#define TI_SCI_MSG_RM_RING_FREE 0x1101 +#define TI_SCI_MSG_RM_RING_RECONFIG 0x1102 +#define TI_SCI_MSG_RM_RING_RESET 0x1103 +#define TI_SCI_MSG_RM_RING_CFG 0x1110 +#define TI_SCI_MSG_RM_RING_GET_CFG 0x1111 + +/* PSI-L requests */ +#define TI_SCI_MSG_RM_PSIL_PAIR 0x1280 +#define TI_SCI_MSG_RM_PSIL_UNPAIR 0x1281 + +#define TI_SCI_MSG_RM_UDMAP_TX_ALLOC 0x1200 +#define TI_SCI_MSG_RM_UDMAP_TX_FREE 0x1201 +#define TI_SCI_MSG_RM_UDMAP_RX_ALLOC 0x1210 +#define TI_SCI_MSG_RM_UDMAP_RX_FREE 0x1211 +#define TI_SCI_MSG_RM_UDMAP_FLOW_CFG 0x1220 +#define TI_SCI_MSG_RM_UDMAP_OPT_FLOW_CFG 0x1221 + +#define TISCI_MSG_RM_UDMAP_TX_CH_CFG 0x1205 +#define TISCI_MSG_RM_UDMAP_TX_CH_GET_CFG 0x1206 +#define TISCI_MSG_RM_UDMAP_RX_CH_CFG 0x1215 +#define TISCI_MSG_RM_UDMAP_RX_CH_GET_CFG 0x1216 +#define TISCI_MSG_RM_UDMAP_FLOW_CFG 0x1230 +#define TISCI_MSG_RM_UDMAP_FLOW_SIZE_THRESH_CFG 0x1231 +#define TISCI_MSG_RM_UDMAP_FLOW_GET_CFG 0x1232 +#define TISCI_MSG_RM_UDMAP_FLOW_SIZE_THRESH_GET_CFG 0x1233 + /** * struct ti_sci_msg_hdr - Generic Message Header for All messages and responses * @type: Type of messages: One of TI_SCI_MSG* values @@ -563,4 +592,650 @@ struct ti_sci_msg_req_manage_irq { u8 secondary_host; } __packed; +/** + * struct ti_sci_msg_rm_ring_cfg_req - Configure a Navigator Subsystem ring + * + * Configures the non-real-time registers of a Navigator Subsystem ring. + * @hdr: Generic Header + * @valid_params: Bitfield defining validity of ring configuration parameters. + * The ring configuration fields are not valid, and will not be used for + * ring configuration, if their corresponding valid bit is zero. + * Valid bit usage: + * 0 - Valid bit for @tisci_msg_rm_ring_cfg_req addr_lo + * 1 - Valid bit for @tisci_msg_rm_ring_cfg_req addr_hi + * 2 - Valid bit for @tisci_msg_rm_ring_cfg_req count + * 3 - Valid bit for @tisci_msg_rm_ring_cfg_req mode + * 4 - Valid bit for @tisci_msg_rm_ring_cfg_req size + * 5 - Valid bit for @tisci_msg_rm_ring_cfg_req order_id + * @nav_id: Device ID of Navigator Subsystem from which the ring is allocated + * @index: ring index to be configured. + * @addr_lo: 32 LSBs of ring base address to be programmed into the ring's + * RING_BA_LO register + * @addr_hi: 16 MSBs of ring base address to be programmed into the ring's + * RING_BA_HI register. + * @count: Number of ring elements. Must be even if mode is CREDENTIALS or QM + * modes. + * @mode: Specifies the mode the ring is to be configured. + * @size: Specifies encoded ring element size. To calculate the encoded size use + * the formula (log2(size_bytes) - 2), where size_bytes cannot be + * greater than 256. + * @order_id: Specifies the ring's bus order ID. + */ +struct ti_sci_msg_rm_ring_cfg_req { + struct ti_sci_msg_hdr hdr; + u32 valid_params; + u16 nav_id; + u16 index; + u32 addr_lo; + u32 addr_hi; + u32 count; + u8 mode; + u8 size; + u8 order_id; +} __packed; + +/** + * struct ti_sci_msg_rm_ring_get_cfg_req - Get RA ring's configuration + * + * Gets the configuration of the non-real-time register fields of a ring. The + * host, or a supervisor of the host, who owns the ring must be the requesting + * host. The values of the non-real-time registers are returned in + * @ti_sci_msg_rm_ring_get_cfg_resp. + * + * @hdr: Generic Header + * @nav_id: Device ID of Navigator Subsystem from which the ring is allocated + * @index: ring index. + */ +struct ti_sci_msg_rm_ring_get_cfg_req { + struct ti_sci_msg_hdr hdr; + u16 nav_id; + u16 index; +} __packed; + +/** + * struct ti_sci_msg_rm_ring_get_cfg_resp - Ring get configuration response + * + * Response received by host processor after RM has handled + * @ti_sci_msg_rm_ring_get_cfg_req. The response contains the ring's + * non-real-time register values. + * + * @hdr: Generic Header + * @addr_lo: Ring 32 LSBs of base address + * @addr_hi: Ring 16 MSBs of base address. + * @count: Ring number of elements. + * @mode: Ring mode. + * @size: encoded Ring element size + * @order_id: ing order ID. + */ +struct ti_sci_msg_rm_ring_get_cfg_resp { + struct ti_sci_msg_hdr hdr; + u32 addr_lo; + u32 addr_hi; + u32 count; + u8 mode; + u8 size; + u8 order_id; +} __packed; + +/** + * struct ti_sci_msg_psil_pair - Pairs a PSI-L source thread to a destination + * thread + * @hdr: Generic Header + * @nav_id: SoC Navigator Subsystem device ID whose PSI-L config proxy is + * used to pair the source and destination threads. + * @src_thread: PSI-L source thread ID within the PSI-L System thread map. + * + * UDMAP transmit channels mapped to source threads will have their + * TCHAN_THRD_ID register programmed with the destination thread if the pairing + * is successful. + + * @dst_thread: PSI-L destination thread ID within the PSI-L System thread map. + * PSI-L destination threads start at index 0x8000. The request is NACK'd if + * the destination thread is not greater than or equal to 0x8000. + * + * UDMAP receive channels mapped to destination threads will have their + * RCHAN_THRD_ID register programmed with the source thread if the pairing + * is successful. + * + * Request type is TI_SCI_MSG_RM_PSIL_PAIR, response is a generic ACK or NACK + * message. + */ +struct ti_sci_msg_psil_pair { + struct ti_sci_msg_hdr hdr; + u32 nav_id; + u32 src_thread; + u32 dst_thread; +} __packed; + +/** + * struct ti_sci_msg_psil_unpair - Unpairs a PSI-L source thread from a + * destination thread + * @hdr: Generic Header + * @nav_id: SoC Navigator Subsystem device ID whose PSI-L config proxy is + * used to unpair the source and destination threads. + * @src_thread: PSI-L source thread ID within the PSI-L System thread map. + * + * UDMAP transmit channels mapped to source threads will have their + * TCHAN_THRD_ID register cleared if the unpairing is successful. + * + * @dst_thread: PSI-L destination thread ID within the PSI-L System thread map. + * PSI-L destination threads start at index 0x8000. The request is NACK'd if + * the destination thread is not greater than or equal to 0x8000. + * + * UDMAP receive channels mapped to destination threads will have their + * RCHAN_THRD_ID register cleared if the unpairing is successful. + * + * Request type is TI_SCI_MSG_RM_PSIL_UNPAIR, response is a generic ACK or NACK + * message. + */ +struct ti_sci_msg_psil_unpair { + struct ti_sci_msg_hdr hdr; + u32 nav_id; + u32 src_thread; + u32 dst_thread; +} __packed; + +/** + * struct ti_sci_msg_udmap_rx_flow_cfg - UDMAP receive flow configuration + * message + * @hdr: Generic Header + * @nav_id: SoC Navigator Subsystem device ID from which the receive flow is + * allocated + * @flow_index: UDMAP receive flow index for non-optional configuration. + * @rx_ch_index: Specifies the index of the receive channel using the flow_index + * @rx_einfo_present: UDMAP receive flow extended packet info present. + * @rx_psinfo_present: UDMAP receive flow PS words present. + * @rx_error_handling: UDMAP receive flow error handling configuration. Valid + * values are TI_SCI_RM_UDMAP_RX_FLOW_ERR_DROP/RETRY. + * @rx_desc_type: UDMAP receive flow descriptor type. It can be one of + * TI_SCI_RM_UDMAP_RX_FLOW_DESC_HOST/MONO. + * @rx_sop_offset: UDMAP receive flow start of packet offset. + * @rx_dest_qnum: UDMAP receive flow destination queue number. + * @rx_ps_location: UDMAP receive flow PS words location. + * 0 - end of packet descriptor + * 1 - Beginning of the data buffer + * @rx_src_tag_hi: UDMAP receive flow source tag high byte constant + * @rx_src_tag_lo: UDMAP receive flow source tag low byte constant + * @rx_dest_tag_hi: UDMAP receive flow destination tag high byte constant + * @rx_dest_tag_lo: UDMAP receive flow destination tag low byte constant + * @rx_src_tag_hi_sel: UDMAP receive flow source tag high byte selector + * @rx_src_tag_lo_sel: UDMAP receive flow source tag low byte selector + * @rx_dest_tag_hi_sel: UDMAP receive flow destination tag high byte selector + * @rx_dest_tag_lo_sel: UDMAP receive flow destination tag low byte selector + * @rx_size_thresh_en: UDMAP receive flow packet size based free buffer queue + * enable. If enabled, the ti_sci_rm_udmap_rx_flow_opt_cfg also need to be + * configured and sent. + * @rx_fdq0_sz0_qnum: UDMAP receive flow free descriptor queue 0. + * @rx_fdq1_qnum: UDMAP receive flow free descriptor queue 1. + * @rx_fdq2_qnum: UDMAP receive flow free descriptor queue 2. + * @rx_fdq3_qnum: UDMAP receive flow free descriptor queue 3. + * + * For detailed information on the settings, see the UDMAP section of the TRM. + */ +struct ti_sci_msg_udmap_rx_flow_cfg { + struct ti_sci_msg_hdr hdr; + u32 nav_id; + u32 flow_index; + u32 rx_ch_index; + u8 rx_einfo_present; + u8 rx_psinfo_present; + u8 rx_error_handling; + u8 rx_desc_type; + u16 rx_sop_offset; + u16 rx_dest_qnum; + u8 rx_ps_location; + u8 rx_src_tag_hi; + u8 rx_src_tag_lo; + u8 rx_dest_tag_hi; + u8 rx_dest_tag_lo; + u8 rx_src_tag_hi_sel; + u8 rx_src_tag_lo_sel; + u8 rx_dest_tag_hi_sel; + u8 rx_dest_tag_lo_sel; + u8 rx_size_thresh_en; + u16 rx_fdq0_sz0_qnum; + u16 rx_fdq1_qnum; + u16 rx_fdq2_qnum; + u16 rx_fdq3_qnum; +} __packed; + +/** + * struct rm_ti_sci_msg_udmap_rx_flow_opt_cfg - parameters for UDMAP receive + * flow optional configuration + * @hdr: Generic Header + * @nav_id: SoC Navigator Subsystem device ID from which the receive flow is + * allocated + * @flow_index: UDMAP receive flow index for optional configuration. + * @rx_ch_index: Specifies the index of the receive channel using the flow_index + * @rx_size_thresh0: UDMAP receive flow packet size threshold 0. + * @rx_size_thresh1: UDMAP receive flow packet size threshold 1. + * @rx_size_thresh2: UDMAP receive flow packet size threshold 2. + * @rx_fdq0_sz1_qnum: UDMAP receive flow free descriptor queue for size + * threshold 1. + * @rx_fdq0_sz2_qnum: UDMAP receive flow free descriptor queue for size + * threshold 2. + * @rx_fdq0_sz3_qnum: UDMAP receive flow free descriptor queue for size + * threshold 3. + * + * For detailed information on the settings, see the UDMAP section of the TRM. + */ +struct rm_ti_sci_msg_udmap_rx_flow_opt_cfg { + struct ti_sci_msg_hdr hdr; + u32 nav_id; + u32 flow_index; + u32 rx_ch_index; + u16 rx_size_thresh0; + u16 rx_size_thresh1; + u16 rx_size_thresh2; + u16 rx_fdq0_sz1_qnum; + u16 rx_fdq0_sz2_qnum; + u16 rx_fdq0_sz3_qnum; +} __packed; + +/** + * Configures a Navigator Subsystem UDMAP transmit channel + * + * Configures the non-real-time registers of a Navigator Subsystem UDMAP + * transmit channel. The channel index must be assigned to the host defined + * in the TISCI header via the RM board configuration resource assignment + * range list. + * + * @hdr: Generic Header + * + * @valid_params: Bitfield defining validity of tx channel configuration + * parameters. The tx channel configuration fields are not valid, and will not + * be used for ch configuration, if their corresponding valid bit is zero. + * Valid bit usage: + * 0 - Valid bit for @ref ti_sci_msg_rm_udmap_tx_ch_cfg::tx_pause_on_err + * 1 - Valid bit for @ref ti_sci_msg_rm_udmap_tx_ch_cfg::tx_atype + * 2 - Valid bit for @ref ti_sci_msg_rm_udmap_tx_ch_cfg::tx_chan_type + * 3 - Valid bit for @ref ti_sci_msg_rm_udmap_tx_ch_cfg::tx_fetch_size + * 4 - Valid bit for @ref ti_sci_msg_rm_udmap_tx_ch_cfg::txcq_qnum + * 5 - Valid bit for @ref ti_sci_msg_rm_udmap_tx_ch_cfg::tx_priority + * 6 - Valid bit for @ref ti_sci_msg_rm_udmap_tx_ch_cfg::tx_qos + * 7 - Valid bit for @ref ti_sci_msg_rm_udmap_tx_ch_cfg::tx_orderid + * 8 - Valid bit for @ref ti_sci_msg_rm_udmap_tx_ch_cfg::tx_sched_priority + * 9 - Valid bit for @ref ti_sci_msg_rm_udmap_tx_ch_cfg::tx_filt_einfo + * 10 - Valid bit for @ref ti_sci_msg_rm_udmap_tx_ch_cfg::tx_filt_pswords + * 11 - Valid bit for @ref ti_sci_msg_rm_udmap_tx_ch_cfg::tx_supr_tdpkt + * 12 - Valid bit for @ref ti_sci_msg_rm_udmap_tx_ch_cfg::tx_credit_count + * 13 - Valid bit for @ref ti_sci_msg_rm_udmap_tx_ch_cfg::fdepth + * 14 - Valid bit for @ref ti_sci_msg_rm_udmap_tx_ch_cfg::tx_burst_size + * + * @nav_id: SoC device ID of Navigator Subsystem where tx channel is located + * + * @index: UDMAP transmit channel index. + * + * @tx_pause_on_err: UDMAP transmit channel pause on error configuration to + * be programmed into the tx_pause_on_err field of the channel's TCHAN_TCFG + * register. + * + * @tx_filt_einfo: UDMAP transmit channel extended packet information passing + * configuration to be programmed into the tx_filt_einfo field of the + * channel's TCHAN_TCFG register. + * + * @tx_filt_pswords: UDMAP transmit channel protocol specific word passing + * configuration to be programmed into the tx_filt_pswords field of the + * channel's TCHAN_TCFG register. + * + * @tx_atype: UDMAP transmit channel non Ring Accelerator access pointer + * interpretation configuration to be programmed into the tx_atype field of + * the channel's TCHAN_TCFG register. + * + * @tx_chan_type: UDMAP transmit channel functional channel type and work + * passing mechanism configuration to be programmed into the tx_chan_type + * field of the channel's TCHAN_TCFG register. + * + * @tx_supr_tdpkt: UDMAP transmit channel teardown packet generation suppression + * configuration to be programmed into the tx_supr_tdpkt field of the channel's + * TCHAN_TCFG register. + * + * @tx_fetch_size: UDMAP transmit channel number of 32-bit descriptor words to + * fetch configuration to be programmed into the tx_fetch_size field of the + * channel's TCHAN_TCFG register. The user must make sure to set the maximum + * word count that can pass through the channel for any allowed descriptor type. + * + * @tx_credit_count: UDMAP transmit channel transfer request credit count + * configuration to be programmed into the count field of the TCHAN_TCREDIT + * register. Specifies how many credits for complete TRs are available. + * + * @txcq_qnum: UDMAP transmit channel completion queue configuration to be + * programmed into the txcq_qnum field of the TCHAN_TCQ register. The specified + * completion queue must be assigned to the host, or a subordinate of the host, + * requesting configuration of the transmit channel. + * + * @tx_priority: UDMAP transmit channel transmit priority value to be programmed + * into the priority field of the channel's TCHAN_TPRI_CTRL register. + * + * @tx_qos: UDMAP transmit channel transmit qos value to be programmed into the + * qos field of the channel's TCHAN_TPRI_CTRL register. + * + * @tx_orderid: UDMAP transmit channel bus order id value to be programmed into + * the orderid field of the channel's TCHAN_TPRI_CTRL register. + * + * @fdepth: UDMAP transmit channel FIFO depth configuration to be programmed + * into the fdepth field of the TCHAN_TFIFO_DEPTH register. Sets the number of + * Tx FIFO bytes which are allowed to be stored for the channel. Check the UDMAP + * section of the TRM for restrictions regarding this parameter. + * + * @tx_sched_priority: UDMAP transmit channel tx scheduling priority + * configuration to be programmed into the priority field of the channel's + * TCHAN_TST_SCHED register. + * + * @tx_burst_size: UDMAP transmit channel burst size configuration to be + * programmed into the tx_burst_size field of the TCHAN_TCFG register. + */ +struct ti_sci_msg_rm_udmap_tx_ch_cfg_req { + struct ti_sci_msg_hdr hdr; + u32 valid_params; + u16 nav_id; + u16 index; + u8 tx_pause_on_err; + u8 tx_filt_einfo; + u8 tx_filt_pswords; + u8 tx_atype; + u8 tx_chan_type; + u8 tx_supr_tdpkt; + u16 tx_fetch_size; + u8 tx_credit_count; + u16 txcq_qnum; + u8 tx_priority; + u8 tx_qos; + u8 tx_orderid; + u16 fdepth; + u8 tx_sched_priority; + u8 tx_burst_size; +} __packed; + +/** + * Configures a Navigator Subsystem UDMAP receive channel + * + * Configures the non-real-time registers of a Navigator Subsystem UDMAP + * receive channel. The channel index must be assigned to the host defined + * in the TISCI header via the RM board configuration resource assignment + * range list. + * + * @hdr: Generic Header + * + * @valid_params: Bitfield defining validity of rx channel configuration + * parameters. + * The rx channel configuration fields are not valid, and will not be used for + * ch configuration, if their corresponding valid bit is zero. + * Valid bit usage: + * 0 - Valid bit for @ti_sci_msg_rm_udmap_rx_ch_cfg_req::rx_pause_on_err + * 1 - Valid bit for @ti_sci_msg_rm_udmap_rx_ch_cfg_req::rx_atype + * 2 - Valid bit for @ti_sci_msg_rm_udmap_rx_ch_cfg_req::rx_chan_type + * 3 - Valid bit for @ti_sci_msg_rm_udmap_rx_ch_cfg_req::rx_fetch_size + * 4 - Valid bit for @ti_sci_msg_rm_udmap_rx_ch_cfg_req::rxcq_qnum + * 5 - Valid bit for @ti_sci_msg_rm_udmap_rx_ch_cfg_req::rx_priority + * 6 - Valid bit for @ti_sci_msg_rm_udmap_rx_ch_cfg_req::rx_qos + * 7 - Valid bit for @ti_sci_msg_rm_udmap_rx_ch_cfg_req::rx_orderid + * 8 - Valid bit for @ti_sci_msg_rm_udmap_rx_ch_cfg_req::rx_sched_priority + * 9 - Valid bit for @ti_sci_msg_rm_udmap_rx_ch_cfg_req::flowid_start + * 10 - Valid bit for @ti_sci_msg_rm_udmap_rx_ch_cfg_req::flowid_cnt + * 11 - Valid bit for @ti_sci_msg_rm_udmap_rx_ch_cfg_req::rx_ignore_short + * 12 - Valid bit for @ti_sci_msg_rm_udmap_rx_ch_cfg_req::rx_ignore_long + * 14 - Valid bit for @ti_sci_msg_rm_udmap_rx_ch_cfg_req::rx_burst_size + * + * @nav_id: SoC device ID of Navigator Subsystem where rx channel is located + * + * @index: UDMAP receive channel index. + * + * @rx_fetch_size: UDMAP receive channel number of 32-bit descriptor words to + * fetch configuration to be programmed into the rx_fetch_size field of the + * channel's RCHAN_RCFG register. + * + * @rxcq_qnum: UDMAP receive channel completion queue configuration to be + * programmed into the rxcq_qnum field of the RCHAN_RCQ register. + * The specified completion queue must be assigned to the host, or a subordinate + * of the host, requesting configuration of the receive channel. + * + * @rx_priority: UDMAP receive channel receive priority value to be programmed + * into the priority field of the channel's RCHAN_RPRI_CTRL register. + * + * @rx_qos: UDMAP receive channel receive qos value to be programmed into the + * qos field of the channel's RCHAN_RPRI_CTRL register. + * + * @rx_orderid: UDMAP receive channel bus order id value to be programmed into + * the orderid field of the channel's RCHAN_RPRI_CTRL register. + * + * @rx_sched_priority: UDMAP receive channel rx scheduling priority + * configuration to be programmed into the priority field of the channel's + * RCHAN_RST_SCHED register. + * + * @flowid_start: UDMAP receive channel additional flows starting index + * configuration to program into the flow_start field of the RCHAN_RFLOW_RNG + * register. Specifies the starting index for flow IDs the receive channel is to + * make use of beyond the default flow. flowid_start and @ref flowid_cnt must be + * set as valid and configured together. The starting flow ID set by + * @ref flowid_cnt must be a flow index within the Navigator Subsystem's subset + * of flows beyond the default flows statically mapped to receive channels. + * The additional flows must be assigned to the host, or a subordinate of the + * host, requesting configuration of the receive channel. + * + * @flowid_cnt: UDMAP receive channel additional flows count configuration to + * program into the flowid_cnt field of the RCHAN_RFLOW_RNG register. + * This field specifies how many flow IDs are in the additional contiguous range + * of legal flow IDs for the channel. @ref flowid_start and flowid_cnt must be + * set as valid and configured together. Disabling the valid_params field bit + * for flowid_cnt indicates no flow IDs other than the default are to be + * allocated and used by the receive channel. @ref flowid_start plus flowid_cnt + * cannot be greater than the number of receive flows in the receive channel's + * Navigator Subsystem. The additional flows must be assigned to the host, or a + * subordinate of the host, requesting configuration of the receive channel. + * + * @rx_pause_on_err: UDMAP receive channel pause on error configuration to be + * programmed into the rx_pause_on_err field of the channel's RCHAN_RCFG + * register. + * + * @rx_atype: UDMAP receive channel non Ring Accelerator access pointer + * interpretation configuration to be programmed into the rx_atype field of the + * channel's RCHAN_RCFG register. + * + * @rx_chan_type: UDMAP receive channel functional channel type and work passing + * mechanism configuration to be programmed into the rx_chan_type field of the + * channel's RCHAN_RCFG register. + * + * @rx_ignore_short: UDMAP receive channel short packet treatment configuration + * to be programmed into the rx_ignore_short field of the RCHAN_RCFG register. + * + * @rx_ignore_long: UDMAP receive channel long packet treatment configuration to + * be programmed into the rx_ignore_long field of the RCHAN_RCFG register. + * + * @rx_burst_size: UDMAP receive channel burst size configuration to be + * programmed into the rx_burst_size field of the RCHAN_RCFG register. + */ +struct ti_sci_msg_rm_udmap_rx_ch_cfg_req { + struct ti_sci_msg_hdr hdr; + u32 valid_params; + u16 nav_id; + u16 index; + u16 rx_fetch_size; + u16 rxcq_qnum; + u8 rx_priority; + u8 rx_qos; + u8 rx_orderid; + u8 rx_sched_priority; + u16 flowid_start; + u16 flowid_cnt; + u8 rx_pause_on_err; + u8 rx_atype; + u8 rx_chan_type; + u8 rx_ignore_short; + u8 rx_ignore_long; + u8 rx_burst_size; +} __packed; + +/** + * Configures a Navigator Subsystem UDMAP receive flow + * + * Configures a Navigator Subsystem UDMAP receive flow's registers. + * Configuration does not include the flow registers which handle size-based + * free descriptor queue routing. + * + * The flow index must be assigned to the host defined in the TISCI header via + * the RM board configuration resource assignment range list. + * + * @hdr: Standard TISCI header + * + * @valid_params + * Bitfield defining validity of rx flow configuration parameters. The + * rx flow configuration fields are not valid, and will not be used for flow + * configuration, if their corresponding valid bit is zero. Valid bit usage: + * 0 - Valid bit for @tisci_msg_rm_udmap_flow_cfg_req::rx_einfo_present + * 1 - Valid bit for @tisci_msg_rm_udmap_flow_cfg_req::rx_psinfo_present + * 2 - Valid bit for @tisci_msg_rm_udmap_flow_cfg_req::rx_error_handling + * 3 - Valid bit for @tisci_msg_rm_udmap_flow_cfg_req::rx_desc_type + * 4 - Valid bit for @tisci_msg_rm_udmap_flow_cfg_req::rx_sop_offset + * 5 - Valid bit for @tisci_msg_rm_udmap_flow_cfg_req::rx_dest_qnum + * 6 - Valid bit for @tisci_msg_rm_udmap_flow_cfg_req::rx_src_tag_hi + * 7 - Valid bit for @tisci_msg_rm_udmap_flow_cfg_req::rx_src_tag_lo + * 8 - Valid bit for @tisci_msg_rm_udmap_flow_cfg_req::rx_dest_tag_hi + * 9 - Valid bit for @tisci_msg_rm_udmap_flow_cfg_req::rx_dest_tag_lo + * 10 - Valid bit for @tisci_msg_rm_udmap_flow_cfg_req::rx_src_tag_hi_sel + * 11 - Valid bit for @tisci_msg_rm_udmap_flow_cfg_req::rx_src_tag_lo_sel + * 12 - Valid bit for @tisci_msg_rm_udmap_flow_cfg_req::rx_dest_tag_hi_sel + * 13 - Valid bit for @tisci_msg_rm_udmap_flow_cfg_req::rx_dest_tag_lo_sel + * 14 - Valid bit for @tisci_msg_rm_udmap_flow_cfg_req::rx_fdq0_sz0_qnum + * 15 - Valid bit for @tisci_msg_rm_udmap_flow_cfg_req::rx_fdq1_sz0_qnum + * 16 - Valid bit for @tisci_msg_rm_udmap_flow_cfg_req::rx_fdq2_sz0_qnum + * 17 - Valid bit for @tisci_msg_rm_udmap_flow_cfg_req::rx_fdq3_sz0_qnum + * 18 - Valid bit for @tisci_msg_rm_udmap_flow_cfg_req::rx_ps_location + * + * @nav_id: SoC device ID of Navigator Subsystem from which the receive flow is + * allocated + * + * @flow_index: UDMAP receive flow index for non-optional configuration. + * + * @rx_einfo_present: + * UDMAP receive flow extended packet info present configuration to be + * programmed into the rx_einfo_present field of the flow's RFLOW_RFA register. + * + * @rx_psinfo_present: + * UDMAP receive flow PS words present configuration to be programmed into the + * rx_psinfo_present field of the flow's RFLOW_RFA register. + * + * @rx_error_handling: + * UDMAP receive flow error handling configuration to be programmed into the + * rx_error_handling field of the flow's RFLOW_RFA register. + * + * @rx_desc_type: + * UDMAP receive flow descriptor type configuration to be programmed into the + * rx_desc_type field field of the flow's RFLOW_RFA register. + * + * @rx_sop_offset: + * UDMAP receive flow start of packet offset configuration to be programmed + * into the rx_sop_offset field of the RFLOW_RFA register. See the UDMAP + * section of the TRM for more information on this setting. Valid values for + * this field are 0-255 bytes. + * + * @rx_dest_qnum: + * UDMAP receive flow destination queue configuration to be programmed into the + * rx_dest_qnum field of the flow's RFLOW_RFA register. The specified + * destination queue must be valid within the Navigator Subsystem and must be + * owned by the host, or a subordinate of the host, requesting allocation and + * configuration of the receive flow. + * + * @rx_src_tag_hi: + * UDMAP receive flow source tag high byte constant configuration to be + * programmed into the rx_src_tag_hi field of the flow's RFLOW_RFB register. + * See the UDMAP section of the TRM for more information on this setting. + * + * @rx_src_tag_lo: + * UDMAP receive flow source tag low byte constant configuration to be + * programmed into the rx_src_tag_lo field of the flow's RFLOW_RFB register. + * See the UDMAP section of the TRM for more information on this setting. + * + * @rx_dest_tag_hi: + * UDMAP receive flow destination tag high byte constant configuration to be + * programmed into the rx_dest_tag_hi field of the flow's RFLOW_RFB register. + * See the UDMAP section of the TRM for more information on this setting. + * + * @rx_dest_tag_lo: + * UDMAP receive flow destination tag low byte constant configuration to be + * programmed into the rx_dest_tag_lo field of the flow's RFLOW_RFB register. + * See the UDMAP section of the TRM for more information on this setting. + * + * @rx_src_tag_hi_sel: + * UDMAP receive flow source tag high byte selector configuration to be + * programmed into the rx_src_tag_hi_sel field of the RFLOW_RFC register. See + * the UDMAP section of the TRM for more information on this setting. + * + * @rx_src_tag_lo_sel: + * UDMAP receive flow source tag low byte selector configuration to be + * programmed into the rx_src_tag_lo_sel field of the RFLOW_RFC register. See + * the UDMAP section of the TRM for more information on this setting. + * + * @rx_dest_tag_hi_sel: + * UDMAP receive flow destination tag high byte selector configuration to be + * programmed into the rx_dest_tag_hi_sel field of the RFLOW_RFC register. See + * the UDMAP section of the TRM for more information on this setting. + * + * @rx_dest_tag_lo_sel: + * UDMAP receive flow destination tag low byte selector configuration to be + * programmed into the rx_dest_tag_lo_sel field of the RFLOW_RFC register. See + * the UDMAP section of the TRM for more information on this setting. + * + * @rx_fdq0_sz0_qnum: + * UDMAP receive flow free descriptor queue 0 configuration to be programmed + * into the rx_fdq0_sz0_qnum field of the flow's RFLOW_RFD register. See the + * UDMAP section of the TRM for more information on this setting. The specified + * free queue must be valid within the Navigator Subsystem and must be owned + * by the host, or a subordinate of the host, requesting allocation and + * configuration of the receive flow. + * + * @rx_fdq1_qnum: + * UDMAP receive flow free descriptor queue 1 configuration to be programmed + * into the rx_fdq1_qnum field of the flow's RFLOW_RFD register. See the + * UDMAP section of the TRM for more information on this setting. The specified + * free queue must be valid within the Navigator Subsystem and must be owned + * by the host, or a subordinate of the host, requesting allocation and + * configuration of the receive flow. + * + * @rx_fdq2_qnum: + * UDMAP receive flow free descriptor queue 2 configuration to be programmed + * into the rx_fdq2_qnum field of the flow's RFLOW_RFE register. See the + * UDMAP section of the TRM for more information on this setting. The specified + * free queue must be valid within the Navigator Subsystem and must be owned + * by the host, or a subordinate of the host, requesting allocation and + * configuration of the receive flow. + * + * @rx_fdq3_qnum: + * UDMAP receive flow free descriptor queue 3 configuration to be programmed + * into the rx_fdq3_qnum field of the flow's RFLOW_RFE register. See the + * UDMAP section of the TRM for more information on this setting. The specified + * free queue must be valid within the Navigator Subsystem and must be owned + * by the host, or a subordinate of the host, requesting allocation and + * configuration of the receive flow. + * + * @rx_ps_location: + * UDMAP receive flow PS words location configuration to be programmed into the + * rx_ps_location field of the flow's RFLOW_RFA register. + */ +struct ti_sci_msg_rm_udmap_flow_cfg_req { + struct ti_sci_msg_hdr hdr; + u32 valid_params; + u16 nav_id; + u16 flow_index; + u8 rx_einfo_present; + u8 rx_psinfo_present; + u8 rx_error_handling; + u8 rx_desc_type; + u16 rx_sop_offset; + u16 rx_dest_qnum; + u8 rx_src_tag_hi; + u8 rx_src_tag_lo; + u8 rx_dest_tag_hi; + u8 rx_dest_tag_lo; + u8 rx_src_tag_hi_sel; + u8 rx_src_tag_lo_sel; + u8 rx_dest_tag_hi_sel; + u8 rx_dest_tag_lo_sel; + u16 rx_fdq0_sz0_qnum; + u16 rx_fdq1_qnum; + u16 rx_fdq2_qnum; + u16 rx_fdq3_qnum; + u8 rx_ps_location; +} __packed; + #endif /* __TI_SCI_H */ diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 568722a041bf..4fd9bff5806b 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -241,6 +241,218 @@ struct ti_sci_rm_irq_ops { u16 global_event, u8 vint_status_bit); }; +/* RA config.addr_lo parameter is valid for RM ring configure TI_SCI message */ +#define TI_SCI_MSG_VALUE_RM_RING_ADDR_LO_VALID BIT(0) +/* RA config.addr_hi parameter is valid for RM ring configure TI_SCI message */ +#define TI_SCI_MSG_VALUE_RM_RING_ADDR_HI_VALID BIT(1) + /* RA config.count parameter is valid for RM ring configure TI_SCI message */ +#define TI_SCI_MSG_VALUE_RM_RING_COUNT_VALID BIT(2) +/* RA config.mode parameter is valid for RM ring configure TI_SCI message */ +#define TI_SCI_MSG_VALUE_RM_RING_MODE_VALID BIT(3) +/* RA config.size parameter is valid for RM ring configure TI_SCI message */ +#define TI_SCI_MSG_VALUE_RM_RING_SIZE_VALID BIT(4) +/* RA config.order_id parameter is valid for RM ring configure TISCI message */ +#define TI_SCI_MSG_VALUE_RM_RING_ORDER_ID_VALID BIT(5) + +#define TI_SCI_MSG_VALUE_RM_ALL_NO_ORDER \ + (TI_SCI_MSG_VALUE_RM_RING_ADDR_LO_VALID | \ + TI_SCI_MSG_VALUE_RM_RING_ADDR_HI_VALID | \ + TI_SCI_MSG_VALUE_RM_RING_COUNT_VALID | \ + TI_SCI_MSG_VALUE_RM_RING_MODE_VALID | \ + TI_SCI_MSG_VALUE_RM_RING_SIZE_VALID) + +/** + * struct ti_sci_rm_ringacc_ops - Ring Accelerator Management operations + * @config: configure the SoC Navigator Subsystem Ring Accelerator ring + * @get_config: get the SoC Navigator Subsystem Ring Accelerator ring + * configuration + */ +struct ti_sci_rm_ringacc_ops { + int (*config)(const struct ti_sci_handle *handle, + u32 valid_params, u16 nav_id, u16 index, + u32 addr_lo, u32 addr_hi, u32 count, u8 mode, + u8 size, u8 order_id + ); + int (*get_config)(const struct ti_sci_handle *handle, + u32 nav_id, u32 index, u8 *mode, + u32 *addr_lo, u32 *addr_hi, u32 *count, + u8 *size, u8 *order_id); +}; + +/** + * struct ti_sci_rm_psil_ops - PSI-L thread operations + * @pair: pair PSI-L source thread to a destination thread. + * If the src_thread is mapped to UDMA tchan, the corresponding channel's + * TCHAN_THRD_ID register is updated. + * If the dst_thread is mapped to UDMA rchan, the corresponding channel's + * RCHAN_THRD_ID register is updated. + * @unpair: unpair PSI-L source thread from a destination thread. + * If the src_thread is mapped to UDMA tchan, the corresponding channel's + * TCHAN_THRD_ID register is cleared. + * If the dst_thread is mapped to UDMA rchan, the corresponding channel's + * RCHAN_THRD_ID register is cleared. + */ +struct ti_sci_rm_psil_ops { + int (*pair)(const struct ti_sci_handle *handle, u32 nav_id, + u32 src_thread, u32 dst_thread); + int (*unpair)(const struct ti_sci_handle *handle, u32 nav_id, + u32 src_thread, u32 dst_thread); +}; + +/* UDMAP channel types */ +#define TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR 2 +#define TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR_SB 3 /* RX only */ +#define TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_PBRR 10 +#define TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_PBVR 11 +#define TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_BCOPY_PBRR 12 +#define TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_BCOPY_PBVR 13 + +#define TI_SCI_RM_UDMAP_RX_FLOW_DESC_HOST 0 +#define TI_SCI_RM_UDMAP_RX_FLOW_DESC_MONO 2 + +#define TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES 1 +#define TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_128_BYTES 2 +#define TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_256_BYTES 3 + +/* UDMAP TX/RX channel valid_params common declarations */ +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID BIT(0) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_ATYPE_VALID BIT(1) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID BIT(2) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_FETCH_SIZE_VALID BIT(3) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID BIT(4) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_PRIORITY_VALID BIT(5) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_QOS_VALID BIT(6) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_ORDER_ID_VALID BIT(7) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_SCHED_PRIORITY_VALID BIT(8) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_BURST_SIZE_VALID BIT(14) + +/** + * Configures a Navigator Subsystem UDMAP transmit channel + * + * Configures a Navigator Subsystem UDMAP transmit channel registers. + * See @ti_sci_msg_rm_udmap_tx_ch_cfg_req + */ +struct ti_sci_msg_rm_udmap_tx_ch_cfg { + u32 valid_params; +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FILT_EINFO_VALID BIT(9) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FILT_PSWORDS_VALID BIT(10) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_SUPR_TDPKT_VALID BIT(11) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_CREDIT_COUNT_VALID BIT(12) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FDEPTH_VALID BIT(13) + u16 nav_id; + u16 index; + u8 tx_pause_on_err; + u8 tx_filt_einfo; + u8 tx_filt_pswords; + u8 tx_atype; + u8 tx_chan_type; + u8 tx_supr_tdpkt; + u16 tx_fetch_size; + u8 tx_credit_count; + u16 txcq_qnum; + u8 tx_priority; + u8 tx_qos; + u8 tx_orderid; + u16 fdepth; + u8 tx_sched_priority; + u8 tx_burst_size; +}; + +/** + * Configures a Navigator Subsystem UDMAP receive channel + * + * Configures a Navigator Subsystem UDMAP receive channel registers. + * See @ti_sci_msg_rm_udmap_rx_ch_cfg_req + */ +struct ti_sci_msg_rm_udmap_rx_ch_cfg { + u32 valid_params; +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_START_VALID BIT(9) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_CNT_VALID BIT(10) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_IGNORE_SHORT_VALID BIT(11) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_IGNORE_LONG_VALID BIT(12) + u16 nav_id; + u16 index; + u16 rx_fetch_size; + u16 rxcq_qnum; + u8 rx_priority; + u8 rx_qos; + u8 rx_orderid; + u8 rx_sched_priority; + u16 flowid_start; + u16 flowid_cnt; + u8 rx_pause_on_err; + u8 rx_atype; + u8 rx_chan_type; + u8 rx_ignore_short; + u8 rx_ignore_long; + u8 rx_burst_size; +}; + +/** + * Configures a Navigator Subsystem UDMAP receive flow + * + * Configures a Navigator Subsystem UDMAP receive flow's registers. + * See @tis_ci_msg_rm_udmap_flow_cfg_req + */ +struct ti_sci_msg_rm_udmap_flow_cfg { + u32 valid_params; +#define TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_EINFO_PRESENT_VALID BIT(0) +#define TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_PSINFO_PRESENT_VALID BIT(1) +#define TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_ERROR_HANDLING_VALID BIT(2) +#define TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DESC_TYPE_VALID BIT(3) +#define TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SOP_OFFSET_VALID BIT(4) +#define TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_QNUM_VALID BIT(5) +#define TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_HI_VALID BIT(6) +#define TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_LO_VALID BIT(7) +#define TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_HI_VALID BIT(8) +#define TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_LO_VALID BIT(9) +#define TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_HI_SEL_VALID BIT(10) +#define TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_LO_SEL_VALID BIT(11) +#define TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_HI_SEL_VALID BIT(12) +#define TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_LO_SEL_VALID BIT(13) +#define TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ0_SZ0_QNUM_VALID BIT(14) +#define TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ1_QNUM_VALID BIT(15) +#define TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ2_QNUM_VALID BIT(16) +#define TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ3_QNUM_VALID BIT(17) +#define TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_PS_LOCATION_VALID BIT(18) + u16 nav_id; + u16 flow_index; + u8 rx_einfo_present; + u8 rx_psinfo_present; + u8 rx_error_handling; + u8 rx_desc_type; + u16 rx_sop_offset; + u16 rx_dest_qnum; + u8 rx_src_tag_hi; + u8 rx_src_tag_lo; + u8 rx_dest_tag_hi; + u8 rx_dest_tag_lo; + u8 rx_src_tag_hi_sel; + u8 rx_src_tag_lo_sel; + u8 rx_dest_tag_hi_sel; + u8 rx_dest_tag_lo_sel; + u16 rx_fdq0_sz0_qnum; + u16 rx_fdq1_qnum; + u16 rx_fdq2_qnum; + u16 rx_fdq3_qnum; + u8 rx_ps_location; +}; + +/** + * struct ti_sci_rm_udmap_ops - UDMA Management operations + * @tx_ch_cfg: configure SoC Navigator Subsystem UDMA transmit channel. + * @rx_ch_cfg: configure SoC Navigator Subsystem UDMA receive channel. + * @rx_flow_cfg1: configure SoC Navigator Subsystem UDMA receive flow. + */ +struct ti_sci_rm_udmap_ops { + int (*tx_ch_cfg)(const struct ti_sci_handle *handle, + const struct ti_sci_msg_rm_udmap_tx_ch_cfg *params); + int (*rx_ch_cfg)(const struct ti_sci_handle *handle, + const struct ti_sci_msg_rm_udmap_rx_ch_cfg *params); + int (*rx_flow_cfg)(const struct ti_sci_handle *handle, + const struct ti_sci_msg_rm_udmap_flow_cfg *params); +}; + /** * struct ti_sci_ops - Function support for TI SCI * @dev_ops: Device specific operations @@ -254,6 +466,9 @@ struct ti_sci_ops { struct ti_sci_clk_ops clk_ops; struct ti_sci_rm_core_ops rm_core_ops; struct ti_sci_rm_irq_ops rm_irq_ops; + struct ti_sci_rm_ringacc_ops rm_ring_ops; + struct ti_sci_rm_psil_ops rm_psil_ops; + struct ti_sci_rm_udmap_ops rm_udmap_ops; }; /** -- cgit v1.2.3 From 1e407f337f4015c8ffc56e7cfd70e06b2e9fc9da Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 5 Jun 2019 17:33:34 -0500 Subject: firmware: ti_sci: Add support for processor control Texas Instrument's System Control Interface (TI-SCI) Message Protocol is used in Texas Instrument's System on Chip (SoC) such as those in K3 family AM654 SoC to communicate between various compute processors with a central system controller entity. The system controller provides various services including the control of other compute processors within the SoC. Extend the TI-SCI protocol support to add various TI-SCI commands to invoke services associated with power and reset control, and boot vector management of the various compute processors from the Linux kernel. Signed-off-by: Suman Anna Signed-off-by: Tero Kristo Signed-off-by: Santosh Shilimkar --- drivers/firmware/ti_sci.c | 350 +++++++++++++++++++++++++++++++++ drivers/firmware/ti_sci.h | 135 +++++++++++++ include/linux/soc/ti/ti_sci_protocol.h | 31 +++ 3 files changed, 516 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c index 02fa196428d8..b47e33e7411f 100644 --- a/drivers/firmware/ti_sci.c +++ b/drivers/firmware/ti_sci.c @@ -2479,6 +2479,348 @@ fail: return ret; } +/** + * ti_sci_cmd_proc_request() - Command to request a physical processor control + * @handle: Pointer to TI SCI handle + * @proc_id: Processor ID this request is for + * + * Return: 0 if all went well, else returns appropriate error value. + */ +static int ti_sci_cmd_proc_request(const struct ti_sci_handle *handle, + u8 proc_id) +{ + struct ti_sci_msg_req_proc_request *req; + struct ti_sci_msg_hdr *resp; + struct ti_sci_info *info; + struct ti_sci_xfer *xfer; + struct device *dev; + int ret = 0; + + if (!handle) + return -EINVAL; + if (IS_ERR(handle)) + return PTR_ERR(handle); + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_PROC_REQUEST, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "Message alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_req_proc_request *)xfer->xfer_buf; + req->processor_id = proc_id; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_hdr *)xfer->tx_message.buf; + + ret = ti_sci_is_response_ack(resp) ? 0 : -ENODEV; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + +/** + * ti_sci_cmd_proc_release() - Command to release a physical processor control + * @handle: Pointer to TI SCI handle + * @proc_id: Processor ID this request is for + * + * Return: 0 if all went well, else returns appropriate error value. + */ +static int ti_sci_cmd_proc_release(const struct ti_sci_handle *handle, + u8 proc_id) +{ + struct ti_sci_msg_req_proc_release *req; + struct ti_sci_msg_hdr *resp; + struct ti_sci_info *info; + struct ti_sci_xfer *xfer; + struct device *dev; + int ret = 0; + + if (!handle) + return -EINVAL; + if (IS_ERR(handle)) + return PTR_ERR(handle); + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_PROC_RELEASE, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "Message alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_req_proc_release *)xfer->xfer_buf; + req->processor_id = proc_id; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_hdr *)xfer->tx_message.buf; + + ret = ti_sci_is_response_ack(resp) ? 0 : -ENODEV; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + +/** + * ti_sci_cmd_proc_handover() - Command to handover a physical processor + * control to a host in the processor's access + * control list. + * @handle: Pointer to TI SCI handle + * @proc_id: Processor ID this request is for + * @host_id: Host ID to get the control of the processor + * + * Return: 0 if all went well, else returns appropriate error value. + */ +static int ti_sci_cmd_proc_handover(const struct ti_sci_handle *handle, + u8 proc_id, u8 host_id) +{ + struct ti_sci_msg_req_proc_handover *req; + struct ti_sci_msg_hdr *resp; + struct ti_sci_info *info; + struct ti_sci_xfer *xfer; + struct device *dev; + int ret = 0; + + if (!handle) + return -EINVAL; + if (IS_ERR(handle)) + return PTR_ERR(handle); + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_PROC_HANDOVER, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "Message alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_req_proc_handover *)xfer->xfer_buf; + req->processor_id = proc_id; + req->host_id = host_id; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_hdr *)xfer->tx_message.buf; + + ret = ti_sci_is_response_ack(resp) ? 0 : -ENODEV; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + +/** + * ti_sci_cmd_proc_set_config() - Command to set the processor boot + * configuration flags + * @handle: Pointer to TI SCI handle + * @proc_id: Processor ID this request is for + * @config_flags_set: Configuration flags to be set + * @config_flags_clear: Configuration flags to be cleared. + * + * Return: 0 if all went well, else returns appropriate error value. + */ +static int ti_sci_cmd_proc_set_config(const struct ti_sci_handle *handle, + u8 proc_id, u64 bootvector, + u32 config_flags_set, + u32 config_flags_clear) +{ + struct ti_sci_msg_req_set_config *req; + struct ti_sci_msg_hdr *resp; + struct ti_sci_info *info; + struct ti_sci_xfer *xfer; + struct device *dev; + int ret = 0; + + if (!handle) + return -EINVAL; + if (IS_ERR(handle)) + return PTR_ERR(handle); + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_SET_CONFIG, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "Message alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_req_set_config *)xfer->xfer_buf; + req->processor_id = proc_id; + req->bootvector_low = bootvector & TI_SCI_ADDR_LOW_MASK; + req->bootvector_high = (bootvector & TI_SCI_ADDR_HIGH_MASK) >> + TI_SCI_ADDR_HIGH_SHIFT; + req->config_flags_set = config_flags_set; + req->config_flags_clear = config_flags_clear; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_hdr *)xfer->tx_message.buf; + + ret = ti_sci_is_response_ack(resp) ? 0 : -ENODEV; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + +/** + * ti_sci_cmd_proc_set_control() - Command to set the processor boot + * control flags + * @handle: Pointer to TI SCI handle + * @proc_id: Processor ID this request is for + * @control_flags_set: Control flags to be set + * @control_flags_clear: Control flags to be cleared + * + * Return: 0 if all went well, else returns appropriate error value. + */ +static int ti_sci_cmd_proc_set_control(const struct ti_sci_handle *handle, + u8 proc_id, u32 control_flags_set, + u32 control_flags_clear) +{ + struct ti_sci_msg_req_set_ctrl *req; + struct ti_sci_msg_hdr *resp; + struct ti_sci_info *info; + struct ti_sci_xfer *xfer; + struct device *dev; + int ret = 0; + + if (!handle) + return -EINVAL; + if (IS_ERR(handle)) + return PTR_ERR(handle); + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_SET_CTRL, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "Message alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_req_set_ctrl *)xfer->xfer_buf; + req->processor_id = proc_id; + req->control_flags_set = control_flags_set; + req->control_flags_clear = control_flags_clear; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_hdr *)xfer->tx_message.buf; + + ret = ti_sci_is_response_ack(resp) ? 0 : -ENODEV; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + +/** + * ti_sci_cmd_get_boot_status() - Command to get the processor boot status + * @handle: Pointer to TI SCI handle + * @proc_id: Processor ID this request is for + * + * Return: 0 if all went well, else returns appropriate error value. + */ +static int ti_sci_cmd_proc_get_status(const struct ti_sci_handle *handle, + u8 proc_id, u64 *bv, u32 *cfg_flags, + u32 *ctrl_flags, u32 *sts_flags) +{ + struct ti_sci_msg_resp_get_status *resp; + struct ti_sci_msg_req_get_status *req; + struct ti_sci_info *info; + struct ti_sci_xfer *xfer; + struct device *dev; + int ret = 0; + + if (!handle) + return -EINVAL; + if (IS_ERR(handle)) + return PTR_ERR(handle); + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_GET_STATUS, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "Message alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_req_get_status *)xfer->xfer_buf; + req->processor_id = proc_id; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_resp_get_status *)xfer->tx_message.buf; + + if (!ti_sci_is_response_ack(resp)) { + ret = -ENODEV; + } else { + *bv = (resp->bootvector_low & TI_SCI_ADDR_LOW_MASK) | + (((u64)resp->bootvector_high << TI_SCI_ADDR_HIGH_SHIFT) & + TI_SCI_ADDR_HIGH_MASK); + *cfg_flags = resp->config_flags; + *ctrl_flags = resp->control_flags; + *sts_flags = resp->status_flags; + } + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + /* * ti_sci_setup_ops() - Setup the operations structures * @info: pointer to TISCI pointer @@ -2494,6 +2836,7 @@ static void ti_sci_setup_ops(struct ti_sci_info *info) struct ti_sci_rm_ringacc_ops *rops = &ops->rm_ring_ops; struct ti_sci_rm_psil_ops *psilops = &ops->rm_psil_ops; struct ti_sci_rm_udmap_ops *udmap_ops = &ops->rm_udmap_ops; + struct ti_sci_proc_ops *pops = &ops->proc_ops; core_ops->reboot_device = ti_sci_cmd_core_reboot; @@ -2543,6 +2886,13 @@ static void ti_sci_setup_ops(struct ti_sci_info *info) udmap_ops->tx_ch_cfg = ti_sci_cmd_rm_udmap_tx_ch_cfg; udmap_ops->rx_ch_cfg = ti_sci_cmd_rm_udmap_rx_ch_cfg; udmap_ops->rx_flow_cfg = ti_sci_cmd_rm_udmap_rx_flow_cfg; + + pops->request = ti_sci_cmd_proc_request; + pops->release = ti_sci_cmd_proc_release; + pops->handover = ti_sci_cmd_proc_handover; + pops->set_config = ti_sci_cmd_proc_set_config; + pops->set_control = ti_sci_cmd_proc_set_control; + pops->get_status = ti_sci_cmd_proc_get_status; } /** diff --git a/drivers/firmware/ti_sci.h b/drivers/firmware/ti_sci.h index 2bb81ec7793c..662dcffef311 100644 --- a/drivers/firmware/ti_sci.h +++ b/drivers/firmware/ti_sci.h @@ -71,6 +71,14 @@ #define TISCI_MSG_RM_UDMAP_FLOW_GET_CFG 0x1232 #define TISCI_MSG_RM_UDMAP_FLOW_SIZE_THRESH_GET_CFG 0x1233 +/* Processor Control requests */ +#define TI_SCI_MSG_PROC_REQUEST 0xc000 +#define TI_SCI_MSG_PROC_RELEASE 0xc001 +#define TI_SCI_MSG_PROC_HANDOVER 0xc005 +#define TI_SCI_MSG_SET_CONFIG 0xc100 +#define TI_SCI_MSG_SET_CTRL 0xc101 +#define TI_SCI_MSG_GET_STATUS 0xc400 + /** * struct ti_sci_msg_hdr - Generic Message Header for All messages and responses * @type: Type of messages: One of TI_SCI_MSG* values @@ -1238,4 +1246,131 @@ struct ti_sci_msg_rm_udmap_flow_cfg_req { u8 rx_ps_location; } __packed; +/** + * struct ti_sci_msg_req_proc_request - Request a processor + * @hdr: Generic Header + * @processor_id: ID of processor being requested + * + * Request type is TI_SCI_MSG_PROC_REQUEST, response is a generic ACK/NACK + * message. + */ +struct ti_sci_msg_req_proc_request { + struct ti_sci_msg_hdr hdr; + u8 processor_id; +} __packed; + +/** + * struct ti_sci_msg_req_proc_release - Release a processor + * @hdr: Generic Header + * @processor_id: ID of processor being released + * + * Request type is TI_SCI_MSG_PROC_RELEASE, response is a generic ACK/NACK + * message. + */ +struct ti_sci_msg_req_proc_release { + struct ti_sci_msg_hdr hdr; + u8 processor_id; +} __packed; + +/** + * struct ti_sci_msg_req_proc_handover - Handover a processor to a host + * @hdr: Generic Header + * @processor_id: ID of processor being handed over + * @host_id: Host ID the control needs to be transferred to + * + * Request type is TI_SCI_MSG_PROC_HANDOVER, response is a generic ACK/NACK + * message. + */ +struct ti_sci_msg_req_proc_handover { + struct ti_sci_msg_hdr hdr; + u8 processor_id; + u8 host_id; +} __packed; + +/* Boot Vector masks */ +#define TI_SCI_ADDR_LOW_MASK GENMASK_ULL(31, 0) +#define TI_SCI_ADDR_HIGH_MASK GENMASK_ULL(63, 32) +#define TI_SCI_ADDR_HIGH_SHIFT 32 + +/** + * struct ti_sci_msg_req_set_config - Set Processor boot configuration + * @hdr: Generic Header + * @processor_id: ID of processor being configured + * @bootvector_low: Lower 32 bit address (Little Endian) of boot vector + * @bootvector_high: Higher 32 bit address (Little Endian) of boot vector + * @config_flags_set: Optional Processor specific Config Flags to set. + * Setting a bit here implies the corresponding mode + * will be set + * @config_flags_clear: Optional Processor specific Config Flags to clear. + * Setting a bit here implies the corresponding mode + * will be cleared + * + * Request type is TI_SCI_MSG_PROC_HANDOVER, response is a generic ACK/NACK + * message. + */ +struct ti_sci_msg_req_set_config { + struct ti_sci_msg_hdr hdr; + u8 processor_id; + u32 bootvector_low; + u32 bootvector_high; + u32 config_flags_set; + u32 config_flags_clear; +} __packed; + +/** + * struct ti_sci_msg_req_set_ctrl - Set Processor boot control flags + * @hdr: Generic Header + * @processor_id: ID of processor being configured + * @control_flags_set: Optional Processor specific Control Flags to set. + * Setting a bit here implies the corresponding mode + * will be set + * @control_flags_clear:Optional Processor specific Control Flags to clear. + * Setting a bit here implies the corresponding mode + * will be cleared + * + * Request type is TI_SCI_MSG_SET_CTRL, response is a generic ACK/NACK + * message. + */ +struct ti_sci_msg_req_set_ctrl { + struct ti_sci_msg_hdr hdr; + u8 processor_id; + u32 control_flags_set; + u32 control_flags_clear; +} __packed; + +/** + * struct ti_sci_msg_req_get_status - Processor boot status request + * @hdr: Generic Header + * @processor_id: ID of processor whose status is being requested + * + * Request type is TI_SCI_MSG_GET_STATUS, response is an appropriate + * message, or NACK in case of inability to satisfy request. + */ +struct ti_sci_msg_req_get_status { + struct ti_sci_msg_hdr hdr; + u8 processor_id; +} __packed; + +/** + * struct ti_sci_msg_resp_get_status - Processor boot status response + * @hdr: Generic Header + * @processor_id: ID of processor whose status is returned + * @bootvector_low: Lower 32 bit address (Little Endian) of boot vector + * @bootvector_high: Higher 32 bit address (Little Endian) of boot vector + * @config_flags: Optional Processor specific Config Flags set currently + * @control_flags: Optional Processor specific Control Flags set currently + * @status_flags: Optional Processor specific Status Flags set currently + * + * Response structure to a TI_SCI_MSG_GET_STATUS request. + */ +struct ti_sci_msg_resp_get_status { + struct ti_sci_msg_hdr hdr; + u8 processor_id; + u32 bootvector_low; + u32 bootvector_high; + u32 config_flags; + u32 control_flags; + u32 status_flags; +} __packed; + #endif /* __TI_SCI_H */ diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 4fd9bff5806b..7b3762f68df9 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -453,12 +453,42 @@ struct ti_sci_rm_udmap_ops { const struct ti_sci_msg_rm_udmap_flow_cfg *params); }; +/** + * struct ti_sci_proc_ops - Processor Control operations + * @request: Request to control a physical processor. The requesting host + * should be in the processor access list + * @release: Relinquish a physical processor control + * @handover: Handover a physical processor control to another host + * in the permitted list + * @set_config: Set base configuration of a processor + * @set_control: Setup limited control flags in specific cases + * @get_status: Get the state of physical processor + * + * NOTE: The following paramteres are generic in nature for all these ops, + * -handle: Pointer to TI SCI handle as retrieved by *ti_sci_get_handle + * -pid: Processor ID + * -hid: Host ID + */ +struct ti_sci_proc_ops { + int (*request)(const struct ti_sci_handle *handle, u8 pid); + int (*release)(const struct ti_sci_handle *handle, u8 pid); + int (*handover)(const struct ti_sci_handle *handle, u8 pid, u8 hid); + int (*set_config)(const struct ti_sci_handle *handle, u8 pid, + u64 boot_vector, u32 cfg_set, u32 cfg_clr); + int (*set_control)(const struct ti_sci_handle *handle, u8 pid, + u32 ctrl_set, u32 ctrl_clr); + int (*get_status)(const struct ti_sci_handle *handle, u8 pid, + u64 *boot_vector, u32 *cfg_flags, u32 *ctrl_flags, + u32 *status_flags); +}; + /** * struct ti_sci_ops - Function support for TI SCI * @dev_ops: Device specific operations * @clk_ops: Clock specific operations * @rm_core_ops: Resource management core operations. * @rm_irq_ops: IRQ management specific operations + * @proc_ops: Processor Control specific operations */ struct ti_sci_ops { struct ti_sci_core_ops core_ops; @@ -469,6 +499,7 @@ struct ti_sci_ops { struct ti_sci_rm_ringacc_ops rm_ring_ops; struct ti_sci_rm_psil_ops rm_psil_ops; struct ti_sci_rm_udmap_ops rm_udmap_ops; + struct ti_sci_proc_ops proc_ops; }; /** -- cgit v1.2.3 From 18c8c0954d15105b02f7d2f556b9eafae426871f Mon Sep 17 00:00:00 2001 From: Wesley Sheng Date: Tue, 30 Apr 2019 18:04:29 +0800 Subject: NTB: correct ntb_dev_ops and ntb_dev comment typos The comment for ntb_dev_ops and ntb_dev incorrectly referred to ntb_ctx_ops and ntb_device. Signed-off-by: Wesley Sheng Reviewed-by: Logan Gunthorpe Signed-off-by: Jon Mason --- include/linux/ntb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index 56a92e3ae3ae..604abc883741 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -205,7 +205,7 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) } /** - * struct ntb_ctx_ops - ntb device operations + * struct ntb_dev_ops - ntb device operations * @port_number: See ntb_port_number(). * @peer_port_count: See ntb_peer_port_count(). * @peer_port_number: See ntb_peer_port_number(). @@ -404,7 +404,7 @@ struct ntb_client { #define drv_ntb_client(__drv) container_of((__drv), struct ntb_client, drv) /** - * struct ntb_device - ntb device + * struct ntb_dev - ntb device * @dev: Linux device object. * @pdev: PCI device entry of the ntb. * @topo: Detected topology of the ntb. -- cgit v1.2.3 From d7cc609fb679e11dc2b00cbe6c50cbd37ec4bfa2 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 23 May 2019 16:30:51 -0600 Subject: PCI/MSI: Support allocating virtual MSI interrupts For NTB devices, we want to be able to trigger MSI interrupts through a memory window. In these cases we may want to use more interrupts than the NTB PCI device has available in its MSI-X table. We allow for this by creating a new 'virtual' interrupt. These interrupts are allocated as usual but are not programmed into the MSI-X table (as there may not be space for them). The MSI address and data will then handled through an NTB MSI library introduced later in this series. Signed-off-by: Logan Gunthorpe Acked-by: Bjorn Helgaas Signed-off-by: Jon Mason --- drivers/pci/msi.c | 54 ++++++++++++++++++++++++++++++++++++++++++++--------- include/linux/msi.h | 8 ++++++++ include/linux/pci.h | 9 +++++++++ 3 files changed, 62 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index e039b740fe74..ace978deaf93 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -192,6 +192,9 @@ static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) static void __iomem *pci_msix_desc_addr(struct msi_desc *desc) { + if (desc->msi_attrib.is_virtual) + return NULL; + return desc->mask_base + desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; } @@ -206,14 +209,19 @@ static void __iomem *pci_msix_desc_addr(struct msi_desc *desc) u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag) { u32 mask_bits = desc->masked; + void __iomem *desc_addr; if (pci_msi_ignore_mask) return 0; + desc_addr = pci_msix_desc_addr(desc); + if (!desc_addr) + return 0; mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; if (flag) mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT; - writel(mask_bits, pci_msix_desc_addr(desc) + PCI_MSIX_ENTRY_VECTOR_CTRL); + + writel(mask_bits, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL); return mask_bits; } @@ -273,6 +281,11 @@ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) if (entry->msi_attrib.is_msix) { void __iomem *base = pci_msix_desc_addr(entry); + if (!base) { + WARN_ON(1); + return; + } + msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR); msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR); msg->data = readl(base + PCI_MSIX_ENTRY_DATA); @@ -303,6 +316,9 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) } else if (entry->msi_attrib.is_msix) { void __iomem *base = pci_msix_desc_addr(entry); + if (!base) + goto skip; + writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR); writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR); writel(msg->data, base + PCI_MSIX_ENTRY_DATA); @@ -327,7 +343,13 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) msg->data); } } + +skip: entry->msg = *msg; + + if (entry->write_msi_msg) + entry->write_msi_msg(entry, entry->write_msi_msg_data); + } void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) @@ -550,6 +572,7 @@ msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd) entry->msi_attrib.is_msix = 0; entry->msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT); + entry->msi_attrib.is_virtual = 0; entry->msi_attrib.entry_nr = 0; entry->msi_attrib.maskbit = !!(control & PCI_MSI_FLAGS_MASKBIT); entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */ @@ -674,6 +697,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, struct irq_affinity_desc *curmsk, *masks = NULL; struct msi_desc *entry; int ret, i; + int vec_count = pci_msix_vec_count(dev); if (affd) masks = irq_create_affinity_masks(nvec, affd); @@ -696,6 +720,10 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, entry->msi_attrib.entry_nr = entries[i].entry; else entry->msi_attrib.entry_nr = i; + + entry->msi_attrib.is_virtual = + entry->msi_attrib.entry_nr >= vec_count; + entry->msi_attrib.default_irq = dev->irq; entry->mask_base = base; @@ -714,12 +742,19 @@ static void msix_program_entries(struct pci_dev *dev, { struct msi_desc *entry; int i = 0; + void __iomem *desc_addr; for_each_pci_msi_entry(entry, dev) { if (entries) entries[i++].vector = entry->irq; - entry->masked = readl(pci_msix_desc_addr(entry) + - PCI_MSIX_ENTRY_VECTOR_CTRL); + + desc_addr = pci_msix_desc_addr(entry); + if (desc_addr) + entry->masked = readl(desc_addr + + PCI_MSIX_ENTRY_VECTOR_CTRL); + else + entry->masked = 0; + msix_mask_irq(entry, 1); } } @@ -932,7 +967,7 @@ int pci_msix_vec_count(struct pci_dev *dev) EXPORT_SYMBOL(pci_msix_vec_count); static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, - int nvec, struct irq_affinity *affd) + int nvec, struct irq_affinity *affd, int flags) { int nr_entries; int i, j; @@ -943,7 +978,7 @@ static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, nr_entries = pci_msix_vec_count(dev); if (nr_entries < 0) return nr_entries; - if (nvec > nr_entries) + if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL)) return nr_entries; if (entries) { @@ -1079,7 +1114,8 @@ EXPORT_SYMBOL(pci_enable_msi); static int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, - int maxvec, struct irq_affinity *affd) + int maxvec, struct irq_affinity *affd, + int flags) { int rc, nvec = maxvec; @@ -1096,7 +1132,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev, return -ENOSPC; } - rc = __pci_enable_msix(dev, entries, nvec, affd); + rc = __pci_enable_msix(dev, entries, nvec, affd, flags); if (rc == 0) return nvec; @@ -1127,7 +1163,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev, int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, int maxvec) { - return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL); + return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0); } EXPORT_SYMBOL(pci_enable_msix_range); @@ -1167,7 +1203,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, if (flags & PCI_IRQ_MSIX) { msix_vecs = __pci_enable_msix_range(dev, NULL, min_vecs, - max_vecs, affd); + max_vecs, affd, flags); if (msix_vecs > 0) return msix_vecs; } diff --git a/include/linux/msi.h b/include/linux/msi.h index d48e919d55ae..8ad679e9d9c0 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -64,6 +64,10 @@ struct ti_sci_inta_msi_desc { * @msg: The last set MSI message cached for reuse * @affinity: Optional pointer to a cpu affinity mask for this descriptor * + * @write_msi_msg: Callback that may be called when the MSI message + * address or data changes + * @write_msi_msg_data: Data parameter for the callback. + * * @masked: [PCI MSI/X] Mask bits * @is_msix: [PCI MSI/X] True if MSI-X * @multiple: [PCI MSI/X] log2 num of messages allocated @@ -90,6 +94,9 @@ struct msi_desc { const void *iommu_cookie; #endif + void (*write_msi_msg)(struct msi_desc *entry, void *data); + void *write_msi_msg_data; + union { /* PCI MSI/X specific data */ struct { @@ -100,6 +107,7 @@ struct msi_desc { u8 multi_cap : 3; u8 maskbit : 1; u8 is_64 : 1; + u8 is_virtual : 1; u16 entry_nr; unsigned default_irq; } msi_attrib; diff --git a/include/linux/pci.h b/include/linux/pci.h index 4a5a84d7bdd4..19b5c27c6f63 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1362,6 +1362,15 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode, #define PCI_IRQ_MSI (1 << 1) /* Allow MSI interrupts */ #define PCI_IRQ_MSIX (1 << 2) /* Allow MSI-X interrupts */ #define PCI_IRQ_AFFINITY (1 << 3) /* Auto-assign affinity */ + +/* + * Virtual interrupts allow for more interrupts to be allocated + * than the device has interrupts for. These are not programmed + * into the device's MSI-X table and must be handled by some + * other driver means. + */ +#define PCI_IRQ_VIRTUAL (1 << 4) + #define PCI_IRQ_ALL_TYPES \ (PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX) -- cgit v1.2.3 From 246a42c51bc5dd247629f86c87d5e1b7628343c4 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 23 May 2019 16:30:53 -0600 Subject: NTB: Introduce helper functions to calculate logical port number This patch introduces the "Logical Port Number" which is similar to the "Port Number" in that it enumerates the ports in the system. The original (or Physical) "Port Number" can be any number used by the hardware to uniquely identify a port in the system. The "Logical Port Number" enumerates all ports in the system from 0 to the number of ports minus one. For example a system with 5 ports might have the following port numbers which would be enumerated thusly: Port Number: 1 2 5 7 116 Logical Port Number: 0 1 2 3 4 The logical port number is useful when calculating which resources to use for which peers. So we thus define two helper functions: ntb_logical_port_number() and ntb_peer_logical_port_number() which provide the "Logical Port Number" for the local port and any peer respectively. Signed-off-by: Logan Gunthorpe Cc: Dave Jiang Cc: Allen Hubbe Cc: Serge Semin Signed-off-by: Jon Mason --- include/linux/ntb.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index 604abc883741..2fadd0352683 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -616,7 +616,6 @@ static inline int ntb_port_number(struct ntb_dev *ntb) return ntb->ops->port_number(ntb); } - /** * ntb_peer_port_count() - get the number of peer device ports * @ntb: NTB device context. @@ -653,6 +652,58 @@ static inline int ntb_peer_port_number(struct ntb_dev *ntb, int pidx) return ntb->ops->peer_port_number(ntb, pidx); } +/** + * ntb_logical_port_number() - get the logical port number of the local port + * @ntb: NTB device context. + * + * The Logical Port Number is defined to be a unique number for each + * port starting from zero through to the number of ports minus one. + * This is in contrast to the Port Number where each port can be assigned + * any unique physical number by the hardware. + * + * The logical port number is useful for calculating the resource indexes + * used by peers. + * + * Return: the logical port number or negative value indicating an error + */ +static inline int ntb_logical_port_number(struct ntb_dev *ntb) +{ + int lport = ntb_port_number(ntb); + int pidx; + + if (lport < 0) + return lport; + + for (pidx = 0; pidx < ntb_peer_port_count(ntb); pidx++) + if (lport <= ntb_peer_port_number(ntb, pidx)) + return pidx; + + return pidx; +} + +/** + * ntb_peer_logical_port_number() - get the logical peer port by given index + * @ntb: NTB device context. + * @pidx: Peer port index. + * + * The Logical Port Number is defined to be a unique number for each + * port starting from zero through to the number of ports minus one. + * This is in contrast to the Port Number where each port can be assigned + * any unique physical number by the hardware. + * + * The logical port number is useful for calculating the resource indexes + * used by peers. + * + * Return: the peer's logical port number or negative value indicating an error + */ +static inline int ntb_peer_logical_port_number(struct ntb_dev *ntb, int pidx) +{ + if (ntb_peer_port_number(ntb, pidx) < ntb_port_number(ntb)) + return pidx; + else + return pidx + 1; +} + /** * ntb_peer_port_idx() - get the peer device port index by given port number * @ntb: NTB device context. -- cgit v1.2.3 From 5f1b1f065c791de71017502ed3ba46779e231d9b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 23 May 2019 16:30:54 -0600 Subject: NTB: Introduce functions to calculate multi-port resource index When using multi-ports each port uses resources (dbs, msgs, mws, etc) on every other port. Creating a mapping for these resources such that each port has a corresponding resource on every other port is a bit tricky. Introduce the ntb_peer_resource_idx() function for this purpose. It returns the peer resource number that will correspond with the local peer index on the remote peer. Also, introduce ntb_peer_highest_mw_idx() which will use ntb_peer_resource_idx() but return the MW index starting with the highest index and working down. Signed-off-by: Logan Gunthorpe Cc: Dave Jiang Cc: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index 2fadd0352683..bed421b9579b 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -1557,4 +1557,74 @@ static inline int ntb_peer_msg_write(struct ntb_dev *ntb, int pidx, int midx, return ntb->ops->peer_msg_write(ntb, pidx, midx, msg); } +/** + * ntb_peer_resource_idx() - get a resource index for a given peer idx + * @ntb: NTB device context. + * @pidx: Peer port index. + * + * When constructing a graph of peers, each remote peer must use a different + * resource index (mw, doorbell, etc) to communicate with each other + * peer. + * + * In a two peer system, this function should always return 0 such that + * resource 0 points to the remote peer on both ports. + * + * In a 5 peer system, this function will return the following matrix + * + * pidx \ port 0 1 2 3 4 + * 0 0 0 1 2 3 + * 1 0 1 1 2 3 + * 2 0 1 2 2 3 + * 3 0 1 2 3 3 + * + * For example, if this function is used to program peer's memory + * windows, port 0 will program MW 0 on all it's peers to point to itself. + * port 1 will program MW 0 in port 0 to point to itself and MW 1 on all + * other ports. etc. + * + * For the legacy two host case, ntb_port_number() and ntb_peer_port_number() + * both return zero and therefore this function will always return zero. + * So MW 0 on each host would be programmed to point to the other host. + * + * Return: the resource index to use for that peer. + */ +static inline int ntb_peer_resource_idx(struct ntb_dev *ntb, int pidx) +{ + int local_port, peer_port; + + if (pidx >= ntb_peer_port_count(ntb)) + return -EINVAL; + + local_port = ntb_logical_port_number(ntb); + peer_port = ntb_peer_logical_port_number(ntb, pidx); + + if (peer_port < local_port) + return local_port - 1; + else + return local_port; +} + +/** + * ntb_peer_highest_mw_idx() - get a memory window index for a given peer idx + * using the highest index memory windows first + * + * @ntb: NTB device context. + * @pidx: Peer port index. + * + * Like ntb_peer_resource_idx(), except it returns indexes starting with + * last memory window index. + * + * Return: the resource index to use for that peer. + */ +static inline int ntb_peer_highest_mw_idx(struct ntb_dev *ntb, int pidx) +{ + int ret; + + ret = ntb_peer_resource_idx(ntb, pidx); + if (ret < 0) + return ret; + + return ntb_mw_count(ntb, pidx) - ret - 1; +} + #endif -- cgit v1.2.3 From 26b3a37b928457ba2cd98eaf6d7b0feca5a30fa6 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 23 May 2019 16:30:56 -0600 Subject: NTB: Introduce MSI library The NTB MSI library allows passing MSI interrupts across a memory window. This offers similar functionality to doorbells or messages except will often have much better latency and the client can potentially use significantly more remote interrupts than typical hardware provides for doorbells. (Which can be important in high-multiport setups.) The library utilizes one memory window per peer and uses the highest index memory windows. Before any ntb_msi function may be used, the user must call ntb_msi_init(). It may then setup and tear down the memory windows when the link state changes using ntb_msi_setup_mws() and ntb_msi_clear_mws(). The peer which receives the interrupt must call ntb_msim_request_irq() to assign the interrupt handler (this function is functionally similar to devm_request_irq()) and the returned descriptor must be transferred to the peer which can use it to trigger the interrupt. The triggering peer, once having received the descriptor, can trigger the interrupt by calling ntb_msi_peer_trigger(). Signed-off-by: Logan Gunthorpe Cc: Dave Jiang Cc: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/Kconfig | 11 ++ drivers/ntb/Makefile | 3 +- drivers/ntb/msi.c | 415 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/ntb.h | 73 +++++++++ 4 files changed, 501 insertions(+), 1 deletion(-) create mode 100644 drivers/ntb/msi.c (limited to 'include/linux') diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig index 95944e52fa36..5760764052be 100644 --- a/drivers/ntb/Kconfig +++ b/drivers/ntb/Kconfig @@ -12,6 +12,17 @@ menuconfig NTB if NTB +config NTB_MSI + bool "MSI Interrupt Support" + depends on PCI_MSI + help + Support using MSI interrupt forwarding instead of (or in addition to) + hardware doorbells. MSI interrupts typically offer lower latency + than doorbells and more MSI interrupts can be made available to + clients. However this requires an extra memory window and support + in the hardware driver for creating the MSI interrupts. + + If unsure, say N. source "drivers/ntb/hw/Kconfig" source "drivers/ntb/test/Kconfig" diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile index 537226f8e78d..cc27ad2ef150 100644 --- a/drivers/ntb/Makefile +++ b/drivers/ntb/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_NTB) += ntb.o hw/ test/ obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o -ntb-y := core.o +ntb-y := core.o +ntb-$(CONFIG_NTB_MSI) += msi.o diff --git a/drivers/ntb/msi.c b/drivers/ntb/msi.c new file mode 100644 index 000000000000..9dddf133658f --- /dev/null +++ b/drivers/ntb/msi.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) + +#include +#include +#include +#include +#include + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION("0.1"); +MODULE_AUTHOR("Logan Gunthorpe "); +MODULE_DESCRIPTION("NTB MSI Interrupt Library"); + +struct ntb_msi { + u64 base_addr; + u64 end_addr; + + void (*desc_changed)(void *ctx); + + u32 __iomem *peer_mws[]; +}; + +/** + * ntb_msi_init() - Initialize the MSI context + * @ntb: NTB device context + * + * This function must be called before any other ntb_msi function. + * It initializes the context for MSI operations and maps + * the peer memory windows. + * + * This function reserves the last N outbound memory windows (where N + * is the number of peers). + * + * Return: Zero on success, otherwise a negative error number. + */ +int ntb_msi_init(struct ntb_dev *ntb, + void (*desc_changed)(void *ctx)) +{ + phys_addr_t mw_phys_addr; + resource_size_t mw_size; + size_t struct_size; + int peer_widx; + int peers; + int ret; + int i; + + peers = ntb_peer_port_count(ntb); + if (peers <= 0) + return -EINVAL; + + struct_size = sizeof(*ntb->msi) + sizeof(*ntb->msi->peer_mws) * peers; + + ntb->msi = devm_kzalloc(&ntb->dev, struct_size, GFP_KERNEL); + if (!ntb->msi) + return -ENOMEM; + + ntb->msi->desc_changed = desc_changed; + + for (i = 0; i < peers; i++) { + peer_widx = ntb_peer_mw_count(ntb) - 1 - i; + + ret = ntb_peer_mw_get_addr(ntb, peer_widx, &mw_phys_addr, + &mw_size); + if (ret) + goto unroll; + + ntb->msi->peer_mws[i] = devm_ioremap(&ntb->dev, mw_phys_addr, + mw_size); + if (!ntb->msi->peer_mws[i]) { + ret = -EFAULT; + goto unroll; + } + } + + return 0; + +unroll: + for (i = 0; i < peers; i++) + if (ntb->msi->peer_mws[i]) + devm_iounmap(&ntb->dev, ntb->msi->peer_mws[i]); + + devm_kfree(&ntb->dev, ntb->msi); + ntb->msi = NULL; + return ret; +} +EXPORT_SYMBOL(ntb_msi_init); + +/** + * ntb_msi_setup_mws() - Initialize the MSI inbound memory windows + * @ntb: NTB device context + * + * This function sets up the required inbound memory windows. It should be + * called from a work function after a link up event. + * + * Over the entire network, this function will reserves the last N + * inbound memory windows for each peer (where N is the number of peers). + * + * ntb_msi_init() must be called before this function. + * + * Return: Zero on success, otherwise a negative error number. + */ +int ntb_msi_setup_mws(struct ntb_dev *ntb) +{ + struct msi_desc *desc; + u64 addr; + int peer, peer_widx; + resource_size_t addr_align, size_align, size_max; + resource_size_t mw_size = SZ_32K; + resource_size_t mw_min_size = mw_size; + int i; + int ret; + + if (!ntb->msi) + return -EINVAL; + + desc = first_msi_entry(&ntb->pdev->dev); + addr = desc->msg.address_lo + ((uint64_t)desc->msg.address_hi << 32); + + for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) { + peer_widx = ntb_peer_highest_mw_idx(ntb, peer); + if (peer_widx < 0) + return peer_widx; + + ret = ntb_mw_get_align(ntb, peer, peer_widx, &addr_align, + NULL, NULL); + if (ret) + return ret; + + addr &= ~(addr_align - 1); + } + + for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) { + peer_widx = ntb_peer_highest_mw_idx(ntb, peer); + if (peer_widx < 0) { + ret = peer_widx; + goto error_out; + } + + ret = ntb_mw_get_align(ntb, peer, peer_widx, NULL, + &size_align, &size_max); + if (ret) + goto error_out; + + mw_size = round_up(mw_size, size_align); + mw_size = max(mw_size, size_max); + if (mw_size < mw_min_size) + mw_min_size = mw_size; + + ret = ntb_mw_set_trans(ntb, peer, peer_widx, + addr, mw_size); + if (ret) + goto error_out; + } + + ntb->msi->base_addr = addr; + ntb->msi->end_addr = addr + mw_min_size; + + return 0; + +error_out: + for (i = 0; i < peer; i++) { + peer_widx = ntb_peer_highest_mw_idx(ntb, peer); + if (peer_widx < 0) + continue; + + ntb_mw_clear_trans(ntb, i, peer_widx); + } + + return ret; +} +EXPORT_SYMBOL(ntb_msi_setup_mws); + +/** + * ntb_msi_clear_mws() - Clear all inbound memory windows + * @ntb: NTB device context + * + * This function tears down the resources used by ntb_msi_setup_mws(). + */ +void ntb_msi_clear_mws(struct ntb_dev *ntb) +{ + int peer; + int peer_widx; + + for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) { + peer_widx = ntb_peer_highest_mw_idx(ntb, peer); + if (peer_widx < 0) + continue; + + ntb_mw_clear_trans(ntb, peer, peer_widx); + } +} +EXPORT_SYMBOL(ntb_msi_clear_mws); + +struct ntb_msi_devres { + struct ntb_dev *ntb; + struct msi_desc *entry; + struct ntb_msi_desc *msi_desc; +}; + +static int ntb_msi_set_desc(struct ntb_dev *ntb, struct msi_desc *entry, + struct ntb_msi_desc *msi_desc) +{ + u64 addr; + + addr = entry->msg.address_lo + + ((uint64_t)entry->msg.address_hi << 32); + + if (addr < ntb->msi->base_addr || addr >= ntb->msi->end_addr) { + dev_warn_once(&ntb->dev, + "IRQ %d: MSI Address not within the memory window (%llx, [%llx %llx])\n", + entry->irq, addr, ntb->msi->base_addr, + ntb->msi->end_addr); + return -EFAULT; + } + + msi_desc->addr_offset = addr - ntb->msi->base_addr; + msi_desc->data = entry->msg.data; + + return 0; +} + +static void ntb_msi_write_msg(struct msi_desc *entry, void *data) +{ + struct ntb_msi_devres *dr = data; + + WARN_ON(ntb_msi_set_desc(dr->ntb, entry, dr->msi_desc)); + + if (dr->ntb->msi->desc_changed) + dr->ntb->msi->desc_changed(dr->ntb->ctx); +} + +static void ntbm_msi_callback_release(struct device *dev, void *res) +{ + struct ntb_msi_devres *dr = res; + + dr->entry->write_msi_msg = NULL; + dr->entry->write_msi_msg_data = NULL; +} + +static int ntbm_msi_setup_callback(struct ntb_dev *ntb, struct msi_desc *entry, + struct ntb_msi_desc *msi_desc) +{ + struct ntb_msi_devres *dr; + + dr = devres_alloc(ntbm_msi_callback_release, + sizeof(struct ntb_msi_devres), GFP_KERNEL); + if (!dr) + return -ENOMEM; + + dr->ntb = ntb; + dr->entry = entry; + dr->msi_desc = msi_desc; + + devres_add(&ntb->dev, dr); + + dr->entry->write_msi_msg = ntb_msi_write_msg; + dr->entry->write_msi_msg_data = dr; + + return 0; +} + +/** + * ntbm_msi_request_threaded_irq() - allocate an MSI interrupt + * @ntb: NTB device context + * @handler: Function to be called when the IRQ occurs + * @thread_fn: Function to be called in a threaded interrupt context. NULL + * for clients which handle everything in @handler + * @devname: An ascii name for the claiming device, dev_name(dev) if NULL + * @dev_id: A cookie passed back to the handler function + * + * This function assigns an interrupt handler to an unused + * MSI interrupt and returns the descriptor used to trigger + * it. The descriptor can then be sent to a peer to trigger + * the interrupt. + * + * The interrupt resource is managed with devres so it will + * be automatically freed when the NTB device is torn down. + * + * If an IRQ allocated with this function needs to be freed + * separately, ntbm_free_irq() must be used. + * + * Return: IRQ number assigned on success, otherwise a negative error number. + */ +int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler, + irq_handler_t thread_fn, + const char *name, void *dev_id, + struct ntb_msi_desc *msi_desc) +{ + struct msi_desc *entry; + struct irq_desc *desc; + int ret; + + if (!ntb->msi) + return -EINVAL; + + for_each_pci_msi_entry(entry, ntb->pdev) { + desc = irq_to_desc(entry->irq); + if (desc->action) + continue; + + ret = devm_request_threaded_irq(&ntb->dev, entry->irq, handler, + thread_fn, 0, name, dev_id); + if (ret) + continue; + + if (ntb_msi_set_desc(ntb, entry, msi_desc)) { + devm_free_irq(&ntb->dev, entry->irq, dev_id); + continue; + } + + ret = ntbm_msi_setup_callback(ntb, entry, msi_desc); + if (ret) { + devm_free_irq(&ntb->dev, entry->irq, dev_id); + return ret; + } + + + return entry->irq; + } + + return -ENODEV; +} +EXPORT_SYMBOL(ntbm_msi_request_threaded_irq); + +static int ntbm_msi_callback_match(struct device *dev, void *res, void *data) +{ + struct ntb_dev *ntb = dev_ntb(dev); + struct ntb_msi_devres *dr = res; + + return dr->ntb == ntb && dr->entry == data; +} + +/** + * ntbm_msi_free_irq() - free an interrupt + * @ntb: NTB device context + * @irq: Interrupt line to free + * @dev_id: Device identity to free + * + * This function should be used to manually free IRQs allocated with + * ntbm_request_[threaded_]irq(). + */ +void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id) +{ + struct msi_desc *entry = irq_get_msi_desc(irq); + + entry->write_msi_msg = NULL; + entry->write_msi_msg_data = NULL; + + WARN_ON(devres_destroy(&ntb->dev, ntbm_msi_callback_release, + ntbm_msi_callback_match, entry)); + + devm_free_irq(&ntb->dev, irq, dev_id); +} +EXPORT_SYMBOL(ntbm_msi_free_irq); + +/** + * ntb_msi_peer_trigger() - Trigger an interrupt handler on a peer + * @ntb: NTB device context + * @peer: Peer index + * @desc: MSI descriptor data which triggers the interrupt + * + * This function triggers an interrupt on a peer. It requires + * the descriptor structure to have been passed from that peer + * by some other means. + * + * Return: Zero on success, otherwise a negative error number. + */ +int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer, + struct ntb_msi_desc *desc) +{ + int idx; + + if (!ntb->msi) + return -EINVAL; + + idx = desc->addr_offset / sizeof(*ntb->msi->peer_mws[peer]); + + iowrite32(desc->data, &ntb->msi->peer_mws[peer][idx]); + + return 0; +} +EXPORT_SYMBOL(ntb_msi_peer_trigger); + +/** + * ntb_msi_peer_addr() - Get the DMA address to trigger a peer's MSI interrupt + * @ntb: NTB device context + * @peer: Peer index + * @desc: MSI descriptor data which triggers the interrupt + * @msi_addr: Physical address to trigger the interrupt + * + * This function allows using DMA engines to trigger an interrupt + * (for example, trigger an interrupt to process the data after + * sending it). To trigger the interrupt, write @desc.data to the address + * returned in @msi_addr + * + * Return: Zero on success, otherwise a negative error number. + */ +int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer, + struct ntb_msi_desc *desc, + phys_addr_t *msi_addr) +{ + int peer_widx = ntb_peer_mw_count(ntb) - 1 - peer; + phys_addr_t mw_phys_addr; + int ret; + + ret = ntb_peer_mw_get_addr(ntb, peer_widx, &mw_phys_addr, NULL); + if (ret) + return ret; + + if (msi_addr) + *msi_addr = mw_phys_addr + desc->addr_offset; + + return 0; +} +EXPORT_SYMBOL(ntb_msi_peer_addr); diff --git a/include/linux/ntb.h b/include/linux/ntb.h index bed421b9579b..8c13538aeffe 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -58,9 +58,11 @@ #include #include +#include struct ntb_client; struct ntb_dev; +struct ntb_msi; struct pci_dev; /** @@ -426,6 +428,10 @@ struct ntb_dev { spinlock_t ctx_lock; /* block unregister until device is fully released */ struct completion released; + +#ifdef CONFIG_NTB_MSI + struct ntb_msi *msi; +#endif }; #define dev_ntb(__dev) container_of((__dev), struct ntb_dev, dev) @@ -1627,4 +1633,71 @@ static inline int ntb_peer_highest_mw_idx(struct ntb_dev *ntb, int pidx) return ntb_mw_count(ntb, pidx) - ret - 1; } +struct ntb_msi_desc { + u32 addr_offset; + u32 data; +}; + +#ifdef CONFIG_NTB_MSI + +int ntb_msi_init(struct ntb_dev *ntb, void (*desc_changed)(void *ctx)); +int ntb_msi_setup_mws(struct ntb_dev *ntb); +void ntb_msi_clear_mws(struct ntb_dev *ntb); +int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler, + irq_handler_t thread_fn, + const char *name, void *dev_id, + struct ntb_msi_desc *msi_desc); +void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id); +int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer, + struct ntb_msi_desc *desc); +int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer, + struct ntb_msi_desc *desc, + phys_addr_t *msi_addr); + +#else /* not CONFIG_NTB_MSI */ + +static inline int ntb_msi_init(struct ntb_dev *ntb, + void (*desc_changed)(void *ctx)) +{ + return -EOPNOTSUPP; +} +static inline int ntb_msi_setup_mws(struct ntb_dev *ntb) +{ + return -EOPNOTSUPP; +} +static inline void ntb_msi_clear_mws(struct ntb_dev *ntb) {} +static inline int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, + irq_handler_t handler, + irq_handler_t thread_fn, + const char *name, void *dev_id, + struct ntb_msi_desc *msi_desc) +{ + return -EOPNOTSUPP; +} +static inline void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, + void *dev_id) {} +static inline int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer, + struct ntb_msi_desc *desc) +{ + return -EOPNOTSUPP; +} +static inline int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer, + struct ntb_msi_desc *desc, + phys_addr_t *msi_addr) +{ + return -EOPNOTSUPP; + +} + +#endif /* CONFIG_NTB_MSI */ + +static inline int ntbm_msi_request_irq(struct ntb_dev *ntb, + irq_handler_t handler, + const char *name, void *dev_id, + struct ntb_msi_desc *msi_desc) +{ + return ntbm_msi_request_threaded_irq(ntb, handler, NULL, name, + dev_id, msi_desc); +} + #endif -- cgit v1.2.3 From fd742eaab827b47c5f2de6c1811a17075608da60 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 9 Jun 2019 13:48:12 +0200 Subject: regulator: max8952: Convert to use GPIO descriptors This finalizes the descriptor conversion of the MAX8952 driver by letting the VID0 and VID1 GPIOs be fetched from descriptors. Both VID0 and VID1 must be supplied for the VID selection to work, I add some code to preserve the semantics that if only one of the two VID gpios is supplied, it will be initialized to low. This might be a bit overzealous, but I want to preserve any implicit semantics. This is currently only used by device tree in-kernel but it is still also possible to supply the same GPIOs using a machine descriptor table if a board file is used. Ideally this should be phased over to using gpio-regulator.c that does the same thing, but it might require some refactoring and needs testing on real hardware. Cc: Tomasz Figa Cc: MyungJoo Ham Cc: Marek Szyprowski Signed-off-by: Linus Walleij Signed-off-by: Mark Brown --- drivers/regulator/max8952.c | 62 ++++++++++++++++++--------------------- include/linux/regulator/max8952.h | 3 -- 2 files changed, 28 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/max8952.c b/drivers/regulator/max8952.c index 451237efb359..8f0e4dc810f0 100644 --- a/drivers/regulator/max8952.c +++ b/drivers/regulator/max8952.c @@ -26,11 +26,9 @@ #include #include #include -#include #include #include #include -#include #include #include @@ -50,7 +48,8 @@ enum { struct max8952_data { struct i2c_client *client; struct max8952_platform_data *pdata; - + struct gpio_desc *vid0_gpiod; + struct gpio_desc *vid1_gpiod; bool vid0; bool vid1; }; @@ -100,16 +99,15 @@ static int max8952_set_voltage_sel(struct regulator_dev *rdev, { struct max8952_data *max8952 = rdev_get_drvdata(rdev); - if (!gpio_is_valid(max8952->pdata->gpio_vid0) || - !gpio_is_valid(max8952->pdata->gpio_vid1)) { + if (!max8952->vid0_gpiod || !max8952->vid1_gpiod) { /* DVS not supported */ return -EPERM; } max8952->vid0 = selector & 0x1; max8952->vid1 = (selector >> 1) & 0x1; - gpio_set_value(max8952->pdata->gpio_vid0, max8952->vid0); - gpio_set_value(max8952->pdata->gpio_vid1, max8952->vid1); + gpiod_set_value(max8952->vid0_gpiod, max8952->vid0); + gpiod_set_value(max8952->vid1_gpiod, max8952->vid1); return 0; } @@ -147,9 +145,6 @@ static struct max8952_platform_data *max8952_parse_dt(struct device *dev) if (!pd) return NULL; - pd->gpio_vid0 = of_get_named_gpio(np, "max8952,vid-gpios", 0); - pd->gpio_vid1 = of_get_named_gpio(np, "max8952,vid-gpios", 1); - if (of_property_read_u32(np, "max8952,default-mode", &pd->default_mode)) dev_warn(dev, "Default mode not specified, assuming 0\n"); @@ -200,7 +195,7 @@ static int max8952_pmic_probe(struct i2c_client *client, struct gpio_desc *gpiod; enum gpiod_flags gflags; - int ret = 0, err = 0; + int ret = 0; if (client->dev.of_node) pdata = max8952_parse_dt(&client->dev); @@ -253,32 +248,31 @@ static int max8952_pmic_probe(struct i2c_client *client, max8952->vid0 = pdata->default_mode & 0x1; max8952->vid1 = (pdata->default_mode >> 1) & 0x1; - if (gpio_is_valid(pdata->gpio_vid0) && - gpio_is_valid(pdata->gpio_vid1)) { - unsigned long gpio_flags; - - gpio_flags = max8952->vid0 ? - GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW; - if (devm_gpio_request_one(&client->dev, pdata->gpio_vid0, - gpio_flags, "MAX8952 VID0")) - err = 1; - - gpio_flags = max8952->vid1 ? - GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW; - if (devm_gpio_request_one(&client->dev, pdata->gpio_vid1, - gpio_flags, "MAX8952 VID1")) - err = 2; - } else - err = 3; - - if (err) { + /* Fetch vid0 and vid1 GPIOs if available */ + gflags = max8952->vid0 ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW; + max8952->vid0_gpiod = devm_gpiod_get_index_optional(&client->dev, + "max8952,vid", + 0, gflags); + if (IS_ERR(max8952->vid0_gpiod)) + return PTR_ERR(max8952->vid0_gpiod); + gflags = max8952->vid1 ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW; + max8952->vid1_gpiod = devm_gpiod_get_index_optional(&client->dev, + "max8952,vid", + 1, gflags); + if (IS_ERR(max8952->vid1_gpiod)) + return PTR_ERR(max8952->vid1_gpiod); + + /* If either VID GPIO is missing just disable this */ + if (!max8952->vid0_gpiod || !max8952->vid1_gpiod) { dev_warn(&client->dev, "VID0/1 gpio invalid: " - "DVS not available.\n"); + "DVS not available.\n"); max8952->vid0 = 0; max8952->vid1 = 0; - /* Mark invalid */ - pdata->gpio_vid0 = -1; - pdata->gpio_vid1 = -1; + /* Make sure if we have any descriptors they get set to low */ + if (max8952->vid0_gpiod) + gpiod_set_value(max8952->vid0_gpiod, 0); + if (max8952->vid1_gpiod) + gpiod_set_value(max8952->vid1_gpiod, 0); /* Disable Pulldown of EN only */ max8952_write_reg(max8952, MAX8952_REG_CONTROL, 0x60); diff --git a/include/linux/regulator/max8952.h b/include/linux/regulator/max8952.h index 686c42c041b5..33b6e2c09c05 100644 --- a/include/linux/regulator/max8952.h +++ b/include/linux/regulator/max8952.h @@ -118,9 +118,6 @@ enum { #define MAX8952_NUM_DVS_MODE 4 struct max8952_platform_data { - int gpio_vid0; - int gpio_vid1; - u32 default_mode; u32 dvs_mode[MAX8952_NUM_DVS_MODE]; /* MAX8952_DVS_MODEx_XXXXmV */ -- cgit v1.2.3 From 86eec50beaf3a45f6432d491072fa5c54284dbca Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Mon, 10 Jun 2019 23:38:19 +0000 Subject: net/mlx5: Support querying max VFs from device For ECPF with eswitch manager privilege, query the host max VF count by querying the device using query_functions command. With this enhancement: 1. flow steering entries are created only for valid vports based on the max VF count of the PF. 2. Driver only queries cap of valid vport. Eswitch requires the max VFs when doing initialization, so do sr-iov init before eswitch init. Signed-off-by: Bodong Wang Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 18 +++++++++--------- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 22 ++++++++++++++++++++++ include/linux/mlx5/driver.h | 7 ++----- include/linux/mlx5/mlx5_ifc.h | 2 +- 4 files changed, 34 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 8e96c42d3b84..720f65bfe6a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -844,32 +844,32 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_rl_cleanup; } - err = mlx5_eswitch_init(dev); + err = mlx5_sriov_init(dev); if (err) { - mlx5_core_err(dev, "Failed to init eswitch %d\n", err); + mlx5_core_err(dev, "Failed to init sriov %d\n", err); goto err_mpfs_cleanup; } - err = mlx5_sriov_init(dev); + err = mlx5_eswitch_init(dev); if (err) { - mlx5_core_err(dev, "Failed to init sriov %d\n", err); - goto err_eswitch_cleanup; + mlx5_core_err(dev, "Failed to init eswitch %d\n", err); + goto err_sriov_cleanup; } err = mlx5_fpga_init(dev); if (err) { mlx5_core_err(dev, "Failed to init fpga device %d\n", err); - goto err_sriov_cleanup; + goto err_eswitch_cleanup; } dev->tracer = mlx5_fw_tracer_create(dev); return 0; -err_sriov_cleanup: - mlx5_sriov_cleanup(dev); err_eswitch_cleanup: mlx5_eswitch_cleanup(dev->priv.eswitch); +err_sriov_cleanup: + mlx5_sriov_cleanup(dev); err_mpfs_cleanup: mlx5_mpfs_cleanup(dev); err_rl_cleanup: @@ -893,8 +893,8 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { mlx5_fw_tracer_destroy(dev->tracer); mlx5_fpga_cleanup(dev); - mlx5_sriov_cleanup(dev); mlx5_eswitch_cleanup(dev->priv.eswitch); + mlx5_sriov_cleanup(dev); mlx5_mpfs_cleanup(dev); mlx5_cleanup_rl_table(dev); mlx5_vxlan_destroy(dev->vxlan); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index a249b3c3843d..2eecb831c499 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -208,6 +208,27 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev) mlx5_device_disable_sriov(dev); } +static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {}; + u16 host_total_vfs; + int err; + + if (mlx5_core_is_ecpf_esw_manager(dev)) { + err = mlx5_esw_query_functions(dev, out, sizeof(out)); + host_total_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_total_vfs); + + /* Old FW doesn't support getting total_vfs from esw func + * but supports getting it from pci_sriov. + */ + if (!err && host_total_vfs) + return host_total_vfs; + } + + return pci_sriov_get_totalvfs(dev->pdev); +} + int mlx5_sriov_init(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; @@ -218,6 +239,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) return 0; total_vfs = pci_sriov_get_totalvfs(pdev); + sriov->max_vfs = mlx5_get_max_vfs(dev); sriov->num_vfs = pci_num_vf(pdev); sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); if (!sriov->vfs_ctx) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b5431f7d97cb..64155fe201ee 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -470,6 +470,7 @@ struct mlx5_core_sriov { struct mlx5_vf_context *vfs_ctx; int num_vfs; int enabled_vfs; + u16 max_vfs; }; struct mlx5_fc_stats { @@ -1103,13 +1104,9 @@ static inline bool mlx5_ecpf_vport_exists(struct mlx5_core_dev *dev) return mlx5_core_is_pf(dev) && MLX5_CAP_ESW(dev, ecpf_vport_exists); } -#define MLX5_HOST_PF_MAX_VFS (127u) static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev) { - if (mlx5_core_is_ecpf_esw_manager(dev)) - return MLX5_HOST_PF_MAX_VFS; - else - return pci_sriov_get_totalvfs(dev->pdev); + return dev->priv.sriov.max_vfs; } static inline int mlx5_get_gid_table_len(u16 param) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6513b985c5e9..e3c154b573a2 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9711,7 +9711,7 @@ struct mlx5_ifc_host_params_context_bits { u8 reserved_at_8[0x8]; u8 host_num_of_vfs[0x10]; - u8 reserved_at_20[0x10]; + u8 host_total_vfs[0x10]; u8 host_pci_bus[0x10]; u8 reserved_at_40[0x10]; -- cgit v1.2.3 From ca390799c2aa03632c294107fa7f647bcbdff428 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:23 +0000 Subject: net/mlx5: Change interrupt handler to call chain notifier Multiple EQs may share the same IRQ in subsequent patches. Instead of calling the IRQ handler directly, the EQ will register to an atomic chain notfier. The Linux built-in shared IRQ is not used because it forces the caller to disable the IRQ and clear affinity before free_irq() can be called. This patch is the first step in the separation of IRQ and EQ logic. Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/odp.c | 10 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 138 +++++++++++++++-------- drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 9 +- include/linux/mlx5/eq.h | 3 +- 5 files changed, 105 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 40eb8be482e4..a043af7ee366 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -920,6 +920,7 @@ struct mlx5_ib_lb_state { }; struct mlx5_ib_pf_eq { + struct notifier_block irq_nb; struct mlx5_ib_dev *dev; struct mlx5_eq *core; struct work_struct work; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 91507a2e9290..ac40a4fd5598 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1488,9 +1488,11 @@ static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq) mlx5_eq_update_ci(eq->core, cc, 1); } -static irqreturn_t mlx5_ib_eq_pf_int(int irq, void *eq_ptr) +static int mlx5_ib_eq_pf_int(struct notifier_block *nb, unsigned long type, + void *data) { - struct mlx5_ib_pf_eq *eq = eq_ptr; + struct mlx5_ib_pf_eq *eq = + container_of(nb, struct mlx5_ib_pf_eq, irq_nb); unsigned long flags; if (spin_trylock_irqsave(&eq->lock, flags)) { @@ -1553,12 +1555,12 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) goto err_mempool; } + eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int; param = (struct mlx5_eq_param) { .index = MLX5_EQ_PFAULT_IDX, .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT, .nent = MLX5_IB_NUM_PF_EQE, - .context = eq, - .handler = mlx5_ib_eq_pf_int + .nb = &eq->irq_nb, }; eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", ¶m); if (IS_ERR(eq->core)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 28defeaca80a..590c0fefaa25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -72,16 +72,16 @@ enum { static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE); struct mlx5_irq_info { + struct atomic_notifier_head nh; cpumask_var_t mask; char name[MLX5_MAX_IRQ_NAME]; - void *context; /* dev_id provided to request_irq */ }; struct mlx5_eq_table { struct list_head comp_eqs_list; - struct mlx5_eq pages_eq; - struct mlx5_eq cmd_eq; - struct mlx5_eq async_eq; + struct mlx5_eq_async pages_eq; + struct mlx5_eq_async cmd_eq; + struct mlx5_eq_async async_eq; struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX]; @@ -109,6 +109,31 @@ struct mlx5_eq_table { (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)) +static struct mlx5_irq_info *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + + return &eq_table->irq_info[vecidx]; +} + +static int mlx5_irq_attach_nb(struct mlx5_irq_info *irq, + struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&irq->nh, nb); +} + +static int mlx5_irq_detach_nb(struct mlx5_irq_info *irq, + struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&irq->nh, nb); +} + +static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) +{ + atomic_notifier_call_chain(nh, 0, NULL); + return IRQ_HANDLED; +} + static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) { u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; @@ -134,10 +159,13 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) return cq; } -static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) +static int mlx5_eq_comp_int(struct notifier_block *nb, + __always_unused unsigned long action, + __always_unused void *data) { - struct mlx5_eq_comp *eq_comp = eq_ptr; - struct mlx5_eq *eq = eq_ptr; + struct mlx5_eq_comp *eq_comp = + container_of(nb, struct mlx5_eq_comp, irq_nb); + struct mlx5_eq *eq = &eq_comp->core; struct mlx5_eqe *eqe; int num_eqes = 0; u32 cqn = -1; @@ -175,7 +203,7 @@ out: if (cqn != -1) tasklet_schedule(&eq_comp->tasklet_ctx.task); - return IRQ_HANDLED; + return 0; } /* Some architectures don't latch interrupts when they are disabled, so using @@ -189,16 +217,19 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) disable_irq(eq->core.irqn); count_eqe = eq->core.cons_index; - mlx5_eq_comp_int(eq->core.irqn, eq); + mlx5_eq_comp_int(&eq->irq_nb, 0, NULL); count_eqe = eq->core.cons_index - count_eqe; enable_irq(eq->core.irqn); return count_eqe; } -static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) +static int mlx5_eq_async_int(struct notifier_block *nb, + unsigned long action, void *data) { - struct mlx5_eq *eq = eq_ptr; + struct mlx5_eq_async *eq_async = + container_of(nb, struct mlx5_eq_async, irq_nb); + struct mlx5_eq *eq = &eq_async->core; struct mlx5_eq_table *eqt; struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; @@ -232,7 +263,7 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) out: eq_update_ci(eq, 1); - return IRQ_HANDLED; + return 0; } static void init_eq_buf(struct mlx5_eq *eq) @@ -254,6 +285,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; + struct mlx5_irq_info *irq_info; u8 vecidx = param->index; __be64 *pas; void *eqc; @@ -261,9 +293,6 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, u32 *in; int err; - if (eq_table->irq_info[vecidx].context) - return -EEXIST; - /* Init CQ table */ memset(cq_table, 0, sizeof(*cq_table)); spin_lock_init(&cq_table->lock); @@ -306,24 +335,31 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, if (err) goto err_in; - snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", - name, pci_name(dev->pdev)); - eq_table->irq_info[vecidx].context = param->context; + irq_info = mlx5_irq_get(dev, vecidx); + ATOMIC_INIT_NOTIFIER_HEAD(&irq_info->nh); + snprintf(irq_info->name, MLX5_MAX_IRQ_NAME, + "%s@pci:%s", name, pci_name(dev->pdev)); eq->vecidx = vecidx; eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = request_irq(eq->irqn, param->handler, 0, - eq_table->irq_info[vecidx].name, param->context); + eq->irq_nb = param->nb; + + err = request_irq(eq->irqn, mlx5_irq_int_handler, 0, irq_info->name, + &irq_info->nh); if (err) goto err_eq; - err = mlx5_debug_eq_add(dev, eq); + err = mlx5_irq_attach_nb(irq_info, param->nb); if (err) goto err_irq; + err = mlx5_debug_eq_add(dev, eq); + if (err) + goto err_detach; + /* EQs are created in ARMED state */ eq_update_ci(eq, 1); @@ -331,8 +367,11 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, kvfree(in); return 0; +err_detach: + mlx5_irq_detach_nb(irq_info, param->nb); + err_irq: - free_irq(eq->irqn, eq); + free_irq(eq->irqn, &eq_table->irq_info[vecidx].nh); err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -355,9 +394,11 @@ static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) mlx5_debug_eq_remove(dev, eq); - free_irq(eq->irqn, irq_info->context); - irq_info->context = NULL; - + err = mlx5_irq_detach_nb(irq_info, eq->irq_nb); + if (err) + mlx5_core_warn(eq->dev, "eq failed to detach from irq. err %d", + err); + free_irq(eq->irqn, &eq_table->irq_info[eq->vecidx].nh); err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", @@ -479,7 +520,7 @@ static int cq_err_event_notifier(struct notifier_block *nb, /* type == MLX5_EVENT_TYPE_CQ_ERROR */ eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb); - eq = &eqt->async_eq; + eq = &eqt->async_eq.core; eqe = data; cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; @@ -548,14 +589,14 @@ static int create_async_eqs(struct mlx5_core_dev *dev) MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); mlx5_eq_notifier_register(dev, &table->cq_err_nb); + table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .index = MLX5_EQ_CMD_IDX, .mask = 1ull << MLX5_EVENT_TYPE_CMD, .nent = MLX5_NUM_CMD_EQE, - .context = &table->cmd_eq, - .handler = mlx5_eq_async_int, + .nb = &table->cmd_eq.irq_nb, }; - err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, ¶m); + err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); goto err0; @@ -563,27 +604,29 @@ static int create_async_eqs(struct mlx5_core_dev *dev) mlx5_cmd_use_events(dev); + table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .index = MLX5_EQ_ASYNC_IDX, .mask = gather_async_events_mask(dev), .nent = MLX5_NUM_ASYNC_EQE, - .context = &table->async_eq, - .handler = mlx5_eq_async_int, + .nb = &table->async_eq.irq_nb, }; - err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, ¶m); + err = create_async_eq(dev, "mlx5_async_eq", + &table->async_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); goto err1; } + table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .index = MLX5_EQ_PAGEREQ_IDX, .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, .nent = /* TODO: sriov max_vf + */ 1, - .context = &table->pages_eq, - .handler = mlx5_eq_async_int, + .nb = &table->pages_eq.irq_nb, }; - err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, ¶m); + err = create_async_eq(dev, "mlx5_pages_eq", + &table->pages_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); goto err2; @@ -592,11 +635,11 @@ static int create_async_eqs(struct mlx5_core_dev *dev) return err; err2: - destroy_async_eq(dev, &table->async_eq); + destroy_async_eq(dev, &table->async_eq.core); err1: mlx5_cmd_use_polling(dev); - destroy_async_eq(dev, &table->cmd_eq); + destroy_async_eq(dev, &table->cmd_eq.core); err0: mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); return err; @@ -607,19 +650,19 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = dev->priv.eq_table; int err; - err = destroy_async_eq(dev, &table->pages_eq); + err = destroy_async_eq(dev, &table->pages_eq.core); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", err); - err = destroy_async_eq(dev, &table->async_eq); + err = destroy_async_eq(dev, &table->async_eq.core); if (err) mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", err); mlx5_cmd_use_polling(dev); - err = destroy_async_eq(dev, &table->cmd_eq); + err = destroy_async_eq(dev, &table->cmd_eq.core); if (err) mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", err); @@ -629,17 +672,17 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) { - return &dev->priv.eq_table->async_eq; + return &dev->priv.eq_table->async_eq.core; } void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev) { - synchronize_irq(dev->priv.eq_table->async_eq.irqn); + synchronize_irq(dev->priv.eq_table->async_eq.core.irqn); } void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) { - synchronize_irq(dev->priv.eq_table->cmd_eq.irqn); + synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn); } /* Generic EQ API for mlx5_core consumers @@ -837,12 +880,12 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); #endif snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); + eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { .index = vecidx, .mask = 0, .nent = nent, - .context = &eq->core, - .handler = mlx5_eq_comp_int + .nb = &eq->irq_nb, }; err = create_map_eq(dev, &eq->core, name, ¶m); if (err) { @@ -940,10 +983,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; for (i = max_eqs - 1; i >= 0; i--) { - if (!table->irq_info[i].context) - continue; - free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context); - table->irq_info[i].context = NULL; + free_irq(pci_irq_vector(dev->pdev, i), &table->irq_info[i].nh); } mutex_unlock(&table->lock); pci_free_irq_vectors(dev->pdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index c0fb6d72b695..adbc228bd55d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -34,10 +34,17 @@ struct mlx5_eq { u8 eqn; int nent; struct mlx5_rsc_debug *dbg; + struct notifier_block *irq_nb; /* For destroy only */ +}; + +struct mlx5_eq_async { + struct mlx5_eq core; + struct notifier_block irq_nb; }; struct mlx5_eq_comp { - struct mlx5_eq core; /* Must be first */ + struct mlx5_eq core; + struct notifier_block irq_nb; struct mlx5_eq_tasklet tasklet_ctx; struct list_head list; }; diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index 00045cc4ea11..7909f1ff197c 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -26,8 +26,7 @@ struct mlx5_eq_param { u8 index; int nent; u64 mask; - void *context; - irq_handler_t handler; + struct notifier_block *nb; }; struct mlx5_eq * -- cgit v1.2.3 From 24163189da487b4caa751eef4e945c9333aae441 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:25 +0000 Subject: net/mlx5: Separate IRQ request/free from EQ life cycle Instead of requesting IRQ with eq creation, IRQs will be requested before EQ table creation. Instead of freeing the IRQs after EQ destroy, free IRQs after eq table destroy. Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/odp.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 121 ++++++++++++++++++--------- include/linux/mlx5/eq.h | 3 +- 3 files changed, 84 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index ac40a4fd5598..7ce7c5bfe685 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1562,7 +1562,7 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) .nent = MLX5_IB_NUM_PF_EQE, .nb = &eq->irq_nb, }; - eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", ¶m); + eq->core = mlx5_eq_create_generic(dev->mdev, ¶m); if (IS_ERR(eq->core)) { err = PTR_ERR(eq->core); goto err_wq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 590c0fefaa25..f187169cbe76 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -134,6 +134,64 @@ static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) return IRQ_HANDLED; } +static void irq_set_name(char *name, int vecidx) +{ + switch (vecidx) { + case MLX5_EQ_CMD_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_cmd_eq"); + break; + case MLX5_EQ_ASYNC_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async_eq"); + break; + case MLX5_EQ_PAGEREQ_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_pages_eq"); + break; + case MLX5_EQ_PFAULT_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_ib_page_fault_eq"); + break; + default: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", + vecidx - MLX5_EQ_VEC_COMP_BASE); + break; + } +} + +static int request_irqs(struct mlx5_core_dev *dev, int nvec) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_eq_table *eq_table; + char name[MLX5_MAX_IRQ_NAME]; + int err; + int i; + + eq_table = priv->eq_table; + for (i = 0; i < nvec; i++) { + struct mlx5_irq_info *irq_info = &eq_table->irq_info[i]; + int irqn = pci_irq_vector(dev->pdev, i); + + irq_set_name(name, i); + ATOMIC_INIT_NOTIFIER_HEAD(&irq_info->nh); + snprintf(irq_info->name, MLX5_MAX_IRQ_NAME, + "%s@pci:%s", name, pci_name(dev->pdev)); + err = request_irq(irqn, mlx5_irq_int_handler, 0, irq_info->name, + &irq_info->nh); + if (err) { + mlx5_core_err(dev, "Failed to request irq\n"); + goto err_request_irq; + } + } + return 0; + +err_request_irq: + for (; i >= 0; i--) { + struct mlx5_irq_info *irq_info = &eq_table->irq_info[i]; + int irqn = pci_irq_vector(dev->pdev, i); + + free_irq(irqn, &irq_info->nh); + } + return err; +} + static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) { u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; @@ -278,14 +336,12 @@ static void init_eq_buf(struct mlx5_eq *eq) } static int -create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, +create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_eq_param *param) { - struct mlx5_eq_table *eq_table = dev->priv.eq_table; struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; - struct mlx5_irq_info *irq_info; u8 vecidx = param->index; __be64 *pas; void *eqc; @@ -335,11 +391,6 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, if (err) goto err_in; - irq_info = mlx5_irq_get(dev, vecidx); - ATOMIC_INIT_NOTIFIER_HEAD(&irq_info->nh); - snprintf(irq_info->name, MLX5_MAX_IRQ_NAME, - "%s@pci:%s", name, pci_name(dev->pdev)); - eq->vecidx = vecidx; eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = pci_irq_vector(dev->pdev, vecidx); @@ -347,15 +398,10 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; eq->irq_nb = param->nb; - err = request_irq(eq->irqn, mlx5_irq_int_handler, 0, irq_info->name, - &irq_info->nh); + err = mlx5_irq_attach_nb(mlx5_irq_get(dev, vecidx), param->nb); if (err) goto err_eq; - err = mlx5_irq_attach_nb(irq_info, param->nb); - if (err) - goto err_irq; - err = mlx5_debug_eq_add(dev, eq); if (err) goto err_detach; @@ -368,10 +414,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, return 0; err_detach: - mlx5_irq_detach_nb(irq_info, param->nb); - -err_irq: - free_irq(eq->irqn, &eq_table->irq_info[vecidx].nh); + mlx5_irq_detach_nb(mlx5_irq_get(dev, vecidx), eq->irq_nb); err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -386,19 +429,14 @@ err_buf: static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { - struct mlx5_eq_table *eq_table = dev->priv.eq_table; - struct mlx5_irq_info *irq_info; int err; - irq_info = &eq_table->irq_info[eq->vecidx]; - mlx5_debug_eq_remove(dev, eq); - err = mlx5_irq_detach_nb(irq_info, eq->irq_nb); + err = mlx5_irq_detach_nb(mlx5_irq_get(dev, eq->vecidx), eq->irq_nb); if (err) mlx5_core_warn(eq->dev, "eq failed to detach from irq. err %d", err); - free_irq(eq->irqn, &eq_table->irq_info[eq->vecidx].nh); err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", @@ -479,7 +517,7 @@ void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) /* Async EQs */ -static int create_async_eq(struct mlx5_core_dev *dev, const char *name, +static int create_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_eq_param *param) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; @@ -491,7 +529,7 @@ static int create_async_eq(struct mlx5_core_dev *dev, const char *name, goto unlock; } - err = create_map_eq(dev, eq, name, param); + err = create_map_eq(dev, eq, param); unlock: mutex_unlock(&eq_table->lock); return err; @@ -596,7 +634,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .nent = MLX5_NUM_CMD_EQE, .nb = &table->cmd_eq.irq_nb, }; - err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq.core, ¶m); + err = create_async_eq(dev, &table->cmd_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); goto err0; @@ -611,8 +649,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .nent = MLX5_NUM_ASYNC_EQE, .nb = &table->async_eq.irq_nb, }; - err = create_async_eq(dev, "mlx5_async_eq", - &table->async_eq.core, ¶m); + err = create_async_eq(dev, &table->async_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); goto err1; @@ -625,8 +662,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .nent = /* TODO: sriov max_vf + */ 1, .nb = &table->pages_eq.irq_nb, }; - err = create_async_eq(dev, "mlx5_pages_eq", - &table->pages_eq.core, ¶m); + err = create_async_eq(dev, &table->pages_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); goto err2; @@ -689,7 +725,7 @@ void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) * Needed For RDMA ODP EQ for now */ struct mlx5_eq * -mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, +mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq_param *param) { struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL); @@ -698,7 +734,7 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, if (!eq) return ERR_PTR(-ENOMEM); - err = create_async_eq(dev, name, eq, param); + err = create_async_eq(dev, eq, param); if (err) { kvfree(eq); eq = ERR_PTR(err); @@ -845,7 +881,6 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) static int create_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - char name[MLX5_MAX_IRQ_NAME]; struct mlx5_eq_comp *eq; int ncomp_vec; int nent; @@ -879,7 +914,6 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) #ifdef CONFIG_RFS_ACCEL irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); #endif - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { .index = vecidx, @@ -887,7 +921,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) .nent = nent, .nb = &eq->irq_nb, }; - err = create_map_eq(dev, &eq->core, name, ¶m); + err = create_map_eq(dev, &eq->core, ¶m); if (err) { kfree(eq); goto clean; @@ -1018,8 +1052,14 @@ static int alloc_irq_vectors(struct mlx5_core_dev *dev) table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; + err = request_irqs(dev, nvec); + if (err) + goto err_free_irqs; + return 0; +err_free_irqs: + pci_free_irq_vectors(dev->pdev); err_free_irq_info: kfree(table->irq_info); return err; @@ -1027,10 +1067,13 @@ err_free_irq_info: static void free_irq_vectors(struct mlx5_core_dev *dev) { - struct mlx5_priv *priv = &dev->priv; + struct mlx5_eq_table *table = dev->priv.eq_table; + int i; + for (i = 0; i < table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; i++) + free_irq(pci_irq_vector(dev->pdev, i), &table->irq_info[i].nh); pci_free_irq_vectors(dev->pdev); - kfree(priv->eq_table->irq_info); + kfree(table->irq_info); } int mlx5_eq_table_create(struct mlx5_core_dev *dev) @@ -1039,7 +1082,7 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) err = alloc_irq_vectors(dev); if (err) { - mlx5_core_err(dev, "alloc irq vectors failed\n"); + mlx5_core_err(dev, "Failed to create IRQ vectors\n"); return err; } diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index 7909f1ff197c..73ab658af764 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -30,8 +30,7 @@ struct mlx5_eq_param { }; struct mlx5_eq * -mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, - struct mlx5_eq_param *param); +mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq_param *param); int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -- cgit v1.2.3 From 561aa15ad69e9d1e5a8bb277adb3209bf8091ecb Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:27 +0000 Subject: net/mlx5: Separate IRQ data from EQ table data IRQ table should only exist for mlx5_core_dev for PF and VF only. EQ table of mediated devices should hold a pointer to the IRQ table of the parent PCI device. Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 125 ++++++++++++++------- drivers/net/ethernet/mellanox/mlx5/core/main.c | 11 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 3 + include/linux/mlx5/driver.h | 3 + 4 files changed, 98 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index f187169cbe76..cdfa35ec02fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -77,6 +77,14 @@ struct mlx5_irq_info { char name[MLX5_MAX_IRQ_NAME]; }; +struct mlx5_irq_table { + struct mlx5_irq_info *irq_info; + int nvec; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif +}; + struct mlx5_eq_table { struct list_head comp_eqs_list; struct mlx5_eq_async pages_eq; @@ -89,11 +97,8 @@ struct mlx5_eq_table { struct mlx5_nb cq_err_nb; struct mutex lock; /* sync async eqs creations */ - int num_comp_vectors; - struct mlx5_irq_info *irq_info; -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rmap; -#endif + int num_comp_eqs; + struct mlx5_irq_table *irq_table; }; #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ @@ -109,11 +114,33 @@ struct mlx5_eq_table { (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)) +int mlx5_irq_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *irq_table; + + irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL); + if (!irq_table) + return -ENOMEM; + + dev->priv.irq_table = irq_table; + return 0; +} + +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) +{ + kvfree(dev->priv.irq_table); +} + +static int mlx5_irq_get_num_comp(struct mlx5_irq_table *table) +{ + return table->nvec - MLX5_EQ_VEC_COMP_BASE; +} + static struct mlx5_irq_info *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) { - struct mlx5_eq_table *eq_table = dev->priv.eq_table; + struct mlx5_irq_table *irq_table = dev->priv.irq_table; - return &eq_table->irq_info[vecidx]; + return &irq_table->irq_info[vecidx]; } static int mlx5_irq_attach_nb(struct mlx5_irq_info *irq, @@ -158,15 +185,12 @@ static void irq_set_name(char *name, int vecidx) static int request_irqs(struct mlx5_core_dev *dev, int nvec) { - struct mlx5_priv *priv = &dev->priv; - struct mlx5_eq_table *eq_table; char name[MLX5_MAX_IRQ_NAME]; int err; int i; - eq_table = priv->eq_table; for (i = 0; i < nvec; i++) { - struct mlx5_irq_info *irq_info = &eq_table->irq_info[i]; + struct mlx5_irq_info *irq_info = mlx5_irq_get(dev, i); int irqn = pci_irq_vector(dev->pdev, i); irq_set_name(name, i); @@ -184,7 +208,7 @@ static int request_irqs(struct mlx5_core_dev *dev, int nvec) err_request_irq: for (; i >= 0; i--) { - struct mlx5_irq_info *irq_info = &eq_table->irq_info[i]; + struct mlx5_irq_info *irq_info = mlx5_irq_get(dev, i); int irqn = pci_irq_vector(dev->pdev, i); free_irq(irqn, &irq_info->nh); @@ -501,6 +525,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); + eq_table->irq_table = dev->priv.irq_table; return 0; kvfree_eq_table: @@ -796,10 +821,13 @@ EXPORT_SYMBOL(mlx5_eq_update_ci); static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) { - struct mlx5_priv *priv = &mdev->priv; int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - int irq = pci_irq_vector(mdev->pdev, vecidx); - struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; + struct mlx5_priv *priv = &mdev->priv; + struct mlx5_irq_info *irq_info; + int irq; + + irq_info = mlx5_irq_get(mdev, vecidx); + irq = pci_irq_vector(mdev->pdev, vecidx); if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); @@ -819,20 +847,22 @@ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) { int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, vecidx); - struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; + struct mlx5_irq_info *irq_info; + int irq; + irq_info = mlx5_irq_get(mdev, vecidx); + irq = pci_irq_vector(mdev->pdev, vecidx); irq_set_affinity_hint(irq, NULL); free_cpumask_var(irq_info->mask); } static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) { + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); int err; int i; - for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) { + for (i = 0; i < nvec; i++) { err = set_comp_irq_affinity_hint(mdev, i); if (err) goto err_out; @@ -849,9 +879,10 @@ err_out: static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) { + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); int i; - for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) + for (i = 0; i < nvec; i++) clear_comp_irq_affinity_hint(mdev, i); } @@ -863,9 +894,9 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) clear_comp_irqs_affinity_hints(dev); #ifdef CONFIG_RFS_ACCEL - if (table->rmap) { - free_irq_cpu_rmap(table->rmap); - table->rmap = NULL; + if (table->irq_table->rmap) { + free_irq_cpu_rmap(table->irq_table->rmap); + table->irq_table->rmap = NULL; } #endif list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { @@ -882,20 +913,20 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq; - int ncomp_vec; + int ncomp_eqs; int nent; int err; int i; INIT_LIST_HEAD(&table->comp_eqs_list); - ncomp_vec = table->num_comp_vectors; + ncomp_eqs = table->num_comp_eqs; nent = MLX5_COMP_EQ_SIZE; #ifdef CONFIG_RFS_ACCEL - table->rmap = alloc_irq_cpu_rmap(ncomp_vec); - if (!table->rmap) + table->irq_table->rmap = alloc_irq_cpu_rmap(ncomp_eqs); + if (!table->irq_table->rmap) return -ENOMEM; #endif - for (i = 0; i < ncomp_vec; i++) { + for (i = 0; i < ncomp_eqs; i++) { int vecidx = i + MLX5_EQ_VEC_COMP_BASE; struct mlx5_eq_param param = {}; @@ -912,7 +943,8 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) (unsigned long)&eq->tasklet_ctx); #ifdef CONFIG_RFS_ACCEL - irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); + irq_cpu_rmap_add(table->irq_table->rmap, + pci_irq_vector(dev->pdev, vecidx)); #endif eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { @@ -967,22 +999,23 @@ EXPORT_SYMBOL(mlx5_vector2eqn); unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) { - return dev->priv.eq_table->num_comp_vectors; + return dev->priv.eq_table->num_comp_eqs; } EXPORT_SYMBOL(mlx5_comp_vectors_count); struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { - /* TODO: consider irq_get_affinity_mask(irq) */ - return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; + int vecidx = vector + MLX5_EQ_VEC_COMP_BASE; + + return dev->priv.eq_table->irq_table->irq_info[vecidx].mask; } EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) { - return dev->priv.eq_table->rmap; + return dev->priv.eq_table->irq_table->rmap; } #endif @@ -1008,16 +1041,17 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) clear_comp_irqs_affinity_hints(dev); #ifdef CONFIG_RFS_ACCEL - if (table->rmap) { - free_irq_cpu_rmap(table->rmap); - table->rmap = NULL; + if (table->irq_table->rmap) { + free_irq_cpu_rmap(table->irq_table->rmap); + table->irq_table->rmap = NULL; } #endif mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ - max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; + max_eqs = table->num_comp_eqs + MLX5_EQ_VEC_COMP_BASE; for (i = max_eqs - 1; i >= 0; i--) { - free_irq(pci_irq_vector(dev->pdev, i), &table->irq_info[i].nh); + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); } mutex_unlock(&table->lock); pci_free_irq_vectors(dev->pdev); @@ -1026,7 +1060,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) static int alloc_irq_vectors(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; - struct mlx5_eq_table *table = priv->eq_table; + struct mlx5_irq_table *table = priv->irq_table; int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? MLX5_CAP_GEN(dev, max_num_eqs) : 1 << MLX5_CAP_GEN(dev, log_max_eq); @@ -1050,7 +1084,7 @@ static int alloc_irq_vectors(struct mlx5_core_dev *dev) goto err_free_irq_info; } - table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; + table->nvec = nvec; err = request_irqs(dev, nvec); if (err) @@ -1067,17 +1101,19 @@ err_free_irq_info: static void free_irq_vectors(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_irq_table *table = dev->priv.irq_table; int i; - for (i = 0; i < table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; i++) - free_irq(pci_irq_vector(dev->pdev, i), &table->irq_info[i].nh); + for (i = 0; i < table->nvec; i++) + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); pci_free_irq_vectors(dev->pdev); kfree(table->irq_info); } int mlx5_eq_table_create(struct mlx5_core_dev *dev) { + struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; err = alloc_irq_vectors(dev); @@ -1086,6 +1122,9 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) return err; } + eq_table->num_comp_eqs = + mlx5_irq_get_num_comp(eq_table->irq_table); + err = create_async_eqs(dev); if (err) { mlx5_core_err(dev, "Failed to create async EQs\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 720f65bfe6a9..be79dceea3c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -804,10 +804,16 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_devcom; } + err = mlx5_irq_table_init(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize irq table\n"); + goto err_devcom; + } + err = mlx5_eq_table_init(dev); if (err) { mlx5_core_err(dev, "failed to initialize eq\n"); - goto err_devcom; + goto err_irq_cleanup; } err = mlx5_events_init(dev); @@ -883,6 +889,8 @@ err_events_cleanup: mlx5_events_cleanup(dev); err_eq_cleanup: mlx5_eq_table_cleanup(dev); +err_irq_cleanup: + mlx5_irq_table_cleanup(dev); err_devcom: mlx5_devcom_unregister_device(dev->priv.devcom); @@ -905,6 +913,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cq_debugfs_cleanup(dev); mlx5_events_cleanup(dev); mlx5_eq_table_cleanup(dev); + mlx5_irq_table_cleanup(dev); mlx5_devcom_unregister_device(dev->priv.devcom); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 22e69d4813e4..907515f3bfbb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -153,6 +153,9 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); +int mlx5_irq_table_init(struct mlx5_core_dev *dev); +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); + int mlx5_events_init(struct mlx5_core_dev *dev); void mlx5_events_cleanup(struct mlx5_core_dev *dev); void mlx5_events_start(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 64155fe201ee..d8ab633406c2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -492,6 +492,7 @@ struct mlx5_eswitch; struct mlx5_lag; struct mlx5_devcom; struct mlx5_eq_table; +struct mlx5_irq_table; struct mlx5_rate_limit { u32 rate; @@ -521,6 +522,8 @@ struct mlx5_core_roce { }; struct mlx5_priv { + /* IRQ table valid only for real pci devices PF or VF */ + struct mlx5_irq_table *irq_table; struct mlx5_eq_table *eq_table; /* pages stuff */ -- cgit v1.2.3 From 81bfa206032a67f0700459a64a5493c246629604 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Mon, 10 Jun 2019 23:38:41 +0000 Subject: net/mlx5: Use a single IRQ for all async EQs The patch modifies the IRQ allocation so that all async EQs are assigned to the same IRQ resulting in more available IRQs for completion EQs. The changes are using the support for IRQ sharing and EQ polling budget that was introduced in previous patches so when the shared interrupt is triggered, the kernel will serially call the handler of each of the sharing EQs with a certain budget of EQEs to poll in order to prevent starvation. Signed-off-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/odp.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 19 ++++++------ drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 38 +++++++++-------------- include/linux/mlx5/eq.h | 14 ++------- 4 files changed, 27 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 7ce7c5bfe685..693a0e225093 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1557,7 +1557,7 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_PFAULT_IDX, + .irq_index = 0, .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT, .nent = MLX5_IB_NUM_PF_EQE, .nb = &eq->irq_nb, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 0c72c122daef..0f5846a34928 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -250,7 +250,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; - u8 vecidx = param->index; + u8 vecidx = param->irq_index; __be64 *pas; void *eqc; int inlen; @@ -435,8 +435,9 @@ static int create_async_eq(struct mlx5_core_dev *dev, int err; mutex_lock(&eq_table->lock); - if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) { - err = -ENOSPC; + /* Async EQs must share irq index 0 */ + if (param->irq_index != 0) { + err = -EINVAL; goto unlock; } @@ -540,7 +541,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_CMD_IDX, + .irq_index = 0, .mask = 1ull << MLX5_EVENT_TYPE_CMD, .nent = MLX5_NUM_CMD_EQE, .nb = &table->cmd_eq.irq_nb, @@ -555,7 +556,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_ASYNC_IDX, + .irq_index = 0, .mask = gather_async_events_mask(dev), .nent = MLX5_NUM_ASYNC_EQE, .nb = &table->async_eq.irq_nb, @@ -568,7 +569,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_PAGEREQ_IDX, + .irq_index = 0, .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, .nent = /* TODO: sriov max_vf + */ 1, .nb = &table->pages_eq.irq_nb, @@ -731,7 +732,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) ncomp_eqs = table->num_comp_eqs; nent = MLX5_COMP_EQ_SIZE; for (i = 0; i < ncomp_eqs; i++) { - int vecidx = i + MLX5_EQ_VEC_COMP_BASE; + int vecidx = i + MLX5_IRQ_VEC_COMP_BASE; struct mlx5_eq_param param = {}; eq = kzalloc(sizeof(*eq), GFP_KERNEL); @@ -748,7 +749,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { - .index = vecidx, + .irq_index = vecidx, .mask = 0, .nent = nent, .nb = &eq->irq_nb, @@ -800,7 +801,7 @@ EXPORT_SYMBOL(mlx5_comp_vectors_count); struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { - int vecidx = vector + MLX5_EQ_VEC_COMP_BASE; + int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE; return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table, vecidx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index fec861f4fefe..373981a659c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -45,7 +45,7 @@ void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) int mlx5_irq_get_num_comp(struct mlx5_irq_table *table) { - return table->nvec - MLX5_EQ_VEC_COMP_BASE; + return table->nvec - MLX5_IRQ_VEC_COMP_BASE; } static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) @@ -81,24 +81,14 @@ static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) static void irq_set_name(char *name, int vecidx) { - switch (vecidx) { - case MLX5_EQ_CMD_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_cmd_eq"); - break; - case MLX5_EQ_ASYNC_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async_eq"); - break; - case MLX5_EQ_PAGEREQ_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_pages_eq"); - break; - case MLX5_EQ_PFAULT_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_ib_page_fault_eq"); - break; - default: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", - vecidx - MLX5_EQ_VEC_COMP_BASE); - break; + if (vecidx == 0) { + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async"); + return; } + + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", + vecidx - MLX5_IRQ_VEC_COMP_BASE); + return; } static int request_irqs(struct mlx5_core_dev *dev, int nvec) @@ -159,7 +149,7 @@ static int irq_set_rmap(struct mlx5_core_dev *mdev) goto err_out; } - vecidx = MLX5_EQ_VEC_COMP_BASE; + vecidx = MLX5_IRQ_VEC_COMP_BASE; for (; vecidx < irq_table->nvec; vecidx++) { err = irq_cpu_rmap_add(irq_table->rmap, pci_irq_vector(mdev->pdev, vecidx)); @@ -182,7 +172,7 @@ err_out: static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) { - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + int vecidx = MLX5_IRQ_VEC_COMP_BASE + i; struct mlx5_irq *irq; int irqn; @@ -205,7 +195,7 @@ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) { - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + int vecidx = MLX5_IRQ_VEC_COMP_BASE + i; struct mlx5_irq *irq; int irqn; @@ -279,16 +269,16 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev) int err; nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + - MLX5_EQ_VEC_COMP_BASE; + MLX5_IRQ_VEC_COMP_BASE; nvec = min_t(int, nvec, num_eqs); - if (nvec <= MLX5_EQ_VEC_COMP_BASE) + if (nvec <= MLX5_IRQ_VEC_COMP_BASE) return -ENOMEM; table->irq = kcalloc(nvec, sizeof(*table->irq), GFP_KERNEL); if (!table->irq) return -ENOMEM; - nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, + nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1, nvec, PCI_IRQ_MSIX); if (nvec < 0) { err = nvec; diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index 73ab658af764..4a94e04eff0a 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -4,17 +4,7 @@ #ifndef MLX5_CORE_EQ_H #define MLX5_CORE_EQ_H -enum { - MLX5_EQ_PAGEREQ_IDX = 0, - MLX5_EQ_CMD_IDX = 1, - MLX5_EQ_ASYNC_IDX = 2, - /* reserved to be used by mlx5_core ulps (mlx5e/mlx5_ib) */ - MLX5_EQ_PFAULT_IDX = 3, - MLX5_EQ_MAX_ASYNC_EQS, - /* completion eqs vector indices start here */ - MLX5_EQ_VEC_COMP_BASE = MLX5_EQ_MAX_ASYNC_EQS, -}; - +#define MLX5_IRQ_VEC_COMP_BASE 1 #define MLX5_NUM_CMD_EQE (32) #define MLX5_NUM_ASYNC_EQE (0x1000) #define MLX5_NUM_SPARE_EQE (0x80) @@ -23,7 +13,7 @@ struct mlx5_eq; struct mlx5_core_dev; struct mlx5_eq_param { - u8 index; + u8 irq_index; int nent; u64 mask; struct notifier_block *nb; -- cgit v1.2.3 From 1f8a7bee27e63d7c5287719049941e285e54d370 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:42 +0000 Subject: net/mlx5: Add EQ enable/disable API Previously, EQ joined the chain notifier on creation. This forced the caller to be ready to handle events before creating the EQ through eq_create_generic interface. To help the caller control when the created EQ will be attached to the IRQ, add enable/disable API. Signed-off-by: Yuval Avnery Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/odp.c | 9 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 105 ++++++++++++++++------- drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 1 - include/linux/mlx5/eq.h | 5 +- 4 files changed, 88 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 693a0e225093..12ccee1eb047 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1560,15 +1560,21 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) .irq_index = 0, .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT, .nent = MLX5_IB_NUM_PF_EQE, - .nb = &eq->irq_nb, }; eq->core = mlx5_eq_create_generic(dev->mdev, ¶m); if (IS_ERR(eq->core)) { err = PTR_ERR(eq->core); goto err_wq; } + err = mlx5_eq_enable(dev->mdev, eq->core, &eq->irq_nb); + if (err) { + mlx5_ib_err(dev, "failed to enable odp EQ %d\n", err); + goto err_eq; + } return 0; +err_eq: + mlx5_eq_destroy_generic(dev->mdev, eq->core); err_wq: destroy_workqueue(eq->wq); err_mempool: @@ -1581,6 +1587,7 @@ mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) { int err; + mlx5_eq_disable(dev->mdev, eq->core, &eq->irq_nb); err = mlx5_eq_destroy_generic(dev->mdev, eq->core); cancel_work_sync(&eq->work); destroy_workqueue(eq->wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 0f5846a34928..58fff2f39b38 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -304,27 +304,14 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; - eq->irq_nb = param->nb; - - err = mlx5_irq_attach_nb(dev->priv.eq_table->irq_table, vecidx, - param->nb); - if (err) - goto err_eq; err = mlx5_debug_eq_add(dev, eq); if (err) - goto err_detach; - - /* EQs are created in ARMED state - */ - eq_update_ci(eq, 1); + goto err_eq; kvfree(in); return 0; -err_detach: - mlx5_irq_detach_nb(dev->priv.eq_table->irq_table, vecidx, eq->irq_nb); - err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -336,17 +323,49 @@ err_buf: return err; } +/** + * mlx5_eq_enable - Enable EQ for receiving EQEs + * @dev - Device which owns the eq + * @eq - EQ to enable + * @nb - notifier call block + * mlx5_eq_enable - must be called after EQ is created in device. + */ +int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + int err; + + err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb); + if (!err) + eq_update_ci(eq, 1); + + return err; +} +EXPORT_SYMBOL(mlx5_eq_enable); + +/** + * mlx5_eq_disable - Enable EQ for receiving EQEs + * @dev - Device which owns the eq + * @eq - EQ to disable + * @nb - notifier call block + * mlx5_eq_disable - must be called before EQ is destroyed. + */ +void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + + mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb); +} +EXPORT_SYMBOL(mlx5_eq_disable); + static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { int err; mlx5_debug_eq_remove(dev, eq); - err = mlx5_irq_detach_nb(dev->priv.eq_table->irq_table, - eq->vecidx, eq->irq_nb); - if (err) - mlx5_core_warn(eq->dev, "eq failed to detach from irq. err %d", - err); err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", @@ -544,14 +563,17 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .irq_index = 0, .mask = 1ull << MLX5_EVENT_TYPE_CMD, .nent = MLX5_NUM_CMD_EQE, - .nb = &table->cmd_eq.irq_nb, }; err = create_async_eq(dev, &table->cmd_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); goto err0; } - + err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err); + goto err1; + } mlx5_cmd_use_events(dev); table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int; @@ -559,12 +581,17 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .irq_index = 0, .mask = gather_async_events_mask(dev), .nent = MLX5_NUM_ASYNC_EQE, - .nb = &table->async_eq.irq_nb, }; err = create_async_eq(dev, &table->async_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); - goto err1; + goto err2; + } + err = mlx5_eq_enable(dev, &table->async_eq.core, + &table->async_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable async EQ %d\n", err); + goto err3; } table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int; @@ -572,21 +599,31 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .irq_index = 0, .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, .nent = /* TODO: sriov max_vf + */ 1, - .nb = &table->pages_eq.irq_nb, }; err = create_async_eq(dev, &table->pages_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); - goto err2; + goto err4; + } + err = mlx5_eq_enable(dev, &table->pages_eq.core, + &table->pages_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err); + goto err5; } return err; -err2: +err5: + destroy_async_eq(dev, &table->pages_eq.core); +err4: + mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); +err3: destroy_async_eq(dev, &table->async_eq.core); - -err1: +err2: mlx5_cmd_use_polling(dev); + mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); +err1: destroy_async_eq(dev, &table->cmd_eq.core); err0: mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); @@ -598,11 +635,13 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = dev->priv.eq_table; int err; + mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb); err = destroy_async_eq(dev, &table->pages_eq.core); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", err); + mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); err = destroy_async_eq(dev, &table->async_eq.core); if (err) mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", @@ -610,6 +649,7 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) mlx5_cmd_use_polling(dev); + mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); err = destroy_async_eq(dev, &table->cmd_eq.core); if (err) mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", @@ -711,6 +751,7 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); if (destroy_unmap_eq(dev, &eq->core)) mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", eq->core.eqn); @@ -752,13 +793,19 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) .irq_index = vecidx, .mask = 0, .nent = nent, - .nb = &eq->irq_nb, }; err = create_map_eq(dev, &eq->core, ¶m); if (err) { kfree(eq); goto clean; } + err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); + if (err) { + destroy_unmap_eq(dev, &eq->core); + kfree(eq); + goto clean; + } + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */ list_add_tail(&eq->list, &table->comp_eqs_list); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 3836c39b2900..24bd991a727e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -33,7 +33,6 @@ struct mlx5_eq { u8 eqn; int nent; struct mlx5_rsc_debug *dbg; - struct notifier_block *irq_nb; /* For destroy only */ }; struct mlx5_eq_async { diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index 4a94e04eff0a..70e16dcfb4c4 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -16,13 +16,16 @@ struct mlx5_eq_param { u8 irq_index; int nent; u64 mask; - struct notifier_block *nb; }; struct mlx5_eq * mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq_param *param); int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb); +void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb); struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc); void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm); -- cgit v1.2.3 From b25bbc2f24dcab9cd186ef4003c39bf51ad0454c Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Thu, 28 Jun 2018 15:05:58 +0300 Subject: net/mlx5: Add Vendor Specific Capability access gateway The Vendor Specific Capability (VSC) is used to activate a gateway interfacing with the device. The gateway is used to read or write device configurations, which are organized in different domains (spaces). A configuration access may result in multiple actions, reads, writes. Example usages are accessing the Crspace domain to read the crspace or locking a device semaphore using the Semaphore domain. The configuration access use pci_cfg_access to prevent parallel access to the VSC space by the driver and userspace calls. Signed-off-by: Alex Vesker Signed-off-by: Feras Daoud Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 3 +- .../net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c | 286 +++++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h | 24 ++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 + include/linux/mlx5/driver.h | 1 + 5 files changed, 316 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9006fda6bd11..8e07354faea1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -15,7 +15,8 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ - lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o devlink.o + lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \ + diag/fw_tracer.o devlink.o # # Netdev basic diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c new file mode 100644 index 000000000000..a27b0119b3d6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include +#include "mlx5_core.h" +#include "pci_vsc.h" + +#define MLX5_EXTRACT_C(source, offset, size) \ + ((((u32)(source)) >> (offset)) & MLX5_ONES32(size)) +#define MLX5_EXTRACT(src, start, len) \ + (((len) == 32) ? (src) : MLX5_EXTRACT_C(src, start, len)) +#define MLX5_ONES32(size) \ + ((size) ? (0xffffffff >> (32 - (size))) : 0) +#define MLX5_MASK32(offset, size) \ + (MLX5_ONES32(size) << (offset)) +#define MLX5_MERGE_C(rsrc1, rsrc2, start, len) \ + ((((rsrc2) << (start)) & (MLX5_MASK32((start), (len)))) | \ + ((rsrc1) & (~MLX5_MASK32((start), (len))))) +#define MLX5_MERGE(rsrc1, rsrc2, start, len) \ + (((len) == 32) ? (rsrc2) : MLX5_MERGE_C(rsrc1, rsrc2, start, len)) +#define vsc_read(dev, offset, val) \ + pci_read_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val)) +#define vsc_write(dev, offset, val) \ + pci_write_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val)) +#define VSC_MAX_RETRIES 2048 + +enum mlx5_vsc_state { + MLX5_VSC_UNLOCK, + MLX5_VSC_LOCK, +}; + +enum { + VSC_CTRL_OFFSET = 0x4, + VSC_COUNTER_OFFSET = 0x8, + VSC_SEMAPHORE_OFFSET = 0xc, + VSC_ADDR_OFFSET = 0x10, + VSC_DATA_OFFSET = 0x14, + + VSC_FLAG_BIT_OFFS = 31, + VSC_FLAG_BIT_LEN = 1, + + VSC_SYND_BIT_OFFS = 30, + VSC_SYND_BIT_LEN = 1, + + VSC_ADDR_BIT_OFFS = 0, + VSC_ADDR_BIT_LEN = 30, + + VSC_SPACE_BIT_OFFS = 0, + VSC_SPACE_BIT_LEN = 16, + + VSC_SIZE_VLD_BIT_OFFS = 28, + VSC_SIZE_VLD_BIT_LEN = 1, + + VSC_STATUS_BIT_OFFS = 29, + VSC_STATUS_BIT_LEN = 3, +}; + +void mlx5_pci_vsc_init(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_pf(dev)) + return; + + dev->vsc_addr = pci_find_capability(dev->pdev, + PCI_CAP_ID_VNDR); + if (!dev->vsc_addr) + mlx5_core_warn(dev, "Failed to get valid vendor specific ID\n"); +} + +int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev) +{ + u32 counter = 0; + int retries = 0; + u32 lock_val; + int ret; + + pci_cfg_access_lock(dev->pdev); + do { + if (retries > VSC_MAX_RETRIES) { + ret = -EBUSY; + goto pci_unlock; + } + + /* Check if semaphore is already locked */ + ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val); + if (ret) + goto pci_unlock; + + if (lock_val) { + retries++; + usleep_range(1000, 2000); + continue; + } + + /* Read and write counter value, if written value is + * the same, semaphore was acquired successfully. + */ + ret = vsc_read(dev, VSC_COUNTER_OFFSET, &counter); + if (ret) + goto pci_unlock; + + ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, counter); + if (ret) + goto pci_unlock; + + ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val); + if (ret) + goto pci_unlock; + + retries++; + } while (counter != lock_val); + + return 0; + +pci_unlock: + pci_cfg_access_unlock(dev->pdev); + return ret; +} + +int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev) +{ + int ret; + + ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, MLX5_VSC_UNLOCK); + pci_cfg_access_unlock(dev->pdev); + return ret; +} + +int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space, + u32 *ret_space_size) +{ + int ret; + u32 val = 0; + + if (!mlx5_vsc_accessible(dev)) + return -EINVAL; + + if (ret_space_size) + *ret_space_size = 0; + + /* Get a unique val */ + ret = vsc_read(dev, VSC_CTRL_OFFSET, &val); + if (ret) + goto out; + + /* Try to modify the lock */ + val = MLX5_MERGE(val, space, VSC_SPACE_BIT_OFFS, VSC_SPACE_BIT_LEN); + ret = vsc_write(dev, VSC_CTRL_OFFSET, val); + if (ret) + goto out; + + /* Verify lock was modified */ + ret = vsc_read(dev, VSC_CTRL_OFFSET, &val); + if (ret) + goto out; + + if (MLX5_EXTRACT(val, VSC_STATUS_BIT_OFFS, VSC_STATUS_BIT_LEN) == 0) + return -EINVAL; + + /* Get space max address if indicated by size valid bit */ + if (ret_space_size && + MLX5_EXTRACT(val, VSC_SIZE_VLD_BIT_OFFS, VSC_SIZE_VLD_BIT_LEN)) { + ret = vsc_read(dev, VSC_ADDR_OFFSET, &val); + if (ret) { + mlx5_core_warn(dev, "Failed to get max space size\n"); + goto out; + } + *ret_space_size = MLX5_EXTRACT(val, VSC_ADDR_BIT_OFFS, + VSC_ADDR_BIT_LEN); + } + return 0; + +out: + return ret; +} + +static int mlx5_vsc_wait_on_flag(struct mlx5_core_dev *dev, u8 expected_val) +{ + int retries = 0; + u32 flag; + int ret; + + do { + if (retries > VSC_MAX_RETRIES) + return -EBUSY; + + ret = vsc_read(dev, VSC_ADDR_OFFSET, &flag); + if (ret) + return ret; + flag = MLX5_EXTRACT(flag, VSC_FLAG_BIT_OFFS, VSC_FLAG_BIT_LEN); + retries++; + + if ((retries & 0xf) == 0) + usleep_range(1000, 2000); + + } while (flag != expected_val); + + return 0; +} + +static int mlx5_vsc_gw_write(struct mlx5_core_dev *dev, unsigned int address, + u32 data) +{ + int ret; + + if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS, + VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN)) + return -EINVAL; + + /* Set flag to 0x1 */ + address = MLX5_MERGE(address, 1, VSC_FLAG_BIT_OFFS, 1); + ret = vsc_write(dev, VSC_DATA_OFFSET, data); + if (ret) + goto out; + + ret = vsc_write(dev, VSC_ADDR_OFFSET, address); + if (ret) + goto out; + + /* Wait for the flag to be cleared */ + ret = mlx5_vsc_wait_on_flag(dev, 0); + +out: + return ret; +} + +static int mlx5_vsc_gw_read(struct mlx5_core_dev *dev, unsigned int address, + u32 *data) +{ + int ret; + + if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS, + VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN)) + return -EINVAL; + + ret = vsc_write(dev, VSC_ADDR_OFFSET, address); + if (ret) + goto out; + + ret = mlx5_vsc_wait_on_flag(dev, 1); + if (ret) + goto out; + + ret = vsc_read(dev, VSC_DATA_OFFSET, data); +out: + return ret; +} + +static int mlx5_vsc_gw_read_fast(struct mlx5_core_dev *dev, + unsigned int read_addr, + unsigned int *next_read_addr, + u32 *data) +{ + int ret; + + ret = mlx5_vsc_gw_read(dev, read_addr, data); + if (ret) + goto out; + + ret = vsc_read(dev, VSC_ADDR_OFFSET, next_read_addr); + if (ret) + goto out; + + *next_read_addr = MLX5_EXTRACT(*next_read_addr, VSC_ADDR_BIT_OFFS, + VSC_ADDR_BIT_LEN); + + if (*next_read_addr <= read_addr) + ret = -EINVAL; +out: + return ret; +} + +int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data, + int length) +{ + unsigned int next_read_addr = 0; + unsigned int read_addr = 0; + + while (read_addr < length) { + if (mlx5_vsc_gw_read_fast(dev, read_addr, &next_read_addr, + &data[(read_addr >> 2)])) + return read_addr; + + read_addr = next_read_addr; + } + return length; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h new file mode 100644 index 000000000000..28ea6bfa439f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies */ + +#ifndef __MLX5_PCI_VSC_H__ +#define __MLX5_PCI_VSC_H__ + +enum { + MLX5_VSC_SPACE_SCAN_CRSPACE = 0x7, +}; + +void mlx5_pci_vsc_init(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space, + u32 *ret_space_size); +int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data, + int length); + +static inline bool mlx5_vsc_accessible(struct mlx5_core_dev *dev) +{ + return !!dev->vsc_addr; +} + +#endif /* __MLX5_PCI_VSC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 5ea141893b99..3adc09a1a312 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -66,6 +66,7 @@ #include "lib/vxlan.h" #include "lib/geneve.h" #include "lib/devcom.h" +#include "lib/pci_vsc.h" #include "diag/fw_tracer.h" #include "ecpf.h" @@ -763,6 +764,8 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, goto err_clr_master; } + mlx5_pci_vsc_init(dev); + return 0; err_clr_master: diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3a810bf043fe..f732445bcbdb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -693,6 +693,7 @@ struct mlx5_core_dev { struct mlx5_clock clock; struct mlx5_ib_clock_info *clock_info; struct mlx5_fw_tracer *tracer; + u32 vsc_addr; }; struct mlx5_db { -- cgit v1.2.3 From 8b9d8baae1de7400f19058020ee8f0f27d436687 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Tue, 17 Jul 2018 11:18:26 +0300 Subject: net/mlx5: Add Crdump support Crdump allows the driver to retrieve a dump of the FW PCI crspace. This is useful in case of catastrophic issues which may require FW reset. The crspace dump can be used for later debug. Signed-off-by: Alex Vesker Signed-off-by: Moshe Shemesh Reviewed-by: Feras Daoud Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../net/ethernet/mellanox/mlx5/core/diag/crdump.c | 106 +++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h | 3 + drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 + include/linux/mlx5/driver.h | 1 + 5 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 8e07354faea1..5fe2bf916c06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -16,7 +16,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \ - diag/fw_tracer.o devlink.o + diag/fw_tracer.o diag/crdump.o devlink.o # # Netdev basic diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c new file mode 100644 index 000000000000..dfb34172c69b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include +#include "mlx5_core.h" +#include "lib/pci_vsc.h" +#include "lib/mlx5.h" + +#define BAD_ACCESS 0xBADACCE5 +#define MLX5_PROTECTED_CR_SCAN_CRSPACE 0x7 + +static bool mlx5_crdump_enabled(struct mlx5_core_dev *dev) +{ + return !!dev->priv.health.crdump_size; +} + +static int mlx5_crdump_fill(struct mlx5_core_dev *dev, u32 *cr_data) +{ + u32 crdump_size = dev->priv.health.crdump_size; + int i, ret; + + for (i = 0; i < (crdump_size / 4); i++) + cr_data[i] = BAD_ACCESS; + + ret = mlx5_vsc_gw_read_block_fast(dev, cr_data, crdump_size); + if (ret <= 0) { + if (ret == 0) + return -EIO; + return ret; + } + + if (crdump_size != ret) { + mlx5_core_warn(dev, "failed to read full dump, read %d out of %u\n", + ret, crdump_size); + return -EINVAL; + } + + return 0; +} + +int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data) +{ + int ret; + + if (!mlx5_crdump_enabled(dev)) + return -ENODEV; + + ret = mlx5_vsc_gw_lock(dev); + if (ret) { + mlx5_core_warn(dev, "crdump: failed to lock vsc gw err %d\n", + ret); + return ret; + } + + ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, NULL); + if (ret) + goto unlock; + + ret = mlx5_crdump_fill(dev, cr_data); + +unlock: + mlx5_vsc_gw_unlock(dev); + return ret; +} + +int mlx5_crdump_enable(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + u32 space_size; + int ret; + + if (!mlx5_core_is_pf(dev) || !mlx5_vsc_accessible(dev) || + mlx5_crdump_enabled(dev)) + return 0; + + ret = mlx5_vsc_gw_lock(dev); + if (ret) + return ret; + + /* Check if space is supported and get space size */ + ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, + &space_size); + if (ret) { + /* Unlock and mask error since space is not supported */ + mlx5_vsc_gw_unlock(dev); + return 0; + } + + if (!space_size) { + mlx5_core_warn(dev, "Invalid Crspace size, zero\n"); + mlx5_vsc_gw_unlock(dev); + return -EINVAL; + } + + ret = mlx5_vsc_gw_unlock(dev); + if (ret) + return ret; + + priv->health.crdump_size = space_size; + return 0; +} + +void mlx5_crdump_disable(struct mlx5_core_dev *dev) +{ + dev->priv.health.crdump_size = 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 397a2847867a..d918e44491f4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -41,6 +41,9 @@ int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count); void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count); int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index); void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index); +int mlx5_crdump_enable(struct mlx5_core_dev *dev); +void mlx5_crdump_disable(struct mlx5_core_dev *dev); +int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data); /* TODO move to lib/events.h */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 3adc09a1a312..c70e97071b87 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1313,6 +1313,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto clean_load; + err = mlx5_crdump_enable(dev); + if (err) + dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err); + pci_save_state(pdev); return 0; @@ -1334,6 +1338,7 @@ static void remove_one(struct pci_dev *pdev) struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct devlink *devlink = priv_to_devlink(dev); + mlx5_crdump_disable(dev); mlx5_devlink_unregister(devlink); mlx5_unregister_device(dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f732445bcbdb..4ae533b3da07 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -435,6 +435,7 @@ struct mlx5_core_health { u32 prev; int miss_counter; bool sick; + u32 crdump_size; /* wq spinlock to synchronize draining */ spinlock_t wq_lock; struct workqueue_struct *wq; -- cgit v1.2.3 From 63cbc552eebf08818af2025aef4589a48ef849c0 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Mon, 12 Nov 2018 15:23:02 +0200 Subject: net/mlx5: Handle SW reset of FW in error flow New mlx5 adapters allow the driver to reset the FW in the event of an error, this action called "SW Reset". When an SW reset is issued on any PF all PFs enter reset state which is a recoverable condition. The existing recovery flow was designed to allow the recovery of a VF after a PF driver reload. This patch adds the sw reset to the NIC states as a preparation for sw reset handling. When a software reset is issued the following occurs: 1. The NIC interface mode is set to 7 while the reset is in progress. 2. Once the reset completes the NIC interface mode is set to 1. Signed-off-by: Feras Daoud Signed-off-by: Moshe Shemesh Signed-off-by: Daniel Jurgens Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_selftest.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/health.c | 105 +++++++++------------ drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +- include/linux/mlx5/driver.h | 2 +- 5 files changed, 48 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 4382ef85488c..840ec945ccba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -64,7 +64,7 @@ static int mlx5e_test_health_info(struct mlx5e_priv *priv) { struct mlx5_core_health *health = &priv->mdev->priv.health; - return health->sick ? 1 : 0; + return health->fatal_error ? 1 : 0; } static int mlx5e_test_link_state(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index a2656f4008d9..737e6d550775 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -62,12 +62,18 @@ enum { enum { MLX5_DROP_NEW_HEALTH_WORK, - MLX5_DROP_NEW_RECOVERY_WORK, +}; + +enum { + MLX5_SENSOR_NO_ERR = 0, + MLX5_SENSOR_PCI_COMM_ERR = 1, + MLX5_SENSOR_NIC_DISABLED = 2, + MLX5_SENSOR_NIC_SW_RESET = 3, }; u8 mlx5_get_nic_state(struct mlx5_core_dev *dev) { - return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3; + return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7; } void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) @@ -80,18 +86,25 @@ void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) &dev->iseg->cmdq_addr_l_sz); } -static int in_fatal(struct mlx5_core_dev *dev) +static bool sensor_pci_not_working(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; struct health_buffer __iomem *h = health->health; - if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) - return 1; + /* Offline PCI reads return 0xffffffff */ + return (ioread32be(&h->fw_ver) == 0xffffffff); +} - if (ioread32be(&h->fw_ver) == 0xffffffff) - return 1; +static u32 check_fatal_sensors(struct mlx5_core_dev *dev) +{ + if (sensor_pci_not_working(dev)) + return MLX5_SENSOR_PCI_COMM_ERR; + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) + return MLX5_SENSOR_NIC_DISABLED; + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET) + return MLX5_SENSOR_NIC_SW_RESET; - return 0; + return MLX5_SENSOR_NO_ERR; } void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) @@ -101,7 +114,8 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) goto unlock; mlx5_core_err(dev, "start\n"); - if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) { + if (pci_channel_offline(dev->pdev) || + dev->priv.health.fatal_error != MLX5_SENSOR_NO_ERR || force) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mlx5_cmd_flush(dev); } @@ -137,38 +151,14 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) mlx5_disable_device(dev); } -static void health_recover(struct work_struct *work) -{ - struct mlx5_core_health *health; - struct delayed_work *dwork; - struct mlx5_core_dev *dev; - struct mlx5_priv *priv; - u8 nic_state; - - dwork = container_of(work, struct delayed_work, work); - health = container_of(dwork, struct mlx5_core_health, recover_work); - priv = container_of(health, struct mlx5_priv, health); - dev = container_of(priv, struct mlx5_core_dev, priv); - - nic_state = mlx5_get_nic_state(dev); - if (nic_state == MLX5_NIC_IFC_INVALID) { - mlx5_core_err(dev, "health recovery flow aborted since the nic state is invalid\n"); - return; - } - - mlx5_core_err(dev, "starting health recovery flow\n"); - mlx5_recover_device(dev); -} - /* How much time to wait until health resetting the driver (in msecs) */ -#define MLX5_RECOVERY_DELAY_MSECS 60000 +#define MLX5_RECOVERY_WAIT_MSECS 60000 static void health_care(struct work_struct *work) { - unsigned long recover_delay = msecs_to_jiffies(MLX5_RECOVERY_DELAY_MSECS); struct mlx5_core_health *health; struct mlx5_core_dev *dev; struct mlx5_priv *priv; - unsigned long flags; + unsigned long end; health = container_of(work, struct mlx5_core_health, work); priv = container_of(health, struct mlx5_priv, health); @@ -176,13 +166,18 @@ static void health_care(struct work_struct *work) mlx5_core_warn(dev, "handling bad device here\n"); mlx5_handle_bad_state(dev); - spin_lock_irqsave(&health->wq_lock, flags); - if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags)) - schedule_delayed_work(&health->recover_work, recover_delay); - else - mlx5_core_err(dev, - "new health works are not permitted at this stage\n"); - spin_unlock_irqrestore(&health->wq_lock, flags); + end = jiffies + msecs_to_jiffies(MLX5_RECOVERY_WAIT_MSECS); + while (sensor_pci_not_working(dev)) { + if (time_after(jiffies, end)) { + mlx5_core_err(dev, + "health recovery flow aborted, PCI reads still not working\n"); + return; + } + msleep(100); + } + + mlx5_core_err(dev, "starting health recovery flow\n"); + mlx5_recover_device(dev); } static const char *hsynd_str(u8 synd) @@ -274,6 +269,7 @@ static void poll_health(struct timer_list *t) { struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer); struct mlx5_core_health *health = &dev->priv.health; + u32 fatal_error; u32 count; if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) @@ -291,8 +287,11 @@ static void poll_health(struct timer_list *t) print_health_info(dev); } - if (in_fatal(dev) && !health->sick) { - health->sick = true; + fatal_error = check_fatal_sensors(dev); + + if (fatal_error && !health->fatal_error) { + mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error); + dev->priv.health.fatal_error = fatal_error; print_health_info(dev); mlx5_trigger_health_work(dev); } @@ -306,9 +305,8 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) struct mlx5_core_health *health = &dev->priv.health; timer_setup(&health->timer, poll_health, 0); - health->sick = 0; + health->fatal_error = MLX5_SENSOR_NO_ERR; clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); - clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; @@ -324,7 +322,6 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) if (disable_health) { spin_lock_irqsave(&health->wq_lock, flags); set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); - set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); spin_unlock_irqrestore(&health->wq_lock, flags); } @@ -338,23 +335,10 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev) spin_lock_irqsave(&health->wq_lock, flags); set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); - set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); spin_unlock_irqrestore(&health->wq_lock, flags); - cancel_delayed_work_sync(&health->recover_work); cancel_work_sync(&health->work); } -void mlx5_drain_health_recovery(struct mlx5_core_dev *dev) -{ - struct mlx5_core_health *health = &dev->priv.health; - unsigned long flags; - - spin_lock_irqsave(&health->wq_lock, flags); - set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); - spin_unlock_irqrestore(&health->wq_lock, flags); - cancel_delayed_work_sync(&dev->priv.health.recover_work); -} - void mlx5_health_flush(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; @@ -387,7 +371,6 @@ int mlx5_health_init(struct mlx5_core_dev *dev) return -ENOMEM; spin_lock_init(&health->wq_lock); INIT_WORK(&health->work, health_care); - INIT_DELAYED_WORK(&health->recover_work, health_recover); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c70e97071b87..fd0e2949c4f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1191,7 +1191,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) int err = 0; if (cleanup) - mlx5_drain_health_recovery(dev); + mlx5_drain_health_wq(dev); mutex_lock(&dev->intf_state_mutex); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index d4dd8c1ae55c..97f8cf67ced0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -214,7 +214,7 @@ enum { MLX5_NIC_IFC_FULL = 0, MLX5_NIC_IFC_DISABLED = 1, MLX5_NIC_IFC_NO_DRAM_NIC = 2, - MLX5_NIC_IFC_INVALID = 3 + MLX5_NIC_IFC_SW_RESET = 7 }; u8 mlx5_get_nic_state(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 4ae533b3da07..cc7fd8e62844 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -435,6 +435,7 @@ struct mlx5_core_health { u32 prev; int miss_counter; bool sick; + u32 fatal_error; u32 crdump_size; /* wq spinlock to synchronize draining */ spinlock_t wq_lock; @@ -906,7 +907,6 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health); void mlx5_drain_health_wq(struct mlx5_core_dev *dev); void mlx5_trigger_health_work(struct mlx5_core_dev *dev); -void mlx5_drain_health_recovery(struct mlx5_core_dev *dev); int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_frag_buf *buf, int node); int mlx5_buf_alloc(struct mlx5_core_dev *dev, -- cgit v1.2.3 From 3e5b72ac2f298423902169db7893fef43365e0a6 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Mon, 12 Nov 2018 16:40:17 +0200 Subject: net/mlx5: Issue SW reset on FW assert If a FW assert is considered fatal, indicated by a new bit in the health buffer, reset the FW. After the reset go through the normal recovery flow. Only one PF needs to issue the reset, so an attempt is made to prevent the 2nd function from also issuing the reset. It's not an error if that happens, it just slows recovery. Signed-off-by: Feras Daoud Signed-off-by: Alex Vesker Signed-off-by: Moshe Shemesh Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/diag/crdump.c | 13 +- drivers/net/ethernet/mellanox/mlx5/core/health.c | 157 ++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 + .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 + include/linux/mlx5/device.h | 10 +- include/linux/mlx5/driver.h | 1 + 6 files changed, 176 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c index dfb34172c69b..28d02749d3c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c @@ -51,14 +51,23 @@ int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data) ret); return ret; } + /* Verify no other PF is running cr-dump or sw reset */ + ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, + MLX5_VSC_LOCK); + if (ret) { + mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n"); + goto unlock_gw; + } ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, NULL); if (ret) - goto unlock; + goto unlock_sem; ret = mlx5_crdump_fill(dev, cr_data); -unlock: +unlock_sem: + mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, MLX5_VSC_UNLOCK); +unlock_gw: mlx5_vsc_gw_unlock(dev); return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 737e6d550775..caf54bd7d538 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -40,6 +40,7 @@ #include "mlx5_core.h" #include "lib/eq.h" #include "lib/mlx5.h" +#include "lib/pci_vsc.h" enum { MLX5_HEALTH_POLL_INTERVAL = 2 * HZ, @@ -67,8 +68,10 @@ enum { enum { MLX5_SENSOR_NO_ERR = 0, MLX5_SENSOR_PCI_COMM_ERR = 1, - MLX5_SENSOR_NIC_DISABLED = 2, - MLX5_SENSOR_NIC_SW_RESET = 3, + MLX5_SENSOR_PCI_ERR = 2, + MLX5_SENSOR_NIC_DISABLED = 3, + MLX5_SENSOR_NIC_SW_RESET = 4, + MLX5_SENSOR_FW_SYND_RFR = 5, }; u8 mlx5_get_nic_state(struct mlx5_core_dev *dev) @@ -95,32 +98,162 @@ static bool sensor_pci_not_working(struct mlx5_core_dev *dev) return (ioread32be(&h->fw_ver) == 0xffffffff); } +static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u32 rfr = ioread32be(&h->rfr) >> MLX5_RFR_OFFSET; + u8 synd = ioread8(&h->synd); + + if (rfr && synd) + mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd); + return rfr && synd; +} + static u32 check_fatal_sensors(struct mlx5_core_dev *dev) { if (sensor_pci_not_working(dev)) return MLX5_SENSOR_PCI_COMM_ERR; + if (pci_channel_offline(dev->pdev)) + return MLX5_SENSOR_PCI_ERR; if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) return MLX5_SENSOR_NIC_DISABLED; if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET) return MLX5_SENSOR_NIC_SW_RESET; + if (sensor_fw_synd_rfr(dev)) + return MLX5_SENSOR_FW_SYND_RFR; return MLX5_SENSOR_NO_ERR; } +static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock) +{ + enum mlx5_vsc_state state; + int ret; + + if (!mlx5_core_is_pf(dev)) + return -EBUSY; + + /* Try to lock GW access, this stage doesn't return + * EBUSY because locked GW does not mean that other PF + * already started the reset. + */ + ret = mlx5_vsc_gw_lock(dev); + if (ret == -EBUSY) + return -EINVAL; + if (ret) + return ret; + + state = lock ? MLX5_VSC_LOCK : MLX5_VSC_UNLOCK; + /* At this stage, if the return status == EBUSY, then we know + * for sure that another PF started the reset, so don't allow + * another reset. + */ + ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, state); + if (ret) + mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n"); + + /* Unlock GW access */ + mlx5_vsc_gw_unlock(dev); + + return ret; +} + +static bool reset_fw_if_needed(struct mlx5_core_dev *dev) +{ + bool supported = (ioread32be(&dev->iseg->initializing) >> + MLX5_FW_RESET_SUPPORTED_OFFSET) & 1; + u32 fatal_error; + + if (!supported) + return false; + + /* The reset only needs to be issued by one PF. The health buffer is + * shared between all functions, and will be cleared during a reset. + * Check again to avoid a redundant 2nd reset. If the fatal erros was + * PCI related a reset won't help. + */ + fatal_error = check_fatal_sensors(dev); + if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR || + fatal_error == MLX5_SENSOR_NIC_DISABLED || + fatal_error == MLX5_SENSOR_NIC_SW_RESET) { + mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help."); + return false; + } + + mlx5_core_warn(dev, "Issuing FW Reset\n"); + /* Write the NIC interface field to initiate the reset, the command + * interface address also resides here, don't overwrite it. + */ + mlx5_set_nic_state(dev, MLX5_NIC_IFC_SW_RESET); + + return true; +} + void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) { mutex_lock(&dev->intf_state_mutex); if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) goto unlock; + if (dev->state == MLX5_DEVICE_STATE_UNINITIALIZED) { + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + goto unlock; + } - mlx5_core_err(dev, "start\n"); - if (pci_channel_offline(dev->pdev) || - dev->priv.health.fatal_error != MLX5_SENSOR_NO_ERR || force) { + if (check_fatal_sensors(dev) || force) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mlx5_cmd_flush(dev); } mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); +unlock: + mutex_unlock(&dev->intf_state_mutex); +} + +#define MLX5_CRDUMP_WAIT_MS 60000 +#define MLX5_FW_RESET_WAIT_MS 1000 +void mlx5_error_sw_reset(struct mlx5_core_dev *dev) +{ + unsigned long end, delay_ms = MLX5_FW_RESET_WAIT_MS; + int lock = -EBUSY; + + mutex_lock(&dev->intf_state_mutex); + if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto unlock; + + mlx5_core_err(dev, "start\n"); + + if (check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) { + /* Get cr-dump and reset FW semaphore */ + lock = lock_sem_sw_reset(dev, true); + + if (lock == -EBUSY) { + delay_ms = MLX5_CRDUMP_WAIT_MS; + goto recover_from_sw_reset; + } + /* Execute SW reset */ + reset_fw_if_needed(dev); + } + +recover_from_sw_reset: + /* Recover from SW reset */ + end = jiffies + msecs_to_jiffies(delay_ms); + do { + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) + break; + + cond_resched(); + } while (!time_after(jiffies, end)); + + if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) { + dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n", + mlx5_get_nic_state(dev), delay_ms); + } + + /* Release FW semaphore if you are the lock owner */ + if (!lock) + lock_sem_sw_reset(dev, false); + mlx5_core_err(dev, "end\n"); unlock: @@ -143,6 +276,20 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) case MLX5_NIC_IFC_NO_DRAM_NIC: mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n"); break; + + case MLX5_NIC_IFC_SW_RESET: + /* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases: + * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded + * and this is a VF), this is not recoverable by SW reset. + * Logging of this is handled elsewhere. + * 2. FW reset has been issued by another function, driver can + * be reloaded to recover after the mode switches to + * MLX5_NIC_IFC_DISABLED. + */ + if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR) + mlx5_core_warn(dev, "NIC SW reset in progress\n"); + break; + default: mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n", nic_interface); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index fd0e2949c4f2..ec5287c51825 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1361,6 +1361,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_core_info(dev, "%s was called\n", __func__); mlx5_enter_error_state(dev, false); + mlx5_error_sw_reset(dev); mlx5_unload_one(dev, false); /* In case of kernel call drain the health wq */ if (state) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 8593c8183d87..29bb61a10289 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -113,6 +113,7 @@ enum { enum mlx5_semaphore_space_address { MLX5_SEMAPHORE_SPACE_DOMAIN = 0xA, + MLX5_SEMAPHORE_SW_RESET = 0x20, }; int mlx5_query_hca_caps(struct mlx5_core_dev *dev); @@ -122,6 +123,7 @@ int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); +void mlx5_error_sw_reset(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); void mlx5_recover_device(struct mlx5_core_dev *dev); int mlx5_sriov_init(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 5e760067ac41..35ed38c2ae6c 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -510,6 +510,10 @@ struct mlx5_cmd_layout { u8 status_own; }; +enum mlx5_fatal_assert_bit_offsets { + MLX5_RFR_OFFSET = 31, +}; + struct health_buffer { __be32 assert_var[5]; __be32 rsvd0[3]; @@ -518,12 +522,16 @@ struct health_buffer { __be32 rsvd1[2]; __be32 fw_ver; __be32 hw_id; - __be32 rsvd2; + __be32 rfr; u8 irisc_index; u8 synd; __be16 ext_synd; }; +enum mlx5_initializing_bit_offsets { + MLX5_FW_RESET_SUPPORTED_OFFSET = 30, +}; + enum mlx5_cmd_addr_l_sz_offset { MLX5_NIC_IFC_OFFSET = 8, }; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index cc7fd8e62844..89205b6cc7ef 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -583,6 +583,7 @@ struct mlx5_priv { }; enum mlx5_device_state { + MLX5_DEVICE_STATE_UNINITIALIZED, MLX5_DEVICE_STATE_UP, MLX5_DEVICE_STATE_INTERNAL_ERROR, }; -- cgit v1.2.3 From 1e34f3efd413a6318c3edd6e8e7e091f1214b2e6 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Tue, 11 Dec 2018 16:09:53 +0200 Subject: net/mlx5: Create FW devlink_health_reporter Create mlx5_devlink_health_reporter for FW reporter. The FW reporter implements devlink_health_reporter diagnose callback. The fw reporter diagnose command can be triggered any time by the user to check current fw status. In healthy status, it will return clear syndrome. Otherwise it will return the syndrome and description of the error type. Command example and output on healthy status: $ devlink health diagnose pci/0000:82:00.0 reporter fw Syndrome: 0 Command example and output on non healthy status: $ devlink health diagnose pci/0000:82:00.0 reporter fw Syndrome: 8 Description: unrecoverable hardware error Signed-off-by: Moshe Shemesh Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 48 ++++++++++++++++++++++++ include/linux/mlx5/driver.h | 2 + 2 files changed, 50 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index caf54bd7d538..973cc005ae60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -388,6 +388,51 @@ static void print_health_info(struct mlx5_core_dev *dev) mlx5_core_err(dev, "raw fw_ver 0x%08x\n", fw); } +static int +mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u8 synd; + int err; + + synd = ioread8(&h->synd); + err = devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd); + if (err || !synd) + return err; + return devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd)); +} + +static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = { + .name = "fw", + .diagnose = mlx5_fw_reporter_diagnose, +}; + +static void mlx5_fw_reporter_create(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct devlink *devlink = priv_to_devlink(dev); + + health->fw_reporter = + devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops, + 0, false, dev); + if (IS_ERR(health->fw_reporter)) + mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n", + PTR_ERR(health->fw_reporter)); +} + +static void mlx5_fw_reporter_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + if (IS_ERR_OR_NULL(health->fw_reporter)) + return; + + devlink_health_reporter_destroy(health->fw_reporter); +} + static unsigned long get_next_poll_jiffies(void) { unsigned long next; @@ -498,6 +543,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev) struct mlx5_core_health *health = &dev->priv.health; destroy_workqueue(health->wq); + mlx5_fw_reporter_destroy(dev); } int mlx5_health_init(struct mlx5_core_dev *dev) @@ -519,5 +565,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev) spin_lock_init(&health->wq_lock); INIT_WORK(&health->work, health_care); + mlx5_fw_reporter_create(dev); + return 0; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 89205b6cc7ef..8d5d065d1aa6 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -53,6 +53,7 @@ #include #include #include +#include enum { MLX5_BOARD_ID_LEN = 64, @@ -443,6 +444,7 @@ struct mlx5_core_health { unsigned long flags; struct work_struct work; struct delayed_work recover_work; + struct devlink_health_reporter *fw_reporter; }; struct mlx5_qp_table { -- cgit v1.2.3 From d1bf0e2cc4a6e66c2bff48176b8b2930098468ef Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Tue, 11 Dec 2018 16:09:56 +0200 Subject: net/mlx5: Report devlink health on FW issues Use devlink_health_report() to report any symptom of FW issue as FW counter miss or new health syndrome. The FW issues detected in mlx5 during poll_health which is called in timer atomic context and so health work queue is used to schedule the reports. Signed-off-by: Moshe Shemesh Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 33 ++++++++++++++++++++++++ include/linux/mlx5/driver.h | 3 ++- 2 files changed, 35 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 1c20d3f1d238..5e876f1de114 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -515,6 +515,29 @@ mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter, return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, fmsg); } +static void mlx5_fw_reporter_err_work(struct work_struct *work) +{ + struct mlx5_fw_reporter_ctx fw_reporter_ctx; + struct mlx5_core_health *health; + + health = container_of(work, struct mlx5_core_health, report_work); + + if (IS_ERR_OR_NULL(health->fw_reporter)) + return; + + fw_reporter_ctx.err_synd = health->synd; + fw_reporter_ctx.miss_counter = health->miss_counter; + if (fw_reporter_ctx.err_synd) { + devlink_health_report(health->fw_reporter, + "FW syndrom reported", &fw_reporter_ctx); + return; + } + if (fw_reporter_ctx.miss_counter) + devlink_health_report(health->fw_reporter, + "FW miss counter reported", + &fw_reporter_ctx); +} + static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = { .name = "fw", .diagnose = mlx5_fw_reporter_diagnose, @@ -572,7 +595,9 @@ static void poll_health(struct timer_list *t) { struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer); struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; u32 fatal_error; + u8 prev_synd; u32 count; if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) @@ -588,8 +613,14 @@ static void poll_health(struct timer_list *t) if (health->miss_counter == MAX_MISSES) { mlx5_core_err(dev, "device's health compromised - reached miss count\n"); print_health_info(dev); + queue_work(health->wq, &health->report_work); } + prev_synd = health->synd; + health->synd = ioread8(&h->synd); + if (health->synd && health->synd != prev_synd) + queue_work(health->wq, &health->report_work); + fatal_error = check_fatal_sensors(dev); if (fatal_error && !health->fatal_error) { @@ -639,6 +670,7 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev) spin_lock_irqsave(&health->wq_lock, flags); set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); spin_unlock_irqrestore(&health->wq_lock, flags); + cancel_work_sync(&health->report_work); cancel_work_sync(&health->work); } @@ -675,6 +707,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev) return -ENOMEM; spin_lock_init(&health->wq_lock); INIT_WORK(&health->work, health_care); + INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work); mlx5_fw_reporter_create(dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8d5d065d1aa6..1931a4080d78 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -435,7 +435,7 @@ struct mlx5_core_health { struct timer_list timer; u32 prev; int miss_counter; - bool sick; + u8 synd; u32 fatal_error; u32 crdump_size; /* wq spinlock to synchronize draining */ @@ -443,6 +443,7 @@ struct mlx5_core_health { struct workqueue_struct *wq; unsigned long flags; struct work_struct work; + struct work_struct report_work; struct delayed_work recover_work; struct devlink_health_reporter *fw_reporter; }; -- cgit v1.2.3 From 96c82cdfe77b5e769624af71ec0554434037b82f Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Tue, 11 Dec 2018 16:09:57 +0200 Subject: net/mlx5: Add fw fatal devlink_health_reporter Create mlx5_devlink_health_reporter for fw fatal reporter. The fw fatal reporter is added in addition to the fw reporter and implements the recover callback. The point of having two reporters for FW issues, is that we don't want to run FW recover on any issue, but only fatal ones. Signed-off-by: Moshe Shemesh Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 81 ++++++++++++++++++------ include/linux/mlx5/driver.h | 1 + 2 files changed, 62 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 5e876f1de114..82a658834675 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -301,31 +301,43 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) /* How much time to wait until health resetting the driver (in msecs) */ #define MLX5_RECOVERY_WAIT_MSECS 60000 -static void health_care(struct work_struct *work) +static int mlx5_health_try_recover(struct mlx5_core_dev *dev) { - struct mlx5_core_health *health; - struct mlx5_core_dev *dev; - struct mlx5_priv *priv; unsigned long end; - health = container_of(work, struct mlx5_core_health, work); - priv = container_of(health, struct mlx5_priv, health); - dev = container_of(priv, struct mlx5_core_dev, priv); mlx5_core_warn(dev, "handling bad device here\n"); mlx5_handle_bad_state(dev); - end = jiffies + msecs_to_jiffies(MLX5_RECOVERY_WAIT_MSECS); while (sensor_pci_not_working(dev)) { if (time_after(jiffies, end)) { mlx5_core_err(dev, "health recovery flow aborted, PCI reads still not working\n"); - return; + return -EIO; } msleep(100); } mlx5_core_err(dev, "starting health recovery flow\n"); mlx5_recover_device(dev); + if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state) || + check_fatal_sensors(dev)) { + mlx5_core_err(dev, "health recovery failed\n"); + return -EIO; + } + return 0; +} + +static void health_recover_work(struct work_struct *work) +{ + struct mlx5_core_health *health; + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + + health = container_of(work, struct mlx5_core_health, work); + priv = container_of(health, struct mlx5_priv, health); + dev = container_of(priv, struct mlx5_core_dev, priv); + + mlx5_health_try_recover(dev); } static const char *hsynd_str(u8 synd) @@ -544,7 +556,22 @@ static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = { .dump = mlx5_fw_reporter_dump, }; -static void mlx5_fw_reporter_create(struct mlx5_core_dev *dev) +static int +mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter, + void *priv_ctx) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + + return mlx5_health_try_recover(dev); +} + +static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { + .name = "fw_fatal", + .recover = mlx5_fw_fatal_reporter_recover, +}; + +#define MLX5_REPORTER_FW_GRACEFUL_PERIOD 1200000 +static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; struct devlink *devlink = priv_to_devlink(dev); @@ -555,16 +582,26 @@ static void mlx5_fw_reporter_create(struct mlx5_core_dev *dev) if (IS_ERR(health->fw_reporter)) mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n", PTR_ERR(health->fw_reporter)); + + health->fw_fatal_reporter = + devlink_health_reporter_create(devlink, + &mlx5_fw_fatal_reporter_ops, + MLX5_REPORTER_FW_GRACEFUL_PERIOD, + true, dev); + if (IS_ERR(health->fw_fatal_reporter)) + mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n", + PTR_ERR(health->fw_fatal_reporter)); } -static void mlx5_fw_reporter_destroy(struct mlx5_core_dev *dev) +static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - if (IS_ERR_OR_NULL(health->fw_reporter)) - return; + if (!IS_ERR_OR_NULL(health->fw_reporter)) + devlink_health_reporter_destroy(health->fw_reporter); - devlink_health_reporter_destroy(health->fw_reporter); + if (!IS_ERR_OR_NULL(health->fw_fatal_reporter)) + devlink_health_reporter_destroy(health->fw_fatal_reporter); } static unsigned long get_next_poll_jiffies(void) @@ -686,7 +723,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev) struct mlx5_core_health *health = &dev->priv.health; destroy_workqueue(health->wq); - mlx5_fw_reporter_destroy(dev); + mlx5_fw_reporters_destroy(dev); } int mlx5_health_init(struct mlx5_core_dev *dev) @@ -694,22 +731,26 @@ int mlx5_health_init(struct mlx5_core_dev *dev) struct mlx5_core_health *health; char *name; + mlx5_fw_reporters_create(dev); + health = &dev->priv.health; name = kmalloc(64, GFP_KERNEL); if (!name) - return -ENOMEM; + goto out_err; strcpy(name, "mlx5_health"); strcat(name, dev_name(dev->device)); health->wq = create_singlethread_workqueue(name); kfree(name); if (!health->wq) - return -ENOMEM; + goto out_err; spin_lock_init(&health->wq_lock); - INIT_WORK(&health->work, health_care); + INIT_WORK(&health->work, health_recover_work); INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work); - mlx5_fw_reporter_create(dev); - return 0; + +out_err: + mlx5_fw_reporters_destroy(dev); + return -ENOMEM; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1931a4080d78..caac96bf9c0d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -446,6 +446,7 @@ struct mlx5_core_health { struct work_struct report_work; struct delayed_work recover_work; struct devlink_health_reporter *fw_reporter; + struct devlink_health_reporter *fw_fatal_reporter; }; struct mlx5_qp_table { -- cgit v1.2.3 From b3bd076f7501afea2871bb4738ab53498fd32cd5 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Sun, 27 Jan 2019 18:38:39 +0200 Subject: net/mlx5: Report devlink health on FW fatal issues Report devlink health on FW fatal issues via fw_fatal_reporter. The driver recover flow for FW fatal error is now being handled by the devlink health. Having the recovery controlled by devlink health, the user has the ability to cancel the auto-recovery for debug session and run it manually. Call mlx5_enter_error_state() before calling devlink_health_report() to ensure entering device error state even if auto-recovery is off. Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 42 +++++++++++++++--------- drivers/net/ethernet/mellanox/mlx5/core/main.c | 10 +++--- include/linux/mlx5/driver.h | 2 +- 3 files changed, 31 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 4ef62c6c6424..2fe6923f7ce0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -327,19 +327,6 @@ static int mlx5_health_try_recover(struct mlx5_core_dev *dev) return 0; } -static void health_recover_work(struct work_struct *work) -{ - struct mlx5_core_health *health; - struct mlx5_core_dev *dev; - struct mlx5_priv *priv; - - health = container_of(work, struct mlx5_core_health, work); - priv = container_of(health, struct mlx5_priv, health); - dev = container_of(priv, struct mlx5_core_dev, priv); - - mlx5_health_try_recover(dev); -} - static const char *hsynd_str(u8 synd) { switch (synd) { @@ -614,6 +601,29 @@ free_data: return err; } +static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) +{ + struct mlx5_fw_reporter_ctx fw_reporter_ctx; + struct mlx5_core_health *health; + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + + health = container_of(work, struct mlx5_core_health, fatal_report_work); + priv = container_of(health, struct mlx5_priv, health); + dev = container_of(priv, struct mlx5_core_dev, priv); + + mlx5_enter_error_state(dev, false); + if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) { + if (mlx5_health_try_recover(dev)) + mlx5_core_err(dev, "health recovery failed\n"); + return; + } + fw_reporter_ctx.err_synd = health->synd; + fw_reporter_ctx.miss_counter = health->miss_counter; + devlink_health_report(health->fw_fatal_reporter, + "FW fatal error reported", &fw_reporter_ctx); +} + static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { .name = "fw_fatal", .recover = mlx5_fw_fatal_reporter_recover, @@ -672,7 +682,7 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev) spin_lock_irqsave(&health->wq_lock, flags); if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) - queue_work(health->wq, &health->work); + queue_work(health->wq, &health->fatal_report_work); else mlx5_core_err(dev, "new health works are not permitted at this stage\n"); spin_unlock_irqrestore(&health->wq_lock, flags); @@ -758,7 +768,7 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev) set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); spin_unlock_irqrestore(&health->wq_lock, flags); cancel_work_sync(&health->report_work); - cancel_work_sync(&health->work); + cancel_work_sync(&health->fatal_report_work); } void mlx5_health_flush(struct mlx5_core_dev *dev) @@ -795,7 +805,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev) if (!health->wq) goto out_err; spin_lock_init(&health->wq_lock); - INIT_WORK(&health->work, health_recover_work); + INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work); INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index ec5287c51825..998eec938d3c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1363,11 +1363,8 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_enter_error_state(dev, false); mlx5_error_sw_reset(dev); mlx5_unload_one(dev, false); - /* In case of kernel call drain the health wq */ - if (state) { - mlx5_drain_health_wq(dev); - mlx5_pci_disable_device(dev); - } + mlx5_drain_health_wq(dev); + mlx5_pci_disable_device(dev); return state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; @@ -1535,7 +1532,8 @@ MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table); void mlx5_disable_device(struct mlx5_core_dev *dev) { - mlx5_pci_err_detected(dev->pdev, 0); + mlx5_error_sw_reset(dev); + mlx5_unload_one(dev, false); } void mlx5_recover_device(struct mlx5_core_dev *dev) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index caac96bf9c0d..25847beabd3f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -442,7 +442,7 @@ struct mlx5_core_health { spinlock_t wq_lock; struct workqueue_struct *wq; unsigned long flags; - struct work_struct work; + struct work_struct fatal_report_work; struct work_struct report_work; struct delayed_work recover_work; struct devlink_health_reporter *fw_reporter; -- cgit v1.2.3 From 33ee09cd59ce154b64f9df942dfa5456db90d5f9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 13 Jun 2019 19:59:51 +0300 Subject: device property: Add helpers to count items in an array The usual pattern to allocate the necessary space for an array of properties is to count them first by calling: count = device_property_read_uXX_array(dev, propname, NULL, 0); if (count < 0) return count; Introduce helpers device_property_count_uXX() to count items by supplying hard coded last two parameters to device_property_readXX_array(). Signed-off-by: Andy Shevchenko Acked-by: Sakari Ailus Reviewed-by: Heikki Krogerus Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 088d4db7e949..dbacf17fff2e 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -148,6 +148,26 @@ static inline int device_property_read_u64(struct device *dev, return device_property_read_u64_array(dev, propname, val, 1); } +static inline int device_property_count_u8(struct device *dev, const char *propname) +{ + return device_property_read_u8_array(dev, propname, NULL, 0); +} + +static inline int device_property_count_u16(struct device *dev, const char *propname) +{ + return device_property_read_u16_array(dev, propname, NULL, 0); +} + +static inline int device_property_count_u32(struct device *dev, const char *propname) +{ + return device_property_read_u32_array(dev, propname, NULL, 0); +} + +static inline int device_property_count_u64(struct device *dev, const char *propname) +{ + return device_property_read_u64_array(dev, propname, NULL, 0); +} + static inline bool fwnode_property_read_bool(const struct fwnode_handle *fwnode, const char *propname) { @@ -178,6 +198,30 @@ static inline int fwnode_property_read_u64(const struct fwnode_handle *fwnode, return fwnode_property_read_u64_array(fwnode, propname, val, 1); } +static inline int fwnode_property_count_u8(const struct fwnode_handle *fwnode, + const char *propname) +{ + return fwnode_property_read_u8_array(fwnode, propname, NULL, 0); +} + +static inline int fwnode_property_count_u16(const struct fwnode_handle *fwnode, + const char *propname) +{ + return fwnode_property_read_u16_array(fwnode, propname, NULL, 0); +} + +static inline int fwnode_property_count_u32(const struct fwnode_handle *fwnode, + const char *propname) +{ + return fwnode_property_read_u32_array(fwnode, propname, NULL, 0); +} + +static inline int fwnode_property_count_u64(const struct fwnode_handle *fwnode, + const char *propname) +{ + return fwnode_property_read_u64_array(fwnode, propname, NULL, 0); +} + /** * struct property_entry - "Built-in" device property representation. * @name: Name of the property. -- cgit v1.2.3 From de76cda215d56256ffcda7ffa538b70f9fb301a7 Mon Sep 17 00:00:00 2001 From: Gustavo Pimentel Date: Tue, 4 Jun 2019 18:24:43 +0200 Subject: PCI: Decode PCIe 32 GT/s link speed PCIe r5.0, sec 7.5.3.18, defines a new 32.0 GT/s bit in the Supported Link Speeds Vector of Link Capabilities 2. Decode this new speed. This does not affect the speed of the link, which should be negotiated automatically by the hardware; it only adds decoding when showing the speed to the user. Previously, reading the speed of a link operating at this speed showed "Unknown speed" instead of "32.0 GT/s". Link: https://lore.kernel.org/lkml/92365e3caf0fc559f9ab14bcd053bfc92d4f661c.1559664969.git.gustavo.pimentel@synopsys.com Signed-off-by: Gustavo Pimentel [bhelgaas: changelog] Signed-off-by: Bjorn Helgaas --- drivers/pci/pci-sysfs.c | 3 +++ drivers/pci/pci.c | 4 +++- drivers/pci/probe.c | 2 +- drivers/pci/slot.c | 1 + include/linux/pci.h | 1 + include/uapi/linux/pci_regs.h | 4 ++++ 6 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 6d27475e39b2..d52d30448e41 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -182,6 +182,9 @@ static ssize_t current_link_speed_show(struct device *dev, return -EINVAL; switch (linkstat & PCI_EXP_LNKSTA_CLS) { + case PCI_EXP_LNKSTA_CLS_32_0GB: + speed = "32 GT/s"; + break; case PCI_EXP_LNKSTA_CLS_16_0GB: speed = "16 GT/s"; break; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 8abc843b1615..4729a7c7a9d9 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5621,7 +5621,9 @@ enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev) */ pcie_capability_read_dword(dev, PCI_EXP_LNKCAP2, &lnkcap2); if (lnkcap2) { /* PCIe r3.0-compliant */ - if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_16_0GB) + if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_32_0GB) + return PCIE_SPEED_32_0GT; + else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_16_0GB) return PCIE_SPEED_16_0GT; else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB) return PCIE_SPEED_8_0GT; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 0e8e2c186f50..c5f27c8cd140 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -668,7 +668,7 @@ const unsigned char pcie_link_speed[] = { PCIE_SPEED_5_0GT, /* 2 */ PCIE_SPEED_8_0GT, /* 3 */ PCIE_SPEED_16_0GT, /* 4 */ - PCI_SPEED_UNKNOWN, /* 5 */ + PCIE_SPEED_32_0GT, /* 5 */ PCI_SPEED_UNKNOWN, /* 6 */ PCI_SPEED_UNKNOWN, /* 7 */ PCI_SPEED_UNKNOWN, /* 8 */ diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index f4d92b1afe7b..ae4aa0e1f2f4 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -75,6 +75,7 @@ static const char *pci_bus_speed_strings[] = { "5.0 GT/s PCIe", /* 0x15 */ "8.0 GT/s PCIe", /* 0x16 */ "16.0 GT/s PCIe", /* 0x17 */ + "32.0 GT/s PCIe", /* 0x18 */ }; static ssize_t bus_speed_read(enum pci_bus_speed speed, char *buf) diff --git a/include/linux/pci.h b/include/linux/pci.h index 4a5a84d7bdd4..2173e6b75579 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -258,6 +258,7 @@ enum pci_bus_speed { PCIE_SPEED_5_0GT = 0x15, PCIE_SPEED_8_0GT = 0x16, PCIE_SPEED_16_0GT = 0x17, + PCIE_SPEED_32_0GT = 0x18, PCI_SPEED_UNKNOWN = 0xff, }; diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 27164769d184..f28e562d7ca8 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -528,6 +528,7 @@ #define PCI_EXP_LNKCAP_SLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector bit 1 */ #define PCI_EXP_LNKCAP_SLS_8_0GB 0x00000003 /* LNKCAP2 SLS Vector bit 2 */ #define PCI_EXP_LNKCAP_SLS_16_0GB 0x00000004 /* LNKCAP2 SLS Vector bit 3 */ +#define PCI_EXP_LNKCAP_SLS_32_0GB 0x00000005 /* LNKCAP2 SLS Vector bit 4 */ #define PCI_EXP_LNKCAP_MLW 0x000003f0 /* Maximum Link Width */ #define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */ #define PCI_EXP_LNKCAP_L0SEL 0x00007000 /* L0s Exit Latency */ @@ -556,6 +557,7 @@ #define PCI_EXP_LNKSTA_CLS_5_0GB 0x0002 /* Current Link Speed 5.0GT/s */ #define PCI_EXP_LNKSTA_CLS_8_0GB 0x0003 /* Current Link Speed 8.0GT/s */ #define PCI_EXP_LNKSTA_CLS_16_0GB 0x0004 /* Current Link Speed 16.0GT/s */ +#define PCI_EXP_LNKSTA_CLS_32_0GB 0x0005 /* Current Link Speed 32.0GT/s */ #define PCI_EXP_LNKSTA_NLW 0x03f0 /* Negotiated Link Width */ #define PCI_EXP_LNKSTA_NLW_X1 0x0010 /* Current Link Width x1 */ #define PCI_EXP_LNKSTA_NLW_X2 0x0020 /* Current Link Width x2 */ @@ -661,6 +663,7 @@ #define PCI_EXP_LNKCAP2_SLS_5_0GB 0x00000004 /* Supported Speed 5GT/s */ #define PCI_EXP_LNKCAP2_SLS_8_0GB 0x00000008 /* Supported Speed 8GT/s */ #define PCI_EXP_LNKCAP2_SLS_16_0GB 0x00000010 /* Supported Speed 16GT/s */ +#define PCI_EXP_LNKCAP2_SLS_32_0GB 0x00000020 /* Supported Speed 32GT/s */ #define PCI_EXP_LNKCAP2_CROSSLINK 0x00000100 /* Crosslink supported */ #define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ #define PCI_EXP_LNKCTL2_TLS 0x000f @@ -668,6 +671,7 @@ #define PCI_EXP_LNKCTL2_TLS_5_0GT 0x0002 /* Supported Speed 5GT/s */ #define PCI_EXP_LNKCTL2_TLS_8_0GT 0x0003 /* Supported Speed 8GT/s */ #define PCI_EXP_LNKCTL2_TLS_16_0GT 0x0004 /* Supported Speed 16GT/s */ +#define PCI_EXP_LNKCTL2_TLS_32_0GT 0x0005 /* Supported Speed 32GT/s */ #define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */ #define PCI_EXP_SLTCAP2 52 /* Slot Capabilities 2 */ -- cgit v1.2.3 From b3119cde1d70d6df1574b9f26d8e087e8e5116b4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 22 May 2019 10:07:45 -0700 Subject: rcu: Fix irritating whitespace error in rcu_assign_pointer() Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 915460ec0872..534c05d529b5 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -369,7 +369,7 @@ static inline void rcu_preempt_sleep_check(void) { } #define rcu_assign_pointer(p, v) \ ({ \ uintptr_t _r_a_p__v = (uintptr_t)(v); \ - rcu_check_sparse(p, __rcu); \ + rcu_check_sparse(p, __rcu); \ \ if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ -- cgit v1.2.3 From 6da9f775175e516fc7229ceaa9b54f8f56aa7924 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 21 May 2019 16:48:43 -0400 Subject: rcu: Force inlining of rcu_read_lock() When debugging options are turned on, the rcu_read_lock() function might not be inlined. This results in lockdep's print_lock() function printing "rcu_read_lock+0x0/0x70" instead of rcu_read_lock()'s caller. For example: [ 10.579995] ============================= [ 10.584033] WARNING: suspicious RCU usage [ 10.588074] 4.18.0.memcg_v2+ #1 Not tainted [ 10.593162] ----------------------------- [ 10.597203] include/linux/rcupdate.h:281 Illegal context switch in RCU read-side critical section! [ 10.606220] [ 10.606220] other info that might help us debug this: [ 10.606220] [ 10.614280] [ 10.614280] rcu_scheduler_active = 2, debug_locks = 1 [ 10.620853] 3 locks held by systemd/1: [ 10.624632] #0: (____ptrval____) (&type->i_mutex_dir_key#5){.+.+}, at: lookup_slow+0x42/0x70 [ 10.633232] #1: (____ptrval____) (rcu_read_lock){....}, at: rcu_read_lock+0x0/0x70 [ 10.640954] #2: (____ptrval____) (rcu_read_lock){....}, at: rcu_read_lock+0x0/0x70 These "rcu_read_lock+0x0/0x70" strings are not providing any useful information. This commit therefore forces inlining of the rcu_read_lock() function so that rcu_read_lock()'s caller is instead shown. Signed-off-by: Waiman Long Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 534c05d529b5..a8ed624da555 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -588,7 +588,7 @@ static inline void rcu_preempt_sleep_check(void) { } * read-side critical sections may be preempted and they may also block, but * only when acquiring spinlocks that are subject to priority inheritance. */ -static inline void rcu_read_lock(void) +static __always_inline void rcu_read_lock(void) { __rcu_read_lock(); __acquire(RCU); -- cgit v1.2.3 From 9129b017b54dab09eb69b7269026243156e5188e Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Mon, 27 May 2019 10:49:57 +0200 Subject: rcu: Don't return a value from rcu_assign_pointer() Quoting Paul [1]: "Given that a quick (and perhaps error-prone) search of the uses of rcu_assign_pointer() in v5.1 didn't find a single use of the return value, let's please instead change the documentation and implementation to eliminate the return value." [1] https://lkml.kernel.org/r/20190523135013.GL28207@linux.ibm.com Signed-off-by: Andrea Parri Cc: "Paul E. McKenney" Cc: Josh Triplett Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: Lai Jiangshan Cc: Joel Fernandes Cc: rcu@vger.kernel.org Cc: Peter Zijlstra Cc: Will Deacon Cc: Mark Rutland Cc: Matthew Wilcox Cc: Sasha Levin Signed-off-by: Paul E. McKenney --- Documentation/RCU/whatisRCU.txt | 8 ++++---- include/linux/rcupdate.h | 5 ++--- tools/include/linux/rcu.h | 4 ++-- tools/testing/radix-tree/linux/rcupdate.h | 2 +- 4 files changed, 9 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 981651a8b65d..7e1a8721637a 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -212,7 +212,7 @@ synchronize_rcu() rcu_assign_pointer() - typeof(p) rcu_assign_pointer(p, typeof(p) v); + void rcu_assign_pointer(p, typeof(p) v); Yes, rcu_assign_pointer() -is- implemented as a macro, though it would be cool to be able to declare a function in this manner. @@ -220,9 +220,9 @@ rcu_assign_pointer() The updater uses this function to assign a new value to an RCU-protected pointer, in order to safely communicate the change - in value from the updater to the reader. This function returns - the new value, and also executes any memory-barrier instructions - required for a given CPU architecture. + in value from the updater to the reader. This macro does not + evaluate to an rvalue, but it does execute any memory-barrier + instructions required for a given CPU architecture. Perhaps just as important, it serves to document (1) which pointers are protected by RCU and (2) the point at which a diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a8ed624da555..0c9b92799abc 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -367,7 +367,7 @@ static inline void rcu_preempt_sleep_check(void) { } * other macros that it invokes. */ #define rcu_assign_pointer(p, v) \ -({ \ +do { \ uintptr_t _r_a_p__v = (uintptr_t)(v); \ rcu_check_sparse(p, __rcu); \ \ @@ -375,8 +375,7 @@ static inline void rcu_preempt_sleep_check(void) { } WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ else \ smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ - _r_a_p__v; \ -}) +} while (0) /** * rcu_swap_protected() - swap an RCU and a regular pointer diff --git a/tools/include/linux/rcu.h b/tools/include/linux/rcu.h index 7d02527e5bce..9554d3fa54f3 100644 --- a/tools/include/linux/rcu.h +++ b/tools/include/linux/rcu.h @@ -19,7 +19,7 @@ static inline bool rcu_is_watching(void) return false; } -#define rcu_assign_pointer(p, v) ((p) = (v)) -#define RCU_INIT_POINTER(p, v) p=(v) +#define rcu_assign_pointer(p, v) do { (p) = (v); } while (0) +#define RCU_INIT_POINTER(p, v) do { (p) = (v); } while (0) #endif diff --git a/tools/testing/radix-tree/linux/rcupdate.h b/tools/testing/radix-tree/linux/rcupdate.h index fd280b070fdb..fed468fb0c78 100644 --- a/tools/testing/radix-tree/linux/rcupdate.h +++ b/tools/testing/radix-tree/linux/rcupdate.h @@ -7,6 +7,6 @@ #define rcu_dereference_raw(p) rcu_dereference(p) #define rcu_dereference_protected(p, cond) rcu_dereference(p) #define rcu_dereference_check(p, cond) rcu_dereference(p) -#define RCU_INIT_POINTER(p, v) (p) = (v) +#define RCU_INIT_POINTER(p, v) do { (p) = (v); } while (0) #endif -- cgit v1.2.3 From 4368dada5b37e74a13b892ca5cef8a7d558e9a5f Mon Sep 17 00:00:00 2001 From: Shalom Toledo Date: Tue, 11 Jun 2019 18:45:09 +0300 Subject: ptp: ptp_clock: Publish scaled_ppm_to_ppb Publish scaled_ppm_to_ppb to allow drivers to use it. Signed-off-by: Shalom Toledo Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/ptp/ptp_clock.c | 3 ++- include/linux/ptp_clock_kernel.h | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index e189fa1be21e..e60eab7f8a61 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -63,7 +63,7 @@ static void enqueue_external_timestamp(struct timestamp_event_queue *queue, spin_unlock_irqrestore(&queue->lock, flags); } -static s32 scaled_ppm_to_ppb(long ppm) +s32 scaled_ppm_to_ppb(long ppm) { /* * The 'freq' field in the 'struct timex' is in parts per @@ -82,6 +82,7 @@ static s32 scaled_ppm_to_ppb(long ppm) ppb >>= 13; return (s32) ppb; } +EXPORT_SYMBOL(scaled_ppm_to_ppb); /* posix clock implementation */ diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 28eb9c792522..93cc4f1d444a 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -212,6 +212,14 @@ extern void ptp_clock_event(struct ptp_clock *ptp, extern int ptp_clock_index(struct ptp_clock *ptp); +/** + * scaled_ppm_to_ppb() - convert scaled ppm to ppb + * + * @ppm: Parts per million, but with a 16 bit binary fractional field + */ + +extern s32 scaled_ppm_to_ppb(long ppm); + /** * ptp_find_pin() - obtain the pin index of a given auxiliary function * -- cgit v1.2.3 From 4892d3a6a009f7eba2e806b9183e5d8790769f41 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 14 Jun 2019 10:12:26 +0200 Subject: gpio: Drop the parent_irq from gpio_irq_chip We already have an array named "parents" so instead of letting one point to the other, simply allocate a dynamic array to hold the parents, just one if desired and drop the number of members in gpio_irq_chip by 1. Rename gpiochip to gc in the process. Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 30 +++++++++++++++++++----------- include/linux/gpio/driver.h | 7 ------- 2 files changed, 19 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 4561cb39bdb4..71cd685ed6c4 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1644,39 +1644,47 @@ EXPORT_SYMBOL_GPL(gpiochip_irqchip_irq_valid); /** * gpiochip_set_cascaded_irqchip() - connects a cascaded irqchip to a gpiochip - * @gpiochip: the gpiochip to set the irqchip chain to + * @gc: the gpiochip to set the irqchip chain to * @parent_irq: the irq number corresponding to the parent IRQ for this * chained irqchip * @parent_handler: the parent interrupt handler for the accumulated IRQ * coming out of the gpiochip. If the interrupt is nested rather than * cascaded, pass NULL in this handler argument */ -static void gpiochip_set_cascaded_irqchip(struct gpio_chip *gpiochip, +static void gpiochip_set_cascaded_irqchip(struct gpio_chip *gc, unsigned int parent_irq, irq_flow_handler_t parent_handler) { - if (!gpiochip->irq.domain) { - chip_err(gpiochip, "called %s before setting up irqchip\n", + struct gpio_irq_chip *girq = &gc->irq; + struct device *dev = &gc->gpiodev->dev; + + if (!girq->domain) { + chip_err(gc, "called %s before setting up irqchip\n", __func__); return; } if (parent_handler) { - if (gpiochip->can_sleep) { - chip_err(gpiochip, + if (gc->can_sleep) { + chip_err(gc, "you cannot have chained interrupts on a chip that may sleep\n"); return; } + girq->parents = devm_kcalloc(dev, 1, + sizeof(*girq->parents), + GFP_KERNEL); + if (!girq->parents) { + chip_err(gc, "out of memory allocating parent IRQ\n"); + return; + } + girq->parents[0] = parent_irq; + girq->num_parents = 1; /* * The parent irqchip is already using the chip_data for this * irqchip, so our callbacks simply use the handler_data. */ irq_set_chained_handler_and_data(parent_irq, parent_handler, - gpiochip); - - gpiochip->irq.parent_irq = parent_irq; - gpiochip->irq.parents = &gpiochip->irq.parent_irq; - gpiochip->irq.num_parents = 1; + gc); } } diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 937c40fb61f7..02698c0f34ea 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -102,13 +102,6 @@ struct gpio_irq_chip { */ unsigned int num_parents; - /** - * @parent_irq: - * - * For use by gpiochip_set_cascaded_irqchip() - */ - unsigned int parent_irq; - /** * @parents: * -- cgit v1.2.3 From 1ec0cd8286f35988134e05367ab5e66213b84e7c Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Fri, 24 May 2019 12:44:18 +0200 Subject: PM: hibernate: powerpc: Expose pfn_is_nosave() prototype The declaration for pfn_is_nosave is only available in kernel/power/power.h. Since this function can be override in arch, expose it globally. Having a prototype will make sure to avoid warning (sometime treated as error with W=1) such as: arch/powerpc/kernel/suspend.c:18:5: error: no previous prototype for 'pfn_is_nosave' [-Werror=missing-prototypes] This moves the declaration into a globally visible header file and add missing include to avoid a warning on powerpc. Also remove the duplicated prototypes since not required anymore. Signed-off-by: Mathieu Malaterre Acked-by: Michael Ellerman (powerpc) Signed-off-by: Rafael J. Wysocki --- arch/powerpc/kernel/suspend.c | 1 + arch/s390/kernel/entry.h | 1 - include/linux/suspend.h | 1 + kernel/power/power.h | 2 -- 4 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c index c612d50c9d18..b84992c10854 100644 --- a/arch/powerpc/kernel/suspend.c +++ b/arch/powerpc/kernel/suspend.c @@ -7,6 +7,7 @@ */ #include +#include #include #include diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 20420c2b8a14..b2956d49b6ad 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -63,7 +63,6 @@ void __init startup_init(void); void die(struct pt_regs *regs, const char *str); int setup_profiling_timer(unsigned int multiplier); void __init time_init(void); -int pfn_is_nosave(unsigned long); void s390_early_resume(void); unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip); diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 8594001e8be8..05645f726815 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -426,6 +426,7 @@ extern bool system_entering_hibernation(void); extern bool hibernation_available(void); asmlinkage int swsusp_save(void); extern struct pbe *restore_pblist; +int pfn_is_nosave(unsigned long pfn); #else /* CONFIG_HIBERNATION */ static inline void register_nosave_region(unsigned long b, unsigned long e) {} static inline void register_nosave_region_late(unsigned long b, unsigned long e) {} diff --git a/kernel/power/power.h b/kernel/power/power.h index 9e58bdc8a562..44bee462ff57 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -75,8 +75,6 @@ static inline void hibernate_reserved_size_init(void) {} static inline void hibernate_image_size_init(void) {} #endif /* !CONFIG_HIBERNATION */ -extern int pfn_is_nosave(unsigned long); - #define power_attr(_name) \ static struct kobj_attribute _name##_attr = { \ .attr = { \ -- cgit v1.2.3 From bb2bb903042517b8fb17b2bc21e00512f2dcac01 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Thu, 13 Jun 2019 15:34:07 -0700 Subject: dma-buf: add DMA_BUF_SET_NAME ioctls This patch adds complimentary DMA_BUF_SET_NAME ioctls, which lets userspace processes attach a free-form name to each buffer. This information can be extremely helpful for tracking and accounting shared buffers. For example, on Android, we know what each buffer will be used for at allocation time: GL, multimedia, camera, etc. The userspace allocator can use DMA_BUF_SET_NAME to associate that information with the buffer, so we can later give developers a breakdown of how much memory they're allocating for graphics, camera, etc. Signed-off-by: Greg Hackmann Signed-off-by: Chenbo Feng Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20190613223408.139221-3-fengc@google.com --- drivers/dma-buf/dma-buf.c | 65 ++++++++++++++++++++++++++++++++++++++++++-- include/linux/dma-buf.h | 5 +++- include/uapi/linux/dma-buf.h | 3 ++ 3 files changed, 69 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 3612ccededd6..ab96410d1dcd 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -48,8 +48,24 @@ struct dma_buf_list { static struct dma_buf_list db_list; +static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen) +{ + struct dma_buf *dmabuf; + char name[DMA_BUF_NAME_LEN]; + size_t ret = 0; + + dmabuf = dentry->d_fsdata; + mutex_lock(&dmabuf->lock); + if (dmabuf->name) + ret = strlcpy(name, dmabuf->name, DMA_BUF_NAME_LEN); + mutex_unlock(&dmabuf->lock); + + return dynamic_dname(dentry, buffer, buflen, "/%s:%s", + dentry->d_name.name, ret > 0 ? name : ""); +} + static const struct dentry_operations dma_buf_dentry_ops = { - .d_dname = simple_dname, + .d_dname = dmabuffs_dname, }; static struct vfsmount *dma_buf_mnt; @@ -301,6 +317,43 @@ out: return events; } +/** + * dma_buf_set_name - Set a name to a specific dma_buf to track the usage. + * The name of the dma-buf buffer can only be set when the dma-buf is not + * attached to any devices. It could theoritically support changing the + * name of the dma-buf if the same piece of memory is used for multiple + * purpose between different devices. + * + * @dmabuf [in] dmabuf buffer that will be renamed. + * @buf: [in] A piece of userspace memory that contains the name of + * the dma-buf. + * + * Returns 0 on success. If the dma-buf buffer is already attached to + * devices, return -EBUSY. + * + */ +static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf) +{ + char *name = strndup_user(buf, DMA_BUF_NAME_LEN); + long ret = 0; + + if (IS_ERR(name)) + return PTR_ERR(name); + + mutex_lock(&dmabuf->lock); + if (!list_empty(&dmabuf->attachments)) { + ret = -EBUSY; + kfree(name); + goto out_unlock; + } + kfree(dmabuf->name); + dmabuf->name = name; + +out_unlock: + mutex_unlock(&dmabuf->lock); + return ret; +} + static long dma_buf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -339,6 +392,10 @@ static long dma_buf_ioctl(struct file *file, ret = dma_buf_begin_cpu_access(dmabuf, direction); return ret; + + case DMA_BUF_SET_NAME: + return dma_buf_set_name(dmabuf, (const char __user *)arg); + default: return -ENOTTY; } @@ -380,6 +437,7 @@ static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags) goto err_alloc_file; file->f_flags = flags & (O_ACCMODE | O_NONBLOCK); file->private_data = dmabuf; + file->f_path.dentry->d_fsdata = dmabuf; return file; @@ -1112,12 +1170,13 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) continue; } - seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\t%08lu\n", + seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\t%08lu\t%s\n", buf_obj->size, buf_obj->file->f_flags, buf_obj->file->f_mode, file_count(buf_obj->file), buf_obj->exp_name, - file_inode(buf_obj->file)->i_ino); + file_inode(buf_obj->file)->i_ino, + buf_obj->name ?: ""); robj = buf_obj->resv; while (true) { diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 8a327566d7f4..01ad5b942a6f 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -280,10 +280,12 @@ struct dma_buf_ops { * @file: file pointer used for sharing buffers across, and for refcounting. * @attachments: list of dma_buf_attachment that denotes all devices attached. * @ops: dma_buf_ops associated with this buffer object. - * @lock: used internally to serialize list manipulation, attach/detach and vmap/unmap + * @lock: used internally to serialize list manipulation, attach/detach and + * vmap/unmap, and accesses to name * @vmapping_counter: used internally to refcnt the vmaps * @vmap_ptr: the current vmap ptr if vmapping_counter > 0 * @exp_name: name of the exporter; useful for debugging. + * @name: userspace-provided name; useful for accounting and debugging. * @owner: pointer to exporter module; used for refcounting when exporter is a * kernel module. * @list_node: node for dma_buf accounting and debugging. @@ -311,6 +313,7 @@ struct dma_buf { unsigned vmapping_counter; void *vmap_ptr; const char *exp_name; + const char *name; struct module *owner; struct list_head list_node; void *priv; diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index d75df5210a4a..dbc7092e04b5 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -35,7 +35,10 @@ struct dma_buf_sync { #define DMA_BUF_SYNC_VALID_FLAGS_MASK \ (DMA_BUF_SYNC_RW | DMA_BUF_SYNC_END) +#define DMA_BUF_NAME_LEN 32 + #define DMA_BUF_BASE 'b' #define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync) +#define DMA_BUF_SET_NAME _IOW(DMA_BUF_BASE, 1, const char *) #endif -- cgit v1.2.3 From d1609c312d42f3bdfe7df9d4dd9d5b2c7ace90f4 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Sun, 28 Apr 2019 10:35:31 +0800 Subject: usb: chipidea: imx: add imx7ulp support In this commit, we add CI_HDRC_PMQOS to avoid system entering idle, at imx7ulp, if the system enters idle, the DMA will stop, so the USB transfer can't work at this case. Signed-off-by: Peter Chen --- drivers/usb/chipidea/ci_hdrc_imx.c | 28 +++++++++++++++++++++++++++- drivers/usb/chipidea/usbmisc_imx.c | 4 ++++ include/linux/usb/chipidea.h | 1 + 3 files changed, 32 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index ceec8d5985d4..a76708501236 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "ci.h" #include "ci_hdrc_imx.h" @@ -63,6 +64,11 @@ static const struct ci_hdrc_imx_platform_flag imx7d_usb_data = { .flags = CI_HDRC_SUPPORTS_RUNTIME_PM, }; +static const struct ci_hdrc_imx_platform_flag imx7ulp_usb_data = { + .flags = CI_HDRC_SUPPORTS_RUNTIME_PM | + CI_HDRC_PMQOS, +}; + static const struct of_device_id ci_hdrc_imx_dt_ids[] = { { .compatible = "fsl,imx23-usb", .data = &imx23_usb_data}, { .compatible = "fsl,imx28-usb", .data = &imx28_usb_data}, @@ -72,6 +78,7 @@ static const struct of_device_id ci_hdrc_imx_dt_ids[] = { { .compatible = "fsl,imx6sx-usb", .data = &imx6sx_usb_data}, { .compatible = "fsl,imx6ul-usb", .data = &imx6ul_usb_data}, { .compatible = "fsl,imx7d-usb", .data = &imx7d_usb_data}, + { .compatible = "fsl,imx7ulp-usb", .data = &imx7ulp_usb_data}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, ci_hdrc_imx_dt_ids); @@ -93,6 +100,8 @@ struct ci_hdrc_imx_data { struct clk *clk_ahb; struct clk *clk_per; /* --------------------------------- */ + struct pm_qos_request pm_qos_req; + const struct ci_hdrc_imx_platform_flag *plat_data; }; /* Common functions shared by usbmisc drivers */ @@ -309,6 +318,8 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) if (!data) return -ENOMEM; + data->plat_data = imx_platform_flag; + pdata.flags |= imx_platform_flag->flags; platform_set_drvdata(pdev, data); data->usbmisc_data = usbmisc_get_init_data(dev); if (IS_ERR(data->usbmisc_data)) @@ -369,6 +380,11 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) } } } + + if (pdata.flags & CI_HDRC_PMQOS) + pm_qos_add_request(&data->pm_qos_req, + PM_QOS_CPU_DMA_LATENCY, 0); + ret = imx_get_clks(dev); if (ret) goto disable_hsic_regulator; @@ -396,7 +412,6 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) usb_phy_init(pdata.usb_phy); } - pdata.flags |= imx_platform_flag->flags; if (pdata.flags & CI_HDRC_SUPPORTS_RUNTIME_PM) data->supports_runtime_pm = true; @@ -439,6 +454,8 @@ err_clk: disable_hsic_regulator: if (data->hsic_pad_regulator) ret = regulator_disable(data->hsic_pad_regulator); + if (pdata.flags & CI_HDRC_PMQOS) + pm_qos_remove_request(&data->pm_qos_req); return ret; } @@ -455,6 +472,8 @@ static int ci_hdrc_imx_remove(struct platform_device *pdev) if (data->override_phy_control) usb_phy_shutdown(data->phy); imx_disable_unprepare_clks(&pdev->dev); + if (data->plat_data->flags & CI_HDRC_PMQOS) + pm_qos_remove_request(&data->pm_qos_req); if (data->hsic_pad_regulator) regulator_disable(data->hsic_pad_regulator); @@ -480,6 +499,9 @@ static int __maybe_unused imx_controller_suspend(struct device *dev) } imx_disable_unprepare_clks(dev); + if (data->plat_data->flags & CI_HDRC_PMQOS) + pm_qos_remove_request(&data->pm_qos_req); + data->in_lpm = true; return 0; @@ -497,6 +519,10 @@ static int __maybe_unused imx_controller_resume(struct device *dev) return 0; } + if (data->plat_data->flags & CI_HDRC_PMQOS) + pm_qos_add_request(&data->pm_qos_req, + PM_QOS_CPU_DMA_LATENCY, 0); + ret = imx_prepare_enable_clks(dev); if (ret) return ret; diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c index d8b67e150b12..b7a5727d0c8a 100644 --- a/drivers/usb/chipidea/usbmisc_imx.c +++ b/drivers/usb/chipidea/usbmisc_imx.c @@ -763,6 +763,10 @@ static const struct of_device_id usbmisc_imx_dt_ids[] = { .compatible = "fsl,imx7d-usbmisc", .data = &imx7d_usbmisc_ops, }, + { + .compatible = "fsl,imx7ulp-usbmisc", + .data = &imx7d_usbmisc_ops, + }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, usbmisc_imx_dt_ids); diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index 911e05af671e..edd89b7c8f18 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -61,6 +61,7 @@ struct ci_hdrc_platform_data { #define CI_HDRC_OVERRIDE_PHY_CONTROL BIT(12) /* Glue layer manages phy */ #define CI_HDRC_REQUIRES_ALIGNED_DMA BIT(13) #define CI_HDRC_IMX_IS_HSIC BIT(14) +#define CI_HDRC_PMQOS BIT(15) enum usb_dr_mode dr_mode; #define CI_HDRC_CONTROLLER_RESET_EVENT 0 #define CI_HDRC_CONTROLLER_STOPPED_EVENT 1 -- cgit v1.2.3 From 26f7044e95042daabcf1c71796a0e804a83c979f Mon Sep 17 00:00:00 2001 From: Chung-Hsien Hsu Date: Thu, 9 May 2019 09:49:06 +0000 Subject: nl80211: add support for SAE authentication offload Let drivers advertise support for station-mode SAE authentication offload with a new NL80211_EXT_FEATURE_SAE_OFFLOAD flag. Signed-off-by: Chung-Hsien Hsu Signed-off-by: Chi-Hsien Lin Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + include/net/cfg80211.h | 5 +++++ include/uapi/linux/nl80211.h | 19 +++++++++++++++++++ net/wireless/nl80211.c | 14 ++++++++++++++ 4 files changed, 39 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 61f0a316c6ac..5dfd949ade25 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2612,6 +2612,7 @@ enum ieee80211_key_len { #define FILS_ERP_MAX_RRK_LEN 64 #define PMK_MAX_LEN 64 +#define SAE_PASSWORD_MAX_LEN 128 /* Public action codes (IEEE Std 802.11-2016, 9.6.8.1, Table 9-307) */ enum ieee80211_pub_actioncode { diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c19687833493..4b45056dbb25 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -742,6 +742,9 @@ struct survey_info { * CFG80211_MAX_WEP_KEYS WEP keys * @wep_tx_key: key index (0..3) of the default TX static WEP key * @psk: PSK (for devices supporting 4-way-handshake offload) + * @sae_pwd: password for SAE authentication (for devices supporting SAE + * offload) + * @sae_pwd_len: length of SAE password (for devices supporting SAE offload) */ struct cfg80211_crypto_settings { u32 wpa_versions; @@ -757,6 +760,8 @@ struct cfg80211_crypto_settings { struct key_params *wep_keys; int wep_tx_key; const u8 *psk; + const u8 *sae_pwd; + u8 sae_pwd_len; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e9bf3d69d847..8b1e43fecd25 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -234,6 +234,15 @@ * use in a FILS shared key connection with PMKSA caching. */ +/** + * DOC: SAE authentication offload + * + * By setting @NL80211_EXT_FEATURE_SAE_OFFLOAD flag drivers can indicate they + * support offloading SAE authentication for WPA3-Personal networks. In + * %NL80211_CMD_CONNECT the password for SAE should be specified using + * %NL80211_ATTR_SAE_PASSWORD. + */ + /** * enum nl80211_commands - supported nl80211 commands * @@ -2341,6 +2350,10 @@ enum nl80211_commands { * should be picking up the lowest tx power, either tx power per-interface * or per-station. * + * @NL80211_ATTR_SAE_PASSWORD: attribute for passing SAE password material. It + * is used with %NL80211_CMD_CONNECT to provide password for offloading + * SAE authentication for WPA3-Personal networks. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2794,6 +2807,8 @@ enum nl80211_attrs { NL80211_ATTR_STA_TX_POWER_SETTING, NL80211_ATTR_STA_TX_POWER, + NL80211_ATTR_SAE_PASSWORD, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -5423,6 +5438,9 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_STA_TX_PWR: This driver supports controlling tx power * to a station. * + * @NL80211_EXT_FEATURE_SAE_OFFLOAD: Device wants to do SAE authentication in + * station mode (SAE password is passed as part of the connect command). + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5467,6 +5485,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD, NL80211_EXT_FEATURE_EXT_KEY_ID, NL80211_EXT_FEATURE_STA_TX_PWR, + NL80211_EXT_FEATURE_SAE_OFFLOAD, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8332a5731c57..80e514872719 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -571,6 +571,8 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_PEER_MEASUREMENTS] = NLA_POLICY_NESTED(nl80211_pmsr_attr_policy), [NL80211_ATTR_AIRTIME_WEIGHT] = NLA_POLICY_MIN(NLA_U16, 1), + [NL80211_ATTR_SAE_PASSWORD] = { .type = NLA_BINARY, + .len = SAE_PASSWORD_MAX_LEN }, }; /* policy for the key attributes */ @@ -4434,6 +4436,8 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev, return true; case NL80211_CMD_CONNECT: if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_SAE_OFFLOAD) && auth_type == NL80211_AUTHTYPE_SAE) return false; @@ -8973,6 +8977,16 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, settings->psk = nla_data(info->attrs[NL80211_ATTR_PMK]); } + if (info->attrs[NL80211_ATTR_SAE_PASSWORD]) { + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_SAE_OFFLOAD)) + return -EINVAL; + settings->sae_pwd = + nla_data(info->attrs[NL80211_ATTR_SAE_PASSWORD]); + settings->sae_pwd_len = + nla_len(info->attrs[NL80211_ATTR_SAE_PASSWORD]); + } + return 0; } -- cgit v1.2.3 From cd6f34110285742ec5570f07aa2229e29f4d2092 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 29 May 2019 15:25:33 +0300 Subject: ieee80211: Add a missing extended capability flag definition Add the "OBSS Narrow Bandwidth RU In OFDMA Tolerance Support" flag definition to the definitions of the flags covered by the Extended Capability IE. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 5dfd949ade25..2dbefeffc43c 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2713,6 +2713,13 @@ enum ieee80211_tdls_actioncode { #define WLAN_EXT_CAPA10_TWT_REQUESTER_SUPPORT BIT(5) #define WLAN_EXT_CAPA10_TWT_RESPONDER_SUPPORT BIT(6) +/* + * When set, indicates that the AP is able to tolerate 26-tone RU UL + * OFDMA transmissions using HE TB PPDU from OBSS (not falsely classify the + * 26-tone RU UL OFDMA transmissions as radar pulses). + */ +#define WLAN_EXT_CAPA10_OBSS_NARROW_BW_RU_TOLERANCE_SUPPORT BIT(7) + /* Defines support for enhanced multi-bssid advertisement*/ #define WLAN_EXT_CAPA11_EMA_SUPPORT BIT(1) -- cgit v1.2.3 From 42df744c4166af6959eda2df1ee5cde744d4a1c3 Mon Sep 17 00:00:00 2001 From: Janne Karhunen Date: Fri, 14 Jun 2019 15:20:14 +0300 Subject: LSM: switch to blocking policy update notifiers Atomic policy updaters are not very useful as they cannot usually perform the policy updates on their own. Since it seems that there is no strict need for the atomicity, switch to the blocking variant. While doing so, rename the functions accordingly. Signed-off-by: Janne Karhunen Acked-by: Paul Moore Acked-by: James Morris Signed-off-by: Mimi Zohar --- drivers/infiniband/core/device.c | 6 +++--- include/linux/security.h | 12 ++++++------ security/security.c | 23 +++++++++++++---------- security/selinux/hooks.c | 2 +- security/selinux/selinuxfs.c | 2 +- 5 files changed, 24 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 78dc07c6ac4b..61c0c93a2e73 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2499,7 +2499,7 @@ static int __init ib_core_init(void) goto err_mad; } - ret = register_lsm_notifier(&ibdev_lsm_nb); + ret = register_blocking_lsm_notifier(&ibdev_lsm_nb); if (ret) { pr_warn("Couldn't register LSM notifier. ret %d\n", ret); goto err_sa; @@ -2518,7 +2518,7 @@ static int __init ib_core_init(void) return 0; err_compat: - unregister_lsm_notifier(&ibdev_lsm_nb); + unregister_blocking_lsm_notifier(&ibdev_lsm_nb); err_sa: ib_sa_cleanup(); err_mad: @@ -2544,7 +2544,7 @@ static void __exit ib_core_cleanup(void) nldev_exit(); rdma_nl_unregister(RDMA_NL_LS); unregister_pernet_device(&rdma_dev_net_ops); - unregister_lsm_notifier(&ibdev_lsm_nb); + unregister_blocking_lsm_notifier(&ibdev_lsm_nb); ib_sa_cleanup(); ib_mad_cleanup(); addr_cleanup(); diff --git a/include/linux/security.h b/include/linux/security.h index 659071c2e57c..5f7441abbf42 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -189,9 +189,9 @@ static inline const char *kernel_load_data_id_str(enum kernel_load_data_id id) #ifdef CONFIG_SECURITY -int call_lsm_notifier(enum lsm_event event, void *data); -int register_lsm_notifier(struct notifier_block *nb); -int unregister_lsm_notifier(struct notifier_block *nb); +int call_blocking_lsm_notifier(enum lsm_event event, void *data); +int register_blocking_lsm_notifier(struct notifier_block *nb); +int unregister_blocking_lsm_notifier(struct notifier_block *nb); /* prototypes */ extern int security_init(void); @@ -394,17 +394,17 @@ int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen); int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); #else /* CONFIG_SECURITY */ -static inline int call_lsm_notifier(enum lsm_event event, void *data) +static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data) { return 0; } -static inline int register_lsm_notifier(struct notifier_block *nb) +static inline int register_blocking_lsm_notifier(struct notifier_block *nb) { return 0; } -static inline int unregister_lsm_notifier(struct notifier_block *nb) +static inline int unregister_blocking_lsm_notifier(struct notifier_block *nb) { return 0; } diff --git a/security/security.c b/security/security.c index 613a5c00e602..47e5849d7557 100644 --- a/security/security.c +++ b/security/security.c @@ -39,7 +39,7 @@ #define LSM_COUNT (__end_lsm_info - __start_lsm_info) struct security_hook_heads security_hook_heads __lsm_ro_after_init; -static ATOMIC_NOTIFIER_HEAD(lsm_notifier_chain); +static BLOCKING_NOTIFIER_HEAD(blocking_lsm_notifier_chain); static struct kmem_cache *lsm_file_cache; static struct kmem_cache *lsm_inode_cache; @@ -430,23 +430,26 @@ void __init security_add_hooks(struct security_hook_list *hooks, int count, panic("%s - Cannot get early memory.\n", __func__); } -int call_lsm_notifier(enum lsm_event event, void *data) +int call_blocking_lsm_notifier(enum lsm_event event, void *data) { - return atomic_notifier_call_chain(&lsm_notifier_chain, event, data); + return blocking_notifier_call_chain(&blocking_lsm_notifier_chain, + event, data); } -EXPORT_SYMBOL(call_lsm_notifier); +EXPORT_SYMBOL(call_blocking_lsm_notifier); -int register_lsm_notifier(struct notifier_block *nb) +int register_blocking_lsm_notifier(struct notifier_block *nb) { - return atomic_notifier_chain_register(&lsm_notifier_chain, nb); + return blocking_notifier_chain_register(&blocking_lsm_notifier_chain, + nb); } -EXPORT_SYMBOL(register_lsm_notifier); +EXPORT_SYMBOL(register_blocking_lsm_notifier); -int unregister_lsm_notifier(struct notifier_block *nb) +int unregister_blocking_lsm_notifier(struct notifier_block *nb) { - return atomic_notifier_chain_unregister(&lsm_notifier_chain, nb); + return blocking_notifier_chain_unregister(&blocking_lsm_notifier_chain, + nb); } -EXPORT_SYMBOL(unregister_lsm_notifier); +EXPORT_SYMBOL(unregister_blocking_lsm_notifier); /** * lsm_cred_alloc - allocate a composite cred blob diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index c61787b15f27..c1e37018c8eb 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -197,7 +197,7 @@ static int selinux_lsm_notifier_avc_callback(u32 event) { if (event == AVC_CALLBACK_RESET) { sel_ib_pkey_flush(); - call_lsm_notifier(LSM_POLICY_CHANGE, NULL); + call_blocking_lsm_notifier(LSM_POLICY_CHANGE, NULL); } return 0; diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 145ee62f205a..1e2e3e4b5fdb 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -180,7 +180,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, selnl_notify_setenforce(new_value); selinux_status_update_setenforce(state, new_value); if (!new_value) - call_lsm_notifier(LSM_POLICY_CHANGE, NULL); + call_blocking_lsm_notifier(LSM_POLICY_CHANGE, NULL); } length = count; out: -- cgit v1.2.3 From 10ffebbed5503b1830c7920ef528075785351be6 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 12 Jun 2019 14:52:44 -0300 Subject: docs: fault-injection: convert docs to ReST and rename to *.rst The conversion is actually: - add blank lines and identation in order to identify paragraphs; - fix tables markups; - add some lists markups; - mark literal blocks; - adjust title markups. At its new index.rst, let's add a :orphan: while this is not linked to the main index.rst file, in order to avoid build warnings. Signed-off-by: Mauro Carvalho Chehab Acked-by: Federico Vaga Signed-off-by: Jonathan Corbet --- Documentation/fault-injection/fault-injection.rst | 446 +++++++++++++++++++++ Documentation/fault-injection/fault-injection.txt | 435 -------------------- Documentation/fault-injection/index.rst | 20 + .../fault-injection/notifier-error-inject.rst | 98 +++++ .../fault-injection/notifier-error-inject.txt | 94 ----- .../fault-injection/nvme-fault-injection.rst | 120 ++++++ .../fault-injection/nvme-fault-injection.txt | 116 ------ Documentation/fault-injection/provoke-crashes.rst | 48 +++ Documentation/fault-injection/provoke-crashes.txt | 38 -- Documentation/process/4.Coding.rst | 2 +- .../translations/it_IT/process/4.Coding.rst | 2 +- .../translations/zh_CN/process/4.Coding.rst | 2 +- drivers/misc/lkdtm/core.c | 2 +- include/linux/fault-inject.h | 2 +- lib/Kconfig.debug | 2 +- tools/testing/fault-injection/failcmd.sh | 2 +- 16 files changed, 739 insertions(+), 690 deletions(-) create mode 100644 Documentation/fault-injection/fault-injection.rst delete mode 100644 Documentation/fault-injection/fault-injection.txt create mode 100644 Documentation/fault-injection/index.rst create mode 100644 Documentation/fault-injection/notifier-error-inject.rst delete mode 100644 Documentation/fault-injection/notifier-error-inject.txt create mode 100644 Documentation/fault-injection/nvme-fault-injection.rst delete mode 100644 Documentation/fault-injection/nvme-fault-injection.txt create mode 100644 Documentation/fault-injection/provoke-crashes.rst delete mode 100644 Documentation/fault-injection/provoke-crashes.txt (limited to 'include/linux') diff --git a/Documentation/fault-injection/fault-injection.rst b/Documentation/fault-injection/fault-injection.rst new file mode 100644 index 000000000000..f51bb21d20e4 --- /dev/null +++ b/Documentation/fault-injection/fault-injection.rst @@ -0,0 +1,446 @@ +=========================================== +Fault injection capabilities infrastructure +=========================================== + +See also drivers/md/md-faulty.c and "every_nth" module option for scsi_debug. + + +Available fault injection capabilities +-------------------------------------- + +- failslab + + injects slab allocation failures. (kmalloc(), kmem_cache_alloc(), ...) + +- fail_page_alloc + + injects page allocation failures. (alloc_pages(), get_free_pages(), ...) + +- fail_futex + + injects futex deadlock and uaddr fault errors. + +- fail_make_request + + injects disk IO errors on devices permitted by setting + /sys/block//make-it-fail or + /sys/block///make-it-fail. (generic_make_request()) + +- fail_mmc_request + + injects MMC data errors on devices permitted by setting + debugfs entries under /sys/kernel/debug/mmc0/fail_mmc_request + +- fail_function + + injects error return on specific functions, which are marked by + ALLOW_ERROR_INJECTION() macro, by setting debugfs entries + under /sys/kernel/debug/fail_function. No boot option supported. + +- NVMe fault injection + + inject NVMe status code and retry flag on devices permitted by setting + debugfs entries under /sys/kernel/debug/nvme*/fault_inject. The default + status code is NVME_SC_INVALID_OPCODE with no retry. The status code and + retry flag can be set via the debugfs. + + +Configure fault-injection capabilities behavior +----------------------------------------------- + +debugfs entries +^^^^^^^^^^^^^^^ + +fault-inject-debugfs kernel module provides some debugfs entries for runtime +configuration of fault-injection capabilities. + +- /sys/kernel/debug/fail*/probability: + + likelihood of failure injection, in percent. + + Format: + + Note that one-failure-per-hundred is a very high error rate + for some testcases. Consider setting probability=100 and configure + /sys/kernel/debug/fail*/interval for such testcases. + +- /sys/kernel/debug/fail*/interval: + + specifies the interval between failures, for calls to + should_fail() that pass all the other tests. + + Note that if you enable this, by setting interval>1, you will + probably want to set probability=100. + +- /sys/kernel/debug/fail*/times: + + specifies how many times failures may happen at most. + A value of -1 means "no limit". + +- /sys/kernel/debug/fail*/space: + + specifies an initial resource "budget", decremented by "size" + on each call to should_fail(,size). Failure injection is + suppressed until "space" reaches zero. + +- /sys/kernel/debug/fail*/verbose + + Format: { 0 | 1 | 2 } + + specifies the verbosity of the messages when failure is + injected. '0' means no messages; '1' will print only a single + log line per failure; '2' will print a call trace too -- useful + to debug the problems revealed by fault injection. + +- /sys/kernel/debug/fail*/task-filter: + + Format: { 'Y' | 'N' } + + A value of 'N' disables filtering by process (default). + Any positive value limits failures to only processes indicated by + /proc//make-it-fail==1. + +- /sys/kernel/debug/fail*/require-start, + /sys/kernel/debug/fail*/require-end, + /sys/kernel/debug/fail*/reject-start, + /sys/kernel/debug/fail*/reject-end: + + specifies the range of virtual addresses tested during + stacktrace walking. Failure is injected only if some caller + in the walked stacktrace lies within the required range, and + none lies within the rejected range. + Default required range is [0,ULONG_MAX) (whole of virtual address space). + Default rejected range is [0,0). + +- /sys/kernel/debug/fail*/stacktrace-depth: + + specifies the maximum stacktrace depth walked during search + for a caller within [require-start,require-end) OR + [reject-start,reject-end). + +- /sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem: + + Format: { 'Y' | 'N' } + + default is 'N', setting it to 'Y' won't inject failures into + highmem/user allocations. + +- /sys/kernel/debug/failslab/ignore-gfp-wait: +- /sys/kernel/debug/fail_page_alloc/ignore-gfp-wait: + + Format: { 'Y' | 'N' } + + default is 'N', setting it to 'Y' will inject failures + only into non-sleep allocations (GFP_ATOMIC allocations). + +- /sys/kernel/debug/fail_page_alloc/min-order: + + specifies the minimum page allocation order to be injected + failures. + +- /sys/kernel/debug/fail_futex/ignore-private: + + Format: { 'Y' | 'N' } + + default is 'N', setting it to 'Y' will disable failure injections + when dealing with private (address space) futexes. + +- /sys/kernel/debug/fail_function/inject: + + Format: { 'function-name' | '!function-name' | '' } + + specifies the target function of error injection by name. + If the function name leads '!' prefix, given function is + removed from injection list. If nothing specified ('') + injection list is cleared. + +- /sys/kernel/debug/fail_function/injectable: + + (read only) shows error injectable functions and what type of + error values can be specified. The error type will be one of + below; + - NULL: retval must be 0. + - ERRNO: retval must be -1 to -MAX_ERRNO (-4096). + - ERR_NULL: retval must be 0 or -1 to -MAX_ERRNO (-4096). + +- /sys/kernel/debug/fail_function//retval: + + specifies the "error" return value to inject to the given + function for given function. This will be created when + user specifies new injection entry. + +Boot option +^^^^^^^^^^^ + +In order to inject faults while debugfs is not available (early boot time), +use the boot option:: + + failslab= + fail_page_alloc= + fail_make_request= + fail_futex= + mmc_core.fail_request=,,, + +proc entries +^^^^^^^^^^^^ + +- /proc//fail-nth, + /proc/self/task//fail-nth: + + Write to this file of integer N makes N-th call in the task fail. + Read from this file returns a integer value. A value of '0' indicates + that the fault setup with a previous write to this file was injected. + A positive integer N indicates that the fault wasn't yet injected. + Note that this file enables all types of faults (slab, futex, etc). + This setting takes precedence over all other generic debugfs settings + like probability, interval, times, etc. But per-capability settings + (e.g. fail_futex/ignore-private) take precedence over it. + + This feature is intended for systematic testing of faults in a single + system call. See an example below. + +How to add new fault injection capability +----------------------------------------- + +- #include + +- define the fault attributes + + DECLARE_FAULT_ATTR(name); + + Please see the definition of struct fault_attr in fault-inject.h + for details. + +- provide a way to configure fault attributes + +- boot option + + If you need to enable the fault injection capability from boot time, you can + provide boot option to configure it. There is a helper function for it: + + setup_fault_attr(attr, str); + +- debugfs entries + + failslab, fail_page_alloc, and fail_make_request use this way. + Helper functions: + + fault_create_debugfs_attr(name, parent, attr); + +- module parameters + + If the scope of the fault injection capability is limited to a + single kernel module, it is better to provide module parameters to + configure the fault attributes. + +- add a hook to insert failures + + Upon should_fail() returning true, client code should inject a failure: + + should_fail(attr, size); + +Application Examples +-------------------- + +- Inject slab allocation failures into module init/exit code:: + + #!/bin/bash + + FAILTYPE=failslab + echo Y > /sys/kernel/debug/$FAILTYPE/task-filter + echo 10 > /sys/kernel/debug/$FAILTYPE/probability + echo 100 > /sys/kernel/debug/$FAILTYPE/interval + echo -1 > /sys/kernel/debug/$FAILTYPE/times + echo 0 > /sys/kernel/debug/$FAILTYPE/space + echo 2 > /sys/kernel/debug/$FAILTYPE/verbose + echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait + + faulty_system() + { + bash -c "echo 1 > /proc/self/make-it-fail && exec $*" + } + + if [ $# -eq 0 ] + then + echo "Usage: $0 modulename [ modulename ... ]" + exit 1 + fi + + for m in $* + do + echo inserting $m... + faulty_system modprobe $m + + echo removing $m... + faulty_system modprobe -r $m + done + +------------------------------------------------------------------------------ + +- Inject page allocation failures only for a specific module:: + + #!/bin/bash + + FAILTYPE=fail_page_alloc + module=$1 + + if [ -z $module ] + then + echo "Usage: $0 " + exit 1 + fi + + modprobe $module + + if [ ! -d /sys/module/$module/sections ] + then + echo Module $module is not loaded + exit 1 + fi + + cat /sys/module/$module/sections/.text > /sys/kernel/debug/$FAILTYPE/require-start + cat /sys/module/$module/sections/.data > /sys/kernel/debug/$FAILTYPE/require-end + + echo N > /sys/kernel/debug/$FAILTYPE/task-filter + echo 10 > /sys/kernel/debug/$FAILTYPE/probability + echo 100 > /sys/kernel/debug/$FAILTYPE/interval + echo -1 > /sys/kernel/debug/$FAILTYPE/times + echo 0 > /sys/kernel/debug/$FAILTYPE/space + echo 2 > /sys/kernel/debug/$FAILTYPE/verbose + echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait + echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-highmem + echo 10 > /sys/kernel/debug/$FAILTYPE/stacktrace-depth + + trap "echo 0 > /sys/kernel/debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT + + echo "Injecting errors into the module $module... (interrupt to stop)" + sleep 1000000 + +------------------------------------------------------------------------------ + +- Inject open_ctree error while btrfs mount:: + + #!/bin/bash + + rm -f testfile.img + dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1 + DEVICE=$(losetup --show -f testfile.img) + mkfs.btrfs -f $DEVICE + mkdir -p tmpmnt + + FAILTYPE=fail_function + FAILFUNC=open_ctree + echo $FAILFUNC > /sys/kernel/debug/$FAILTYPE/inject + echo -12 > /sys/kernel/debug/$FAILTYPE/$FAILFUNC/retval + echo N > /sys/kernel/debug/$FAILTYPE/task-filter + echo 100 > /sys/kernel/debug/$FAILTYPE/probability + echo 0 > /sys/kernel/debug/$FAILTYPE/interval + echo -1 > /sys/kernel/debug/$FAILTYPE/times + echo 0 > /sys/kernel/debug/$FAILTYPE/space + echo 1 > /sys/kernel/debug/$FAILTYPE/verbose + + mount -t btrfs $DEVICE tmpmnt + if [ $? -ne 0 ] + then + echo "SUCCESS!" + else + echo "FAILED!" + umount tmpmnt + fi + + echo > /sys/kernel/debug/$FAILTYPE/inject + + rmdir tmpmnt + losetup -d $DEVICE + rm testfile.img + + +Tool to run command with failslab or fail_page_alloc +---------------------------------------------------- +In order to make it easier to accomplish the tasks mentioned above, we can use +tools/testing/fault-injection/failcmd.sh. Please run a command +"./tools/testing/fault-injection/failcmd.sh --help" for more information and +see the following examples. + +Examples: + +Run a command "make -C tools/testing/selftests/ run_tests" with injecting slab +allocation failure:: + + # ./tools/testing/fault-injection/failcmd.sh \ + -- make -C tools/testing/selftests/ run_tests + +Same as above except to specify 100 times failures at most instead of one time +at most by default:: + + # ./tools/testing/fault-injection/failcmd.sh --times=100 \ + -- make -C tools/testing/selftests/ run_tests + +Same as above except to inject page allocation failure instead of slab +allocation failure:: + + # env FAILCMD_TYPE=fail_page_alloc \ + ./tools/testing/fault-injection/failcmd.sh --times=100 \ + -- make -C tools/testing/selftests/ run_tests + +Systematic faults using fail-nth +--------------------------------- + +The following code systematically faults 0-th, 1-st, 2-nd and so on +capabilities in the socketpair() system call:: + + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + + int main() + { + int i, err, res, fail_nth, fds[2]; + char buf[128]; + + system("echo N > /sys/kernel/debug/failslab/ignore-gfp-wait"); + sprintf(buf, "/proc/self/task/%ld/fail-nth", syscall(SYS_gettid)); + fail_nth = open(buf, O_RDWR); + for (i = 1;; i++) { + sprintf(buf, "%d", i); + write(fail_nth, buf, strlen(buf)); + res = socketpair(AF_LOCAL, SOCK_STREAM, 0, fds); + err = errno; + pread(fail_nth, buf, sizeof(buf), 0); + if (res == 0) { + close(fds[0]); + close(fds[1]); + } + printf("%d-th fault %c: res=%d/%d\n", i, atoi(buf) ? 'N' : 'Y', + res, err); + if (atoi(buf)) + break; + } + return 0; + } + +An example output:: + + 1-th fault Y: res=-1/23 + 2-th fault Y: res=-1/23 + 3-th fault Y: res=-1/12 + 4-th fault Y: res=-1/12 + 5-th fault Y: res=-1/23 + 6-th fault Y: res=-1/23 + 7-th fault Y: res=-1/23 + 8-th fault Y: res=-1/12 + 9-th fault Y: res=-1/12 + 10-th fault Y: res=-1/12 + 11-th fault Y: res=-1/12 + 12-th fault Y: res=-1/12 + 13-th fault Y: res=-1/12 + 14-th fault Y: res=-1/12 + 15-th fault Y: res=-1/12 + 16-th fault N: res=0/12 diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt deleted file mode 100644 index a17517a083c3..000000000000 --- a/Documentation/fault-injection/fault-injection.txt +++ /dev/null @@ -1,435 +0,0 @@ -Fault injection capabilities infrastructure -=========================================== - -See also drivers/md/md-faulty.c and "every_nth" module option for scsi_debug. - - -Available fault injection capabilities --------------------------------------- - -o failslab - - injects slab allocation failures. (kmalloc(), kmem_cache_alloc(), ...) - -o fail_page_alloc - - injects page allocation failures. (alloc_pages(), get_free_pages(), ...) - -o fail_futex - - injects futex deadlock and uaddr fault errors. - -o fail_make_request - - injects disk IO errors on devices permitted by setting - /sys/block//make-it-fail or - /sys/block///make-it-fail. (generic_make_request()) - -o fail_mmc_request - - injects MMC data errors on devices permitted by setting - debugfs entries under /sys/kernel/debug/mmc0/fail_mmc_request - -o fail_function - - injects error return on specific functions, which are marked by - ALLOW_ERROR_INJECTION() macro, by setting debugfs entries - under /sys/kernel/debug/fail_function. No boot option supported. - -o NVMe fault injection - - inject NVMe status code and retry flag on devices permitted by setting - debugfs entries under /sys/kernel/debug/nvme*/fault_inject. The default - status code is NVME_SC_INVALID_OPCODE with no retry. The status code and - retry flag can be set via the debugfs. - - -Configure fault-injection capabilities behavior ------------------------------------------------ - -o debugfs entries - -fault-inject-debugfs kernel module provides some debugfs entries for runtime -configuration of fault-injection capabilities. - -- /sys/kernel/debug/fail*/probability: - - likelihood of failure injection, in percent. - Format: - - Note that one-failure-per-hundred is a very high error rate - for some testcases. Consider setting probability=100 and configure - /sys/kernel/debug/fail*/interval for such testcases. - -- /sys/kernel/debug/fail*/interval: - - specifies the interval between failures, for calls to - should_fail() that pass all the other tests. - - Note that if you enable this, by setting interval>1, you will - probably want to set probability=100. - -- /sys/kernel/debug/fail*/times: - - specifies how many times failures may happen at most. - A value of -1 means "no limit". - -- /sys/kernel/debug/fail*/space: - - specifies an initial resource "budget", decremented by "size" - on each call to should_fail(,size). Failure injection is - suppressed until "space" reaches zero. - -- /sys/kernel/debug/fail*/verbose - - Format: { 0 | 1 | 2 } - specifies the verbosity of the messages when failure is - injected. '0' means no messages; '1' will print only a single - log line per failure; '2' will print a call trace too -- useful - to debug the problems revealed by fault injection. - -- /sys/kernel/debug/fail*/task-filter: - - Format: { 'Y' | 'N' } - A value of 'N' disables filtering by process (default). - Any positive value limits failures to only processes indicated by - /proc//make-it-fail==1. - -- /sys/kernel/debug/fail*/require-start: -- /sys/kernel/debug/fail*/require-end: -- /sys/kernel/debug/fail*/reject-start: -- /sys/kernel/debug/fail*/reject-end: - - specifies the range of virtual addresses tested during - stacktrace walking. Failure is injected only if some caller - in the walked stacktrace lies within the required range, and - none lies within the rejected range. - Default required range is [0,ULONG_MAX) (whole of virtual address space). - Default rejected range is [0,0). - -- /sys/kernel/debug/fail*/stacktrace-depth: - - specifies the maximum stacktrace depth walked during search - for a caller within [require-start,require-end) OR - [reject-start,reject-end). - -- /sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem: - - Format: { 'Y' | 'N' } - default is 'N', setting it to 'Y' won't inject failures into - highmem/user allocations. - -- /sys/kernel/debug/failslab/ignore-gfp-wait: -- /sys/kernel/debug/fail_page_alloc/ignore-gfp-wait: - - Format: { 'Y' | 'N' } - default is 'N', setting it to 'Y' will inject failures - only into non-sleep allocations (GFP_ATOMIC allocations). - -- /sys/kernel/debug/fail_page_alloc/min-order: - - specifies the minimum page allocation order to be injected - failures. - -- /sys/kernel/debug/fail_futex/ignore-private: - - Format: { 'Y' | 'N' } - default is 'N', setting it to 'Y' will disable failure injections - when dealing with private (address space) futexes. - -- /sys/kernel/debug/fail_function/inject: - - Format: { 'function-name' | '!function-name' | '' } - specifies the target function of error injection by name. - If the function name leads '!' prefix, given function is - removed from injection list. If nothing specified ('') - injection list is cleared. - -- /sys/kernel/debug/fail_function/injectable: - - (read only) shows error injectable functions and what type of - error values can be specified. The error type will be one of - below; - - NULL: retval must be 0. - - ERRNO: retval must be -1 to -MAX_ERRNO (-4096). - - ERR_NULL: retval must be 0 or -1 to -MAX_ERRNO (-4096). - -- /sys/kernel/debug/fail_function//retval: - - specifies the "error" return value to inject to the given - function for given function. This will be created when - user specifies new injection entry. - -o Boot option - -In order to inject faults while debugfs is not available (early boot time), -use the boot option: - - failslab= - fail_page_alloc= - fail_make_request= - fail_futex= - mmc_core.fail_request=,,, - -o proc entries - -- /proc//fail-nth: -- /proc/self/task//fail-nth: - - Write to this file of integer N makes N-th call in the task fail. - Read from this file returns a integer value. A value of '0' indicates - that the fault setup with a previous write to this file was injected. - A positive integer N indicates that the fault wasn't yet injected. - Note that this file enables all types of faults (slab, futex, etc). - This setting takes precedence over all other generic debugfs settings - like probability, interval, times, etc. But per-capability settings - (e.g. fail_futex/ignore-private) take precedence over it. - - This feature is intended for systematic testing of faults in a single - system call. See an example below. - -How to add new fault injection capability ------------------------------------------ - -o #include - -o define the fault attributes - - DECLARE_FAULT_ATTR(name); - - Please see the definition of struct fault_attr in fault-inject.h - for details. - -o provide a way to configure fault attributes - -- boot option - - If you need to enable the fault injection capability from boot time, you can - provide boot option to configure it. There is a helper function for it: - - setup_fault_attr(attr, str); - -- debugfs entries - - failslab, fail_page_alloc, and fail_make_request use this way. - Helper functions: - - fault_create_debugfs_attr(name, parent, attr); - -- module parameters - - If the scope of the fault injection capability is limited to a - single kernel module, it is better to provide module parameters to - configure the fault attributes. - -o add a hook to insert failures - - Upon should_fail() returning true, client code should inject a failure. - - should_fail(attr, size); - -Application Examples --------------------- - -o Inject slab allocation failures into module init/exit code - -#!/bin/bash - -FAILTYPE=failslab -echo Y > /sys/kernel/debug/$FAILTYPE/task-filter -echo 10 > /sys/kernel/debug/$FAILTYPE/probability -echo 100 > /sys/kernel/debug/$FAILTYPE/interval -echo -1 > /sys/kernel/debug/$FAILTYPE/times -echo 0 > /sys/kernel/debug/$FAILTYPE/space -echo 2 > /sys/kernel/debug/$FAILTYPE/verbose -echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait - -faulty_system() -{ - bash -c "echo 1 > /proc/self/make-it-fail && exec $*" -} - -if [ $# -eq 0 ] -then - echo "Usage: $0 modulename [ modulename ... ]" - exit 1 -fi - -for m in $* -do - echo inserting $m... - faulty_system modprobe $m - - echo removing $m... - faulty_system modprobe -r $m -done - ------------------------------------------------------------------------------- - -o Inject page allocation failures only for a specific module - -#!/bin/bash - -FAILTYPE=fail_page_alloc -module=$1 - -if [ -z $module ] -then - echo "Usage: $0 " - exit 1 -fi - -modprobe $module - -if [ ! -d /sys/module/$module/sections ] -then - echo Module $module is not loaded - exit 1 -fi - -cat /sys/module/$module/sections/.text > /sys/kernel/debug/$FAILTYPE/require-start -cat /sys/module/$module/sections/.data > /sys/kernel/debug/$FAILTYPE/require-end - -echo N > /sys/kernel/debug/$FAILTYPE/task-filter -echo 10 > /sys/kernel/debug/$FAILTYPE/probability -echo 100 > /sys/kernel/debug/$FAILTYPE/interval -echo -1 > /sys/kernel/debug/$FAILTYPE/times -echo 0 > /sys/kernel/debug/$FAILTYPE/space -echo 2 > /sys/kernel/debug/$FAILTYPE/verbose -echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait -echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-highmem -echo 10 > /sys/kernel/debug/$FAILTYPE/stacktrace-depth - -trap "echo 0 > /sys/kernel/debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT - -echo "Injecting errors into the module $module... (interrupt to stop)" -sleep 1000000 - ------------------------------------------------------------------------------- - -o Inject open_ctree error while btrfs mount - -#!/bin/bash - -rm -f testfile.img -dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1 -DEVICE=$(losetup --show -f testfile.img) -mkfs.btrfs -f $DEVICE -mkdir -p tmpmnt - -FAILTYPE=fail_function -FAILFUNC=open_ctree -echo $FAILFUNC > /sys/kernel/debug/$FAILTYPE/inject -echo -12 > /sys/kernel/debug/$FAILTYPE/$FAILFUNC/retval -echo N > /sys/kernel/debug/$FAILTYPE/task-filter -echo 100 > /sys/kernel/debug/$FAILTYPE/probability -echo 0 > /sys/kernel/debug/$FAILTYPE/interval -echo -1 > /sys/kernel/debug/$FAILTYPE/times -echo 0 > /sys/kernel/debug/$FAILTYPE/space -echo 1 > /sys/kernel/debug/$FAILTYPE/verbose - -mount -t btrfs $DEVICE tmpmnt -if [ $? -ne 0 ] -then - echo "SUCCESS!" -else - echo "FAILED!" - umount tmpmnt -fi - -echo > /sys/kernel/debug/$FAILTYPE/inject - -rmdir tmpmnt -losetup -d $DEVICE -rm testfile.img - - -Tool to run command with failslab or fail_page_alloc ----------------------------------------------------- -In order to make it easier to accomplish the tasks mentioned above, we can use -tools/testing/fault-injection/failcmd.sh. Please run a command -"./tools/testing/fault-injection/failcmd.sh --help" for more information and -see the following examples. - -Examples: - -Run a command "make -C tools/testing/selftests/ run_tests" with injecting slab -allocation failure. - - # ./tools/testing/fault-injection/failcmd.sh \ - -- make -C tools/testing/selftests/ run_tests - -Same as above except to specify 100 times failures at most instead of one time -at most by default. - - # ./tools/testing/fault-injection/failcmd.sh --times=100 \ - -- make -C tools/testing/selftests/ run_tests - -Same as above except to inject page allocation failure instead of slab -allocation failure. - - # env FAILCMD_TYPE=fail_page_alloc \ - ./tools/testing/fault-injection/failcmd.sh --times=100 \ - -- make -C tools/testing/selftests/ run_tests - -Systematic faults using fail-nth ---------------------------------- - -The following code systematically faults 0-th, 1-st, 2-nd and so on -capabilities in the socketpair() system call. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -int main() -{ - int i, err, res, fail_nth, fds[2]; - char buf[128]; - - system("echo N > /sys/kernel/debug/failslab/ignore-gfp-wait"); - sprintf(buf, "/proc/self/task/%ld/fail-nth", syscall(SYS_gettid)); - fail_nth = open(buf, O_RDWR); - for (i = 1;; i++) { - sprintf(buf, "%d", i); - write(fail_nth, buf, strlen(buf)); - res = socketpair(AF_LOCAL, SOCK_STREAM, 0, fds); - err = errno; - pread(fail_nth, buf, sizeof(buf), 0); - if (res == 0) { - close(fds[0]); - close(fds[1]); - } - printf("%d-th fault %c: res=%d/%d\n", i, atoi(buf) ? 'N' : 'Y', - res, err); - if (atoi(buf)) - break; - } - return 0; -} - -An example output: - -1-th fault Y: res=-1/23 -2-th fault Y: res=-1/23 -3-th fault Y: res=-1/12 -4-th fault Y: res=-1/12 -5-th fault Y: res=-1/23 -6-th fault Y: res=-1/23 -7-th fault Y: res=-1/23 -8-th fault Y: res=-1/12 -9-th fault Y: res=-1/12 -10-th fault Y: res=-1/12 -11-th fault Y: res=-1/12 -12-th fault Y: res=-1/12 -13-th fault Y: res=-1/12 -14-th fault Y: res=-1/12 -15-th fault Y: res=-1/12 -16-th fault N: res=0/12 diff --git a/Documentation/fault-injection/index.rst b/Documentation/fault-injection/index.rst new file mode 100644 index 000000000000..92b5639ed07a --- /dev/null +++ b/Documentation/fault-injection/index.rst @@ -0,0 +1,20 @@ +:orphan: + +=============== +fault-injection +=============== + +.. toctree:: + :maxdepth: 1 + + fault-injection + notifier-error-inject + nvme-fault-injection + provoke-crashes + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/fault-injection/notifier-error-inject.rst b/Documentation/fault-injection/notifier-error-inject.rst new file mode 100644 index 000000000000..1668b6e48d3a --- /dev/null +++ b/Documentation/fault-injection/notifier-error-inject.rst @@ -0,0 +1,98 @@ +Notifier error injection +======================== + +Notifier error injection provides the ability to inject artificial errors to +specified notifier chain callbacks. It is useful to test the error handling of +notifier call chain failures which is rarely executed. There are kernel +modules that can be used to test the following notifiers. + + * PM notifier + * Memory hotplug notifier + * powerpc pSeries reconfig notifier + * Netdevice notifier + +PM notifier error injection module +---------------------------------- +This feature is controlled through debugfs interface + + /sys/kernel/debug/notifier-error-inject/pm/actions//error + +Possible PM notifier events to be failed are: + + * PM_HIBERNATION_PREPARE + * PM_SUSPEND_PREPARE + * PM_RESTORE_PREPARE + +Example: Inject PM suspend error (-12 = -ENOMEM):: + + # cd /sys/kernel/debug/notifier-error-inject/pm/ + # echo -12 > actions/PM_SUSPEND_PREPARE/error + # echo mem > /sys/power/state + bash: echo: write error: Cannot allocate memory + +Memory hotplug notifier error injection module +---------------------------------------------- +This feature is controlled through debugfs interface + + /sys/kernel/debug/notifier-error-inject/memory/actions//error + +Possible memory notifier events to be failed are: + + * MEM_GOING_ONLINE + * MEM_GOING_OFFLINE + +Example: Inject memory hotplug offline error (-12 == -ENOMEM):: + + # cd /sys/kernel/debug/notifier-error-inject/memory + # echo -12 > actions/MEM_GOING_OFFLINE/error + # echo offline > /sys/devices/system/memory/memoryXXX/state + bash: echo: write error: Cannot allocate memory + +powerpc pSeries reconfig notifier error injection module +-------------------------------------------------------- +This feature is controlled through debugfs interface + + /sys/kernel/debug/notifier-error-inject/pSeries-reconfig/actions//error + +Possible pSeries reconfig notifier events to be failed are: + + * PSERIES_RECONFIG_ADD + * PSERIES_RECONFIG_REMOVE + * PSERIES_DRCONF_MEM_ADD + * PSERIES_DRCONF_MEM_REMOVE + +Netdevice notifier error injection module +---------------------------------------------- +This feature is controlled through debugfs interface + + /sys/kernel/debug/notifier-error-inject/netdev/actions//error + +Netdevice notifier events which can be failed are: + + * NETDEV_REGISTER + * NETDEV_CHANGEMTU + * NETDEV_CHANGENAME + * NETDEV_PRE_UP + * NETDEV_PRE_TYPE_CHANGE + * NETDEV_POST_INIT + * NETDEV_PRECHANGEMTU + * NETDEV_PRECHANGEUPPER + * NETDEV_CHANGEUPPER + +Example: Inject netdevice mtu change error (-22 == -EINVAL):: + + # cd /sys/kernel/debug/notifier-error-inject/netdev + # echo -22 > actions/NETDEV_CHANGEMTU/error + # ip link set eth0 mtu 1024 + RTNETLINK answers: Invalid argument + +For more usage examples +----------------------- +There are tools/testing/selftests using the notifier error injection features +for CPU and memory notifiers. + + * tools/testing/selftests/cpu-hotplug/on-off-test.sh + * tools/testing/selftests/memory-hotplug/on-off-test.sh + +These scripts first do simple online and offline tests and then do fault +injection tests if notifier error injection module is available. diff --git a/Documentation/fault-injection/notifier-error-inject.txt b/Documentation/fault-injection/notifier-error-inject.txt deleted file mode 100644 index e861d761de24..000000000000 --- a/Documentation/fault-injection/notifier-error-inject.txt +++ /dev/null @@ -1,94 +0,0 @@ -Notifier error injection -======================== - -Notifier error injection provides the ability to inject artificial errors to -specified notifier chain callbacks. It is useful to test the error handling of -notifier call chain failures which is rarely executed. There are kernel -modules that can be used to test the following notifiers. - - * PM notifier - * Memory hotplug notifier - * powerpc pSeries reconfig notifier - * Netdevice notifier - -PM notifier error injection module ----------------------------------- -This feature is controlled through debugfs interface -/sys/kernel/debug/notifier-error-inject/pm/actions//error - -Possible PM notifier events to be failed are: - - * PM_HIBERNATION_PREPARE - * PM_SUSPEND_PREPARE - * PM_RESTORE_PREPARE - -Example: Inject PM suspend error (-12 = -ENOMEM) - - # cd /sys/kernel/debug/notifier-error-inject/pm/ - # echo -12 > actions/PM_SUSPEND_PREPARE/error - # echo mem > /sys/power/state - bash: echo: write error: Cannot allocate memory - -Memory hotplug notifier error injection module ----------------------------------------------- -This feature is controlled through debugfs interface -/sys/kernel/debug/notifier-error-inject/memory/actions//error - -Possible memory notifier events to be failed are: - - * MEM_GOING_ONLINE - * MEM_GOING_OFFLINE - -Example: Inject memory hotplug offline error (-12 == -ENOMEM) - - # cd /sys/kernel/debug/notifier-error-inject/memory - # echo -12 > actions/MEM_GOING_OFFLINE/error - # echo offline > /sys/devices/system/memory/memoryXXX/state - bash: echo: write error: Cannot allocate memory - -powerpc pSeries reconfig notifier error injection module --------------------------------------------------------- -This feature is controlled through debugfs interface -/sys/kernel/debug/notifier-error-inject/pSeries-reconfig/actions//error - -Possible pSeries reconfig notifier events to be failed are: - - * PSERIES_RECONFIG_ADD - * PSERIES_RECONFIG_REMOVE - * PSERIES_DRCONF_MEM_ADD - * PSERIES_DRCONF_MEM_REMOVE - -Netdevice notifier error injection module ----------------------------------------------- -This feature is controlled through debugfs interface -/sys/kernel/debug/notifier-error-inject/netdev/actions//error - -Netdevice notifier events which can be failed are: - - * NETDEV_REGISTER - * NETDEV_CHANGEMTU - * NETDEV_CHANGENAME - * NETDEV_PRE_UP - * NETDEV_PRE_TYPE_CHANGE - * NETDEV_POST_INIT - * NETDEV_PRECHANGEMTU - * NETDEV_PRECHANGEUPPER - * NETDEV_CHANGEUPPER - -Example: Inject netdevice mtu change error (-22 == -EINVAL) - - # cd /sys/kernel/debug/notifier-error-inject/netdev - # echo -22 > actions/NETDEV_CHANGEMTU/error - # ip link set eth0 mtu 1024 - RTNETLINK answers: Invalid argument - -For more usage examples ------------------------ -There are tools/testing/selftests using the notifier error injection features -for CPU and memory notifiers. - - * tools/testing/selftests/cpu-hotplug/on-off-test.sh - * tools/testing/selftests/memory-hotplug/on-off-test.sh - -These scripts first do simple online and offline tests and then do fault -injection tests if notifier error injection module is available. diff --git a/Documentation/fault-injection/nvme-fault-injection.rst b/Documentation/fault-injection/nvme-fault-injection.rst new file mode 100644 index 000000000000..bbb1bf3e8650 --- /dev/null +++ b/Documentation/fault-injection/nvme-fault-injection.rst @@ -0,0 +1,120 @@ +NVMe Fault Injection +==================== +Linux's fault injection framework provides a systematic way to support +error injection via debugfs in the /sys/kernel/debug directory. When +enabled, the default NVME_SC_INVALID_OPCODE with no retry will be +injected into the nvme_end_request. Users can change the default status +code and no retry flag via the debugfs. The list of Generic Command +Status can be found in include/linux/nvme.h + +Following examples show how to inject an error into the nvme. + +First, enable CONFIG_FAULT_INJECTION_DEBUG_FS kernel config, +recompile the kernel. After booting up the kernel, do the +following. + +Example 1: Inject default status code with no retry +--------------------------------------------------- + +:: + + mount /dev/nvme0n1 /mnt + echo 1 > /sys/kernel/debug/nvme0n1/fault_inject/times + echo 100 > /sys/kernel/debug/nvme0n1/fault_inject/probability + cp a.file /mnt + +Expected Result:: + + cp: cannot stat ‘/mnt/a.file’: Input/output error + +Message from dmesg:: + + FAULT_INJECTION: forcing a failure. + name fault_inject, interval 1, probability 100, space 0, times 1 + CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.15.0-rc8+ #2 + Hardware name: innotek GmbH VirtualBox/VirtualBox, + BIOS VirtualBox 12/01/2006 + Call Trace: + + dump_stack+0x5c/0x7d + should_fail+0x148/0x170 + nvme_should_fail+0x2f/0x50 [nvme_core] + nvme_process_cq+0xe7/0x1d0 [nvme] + nvme_irq+0x1e/0x40 [nvme] + __handle_irq_event_percpu+0x3a/0x190 + handle_irq_event_percpu+0x30/0x70 + handle_irq_event+0x36/0x60 + handle_fasteoi_irq+0x78/0x120 + handle_irq+0xa7/0x130 + ? tick_irq_enter+0xa8/0xc0 + do_IRQ+0x43/0xc0 + common_interrupt+0xa2/0xa2 + + RIP: 0010:native_safe_halt+0x2/0x10 + RSP: 0018:ffffffff82003e90 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffdd + RAX: ffffffff817a10c0 RBX: ffffffff82012480 RCX: 0000000000000000 + RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 + RBP: 0000000000000000 R08: 000000008e38ce64 R09: 0000000000000000 + R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff82012480 + R13: ffffffff82012480 R14: 0000000000000000 R15: 0000000000000000 + ? __sched_text_end+0x4/0x4 + default_idle+0x18/0xf0 + do_idle+0x150/0x1d0 + cpu_startup_entry+0x6f/0x80 + start_kernel+0x4c4/0x4e4 + ? set_init_arg+0x55/0x55 + secondary_startup_64+0xa5/0xb0 + print_req_error: I/O error, dev nvme0n1, sector 9240 + EXT4-fs error (device nvme0n1): ext4_find_entry:1436: + inode #2: comm cp: reading directory lblock 0 + +Example 2: Inject default status code with retry +------------------------------------------------ + +:: + + mount /dev/nvme0n1 /mnt + echo 1 > /sys/kernel/debug/nvme0n1/fault_inject/times + echo 100 > /sys/kernel/debug/nvme0n1/fault_inject/probability + echo 1 > /sys/kernel/debug/nvme0n1/fault_inject/status + echo 0 > /sys/kernel/debug/nvme0n1/fault_inject/dont_retry + + cp a.file /mnt + +Expected Result:: + + command success without error + +Message from dmesg:: + + FAULT_INJECTION: forcing a failure. + name fault_inject, interval 1, probability 100, space 0, times 1 + CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.15.0-rc8+ #4 + Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 + Call Trace: + + dump_stack+0x5c/0x7d + should_fail+0x148/0x170 + nvme_should_fail+0x30/0x60 [nvme_core] + nvme_loop_queue_response+0x84/0x110 [nvme_loop] + nvmet_req_complete+0x11/0x40 [nvmet] + nvmet_bio_done+0x28/0x40 [nvmet] + blk_update_request+0xb0/0x310 + blk_mq_end_request+0x18/0x60 + flush_smp_call_function_queue+0x3d/0xf0 + smp_call_function_single_interrupt+0x2c/0xc0 + call_function_single_interrupt+0xa2/0xb0 + + RIP: 0010:native_safe_halt+0x2/0x10 + RSP: 0018:ffffc9000068bec0 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff04 + RAX: ffffffff817a10c0 RBX: ffff88011a3c9680 RCX: 0000000000000000 + RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 + RBP: 0000000000000001 R08: 000000008e38c131 R09: 0000000000000000 + R10: 0000000000000000 R11: 0000000000000000 R12: ffff88011a3c9680 + R13: ffff88011a3c9680 R14: 0000000000000000 R15: 0000000000000000 + ? __sched_text_end+0x4/0x4 + default_idle+0x18/0xf0 + do_idle+0x150/0x1d0 + cpu_startup_entry+0x6f/0x80 + start_secondary+0x187/0x1e0 + secondary_startup_64+0xa5/0xb0 diff --git a/Documentation/fault-injection/nvme-fault-injection.txt b/Documentation/fault-injection/nvme-fault-injection.txt deleted file mode 100644 index 8fbf3bf60b62..000000000000 --- a/Documentation/fault-injection/nvme-fault-injection.txt +++ /dev/null @@ -1,116 +0,0 @@ -NVMe Fault Injection -==================== -Linux's fault injection framework provides a systematic way to support -error injection via debugfs in the /sys/kernel/debug directory. When -enabled, the default NVME_SC_INVALID_OPCODE with no retry will be -injected into the nvme_end_request. Users can change the default status -code and no retry flag via the debugfs. The list of Generic Command -Status can be found in include/linux/nvme.h - -Following examples show how to inject an error into the nvme. - -First, enable CONFIG_FAULT_INJECTION_DEBUG_FS kernel config, -recompile the kernel. After booting up the kernel, do the -following. - -Example 1: Inject default status code with no retry ---------------------------------------------------- - -mount /dev/nvme0n1 /mnt -echo 1 > /sys/kernel/debug/nvme0n1/fault_inject/times -echo 100 > /sys/kernel/debug/nvme0n1/fault_inject/probability -cp a.file /mnt - -Expected Result: - -cp: cannot stat ‘/mnt/a.file’: Input/output error - -Message from dmesg: - -FAULT_INJECTION: forcing a failure. -name fault_inject, interval 1, probability 100, space 0, times 1 -CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.15.0-rc8+ #2 -Hardware name: innotek GmbH VirtualBox/VirtualBox, -BIOS VirtualBox 12/01/2006 -Call Trace: - - dump_stack+0x5c/0x7d - should_fail+0x148/0x170 - nvme_should_fail+0x2f/0x50 [nvme_core] - nvme_process_cq+0xe7/0x1d0 [nvme] - nvme_irq+0x1e/0x40 [nvme] - __handle_irq_event_percpu+0x3a/0x190 - handle_irq_event_percpu+0x30/0x70 - handle_irq_event+0x36/0x60 - handle_fasteoi_irq+0x78/0x120 - handle_irq+0xa7/0x130 - ? tick_irq_enter+0xa8/0xc0 - do_IRQ+0x43/0xc0 - common_interrupt+0xa2/0xa2 - -RIP: 0010:native_safe_halt+0x2/0x10 -RSP: 0018:ffffffff82003e90 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffdd -RAX: ffffffff817a10c0 RBX: ffffffff82012480 RCX: 0000000000000000 -RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 -RBP: 0000000000000000 R08: 000000008e38ce64 R09: 0000000000000000 -R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff82012480 -R13: ffffffff82012480 R14: 0000000000000000 R15: 0000000000000000 - ? __sched_text_end+0x4/0x4 - default_idle+0x18/0xf0 - do_idle+0x150/0x1d0 - cpu_startup_entry+0x6f/0x80 - start_kernel+0x4c4/0x4e4 - ? set_init_arg+0x55/0x55 - secondary_startup_64+0xa5/0xb0 - print_req_error: I/O error, dev nvme0n1, sector 9240 -EXT4-fs error (device nvme0n1): ext4_find_entry:1436: -inode #2: comm cp: reading directory lblock 0 - -Example 2: Inject default status code with retry ------------------------------------------------- - -mount /dev/nvme0n1 /mnt -echo 1 > /sys/kernel/debug/nvme0n1/fault_inject/times -echo 100 > /sys/kernel/debug/nvme0n1/fault_inject/probability -echo 1 > /sys/kernel/debug/nvme0n1/fault_inject/status -echo 0 > /sys/kernel/debug/nvme0n1/fault_inject/dont_retry - -cp a.file /mnt - -Expected Result: - -command success without error - -Message from dmesg: - -FAULT_INJECTION: forcing a failure. -name fault_inject, interval 1, probability 100, space 0, times 1 -CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.15.0-rc8+ #4 -Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 -Call Trace: - - dump_stack+0x5c/0x7d - should_fail+0x148/0x170 - nvme_should_fail+0x30/0x60 [nvme_core] - nvme_loop_queue_response+0x84/0x110 [nvme_loop] - nvmet_req_complete+0x11/0x40 [nvmet] - nvmet_bio_done+0x28/0x40 [nvmet] - blk_update_request+0xb0/0x310 - blk_mq_end_request+0x18/0x60 - flush_smp_call_function_queue+0x3d/0xf0 - smp_call_function_single_interrupt+0x2c/0xc0 - call_function_single_interrupt+0xa2/0xb0 - -RIP: 0010:native_safe_halt+0x2/0x10 -RSP: 0018:ffffc9000068bec0 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff04 -RAX: ffffffff817a10c0 RBX: ffff88011a3c9680 RCX: 0000000000000000 -RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 -RBP: 0000000000000001 R08: 000000008e38c131 R09: 0000000000000000 -R10: 0000000000000000 R11: 0000000000000000 R12: ffff88011a3c9680 -R13: ffff88011a3c9680 R14: 0000000000000000 R15: 0000000000000000 - ? __sched_text_end+0x4/0x4 - default_idle+0x18/0xf0 - do_idle+0x150/0x1d0 - cpu_startup_entry+0x6f/0x80 - start_secondary+0x187/0x1e0 - secondary_startup_64+0xa5/0xb0 diff --git a/Documentation/fault-injection/provoke-crashes.rst b/Documentation/fault-injection/provoke-crashes.rst new file mode 100644 index 000000000000..9279a3e12278 --- /dev/null +++ b/Documentation/fault-injection/provoke-crashes.rst @@ -0,0 +1,48 @@ +=============== +Provoke crashes +=============== + +The lkdtm module provides an interface to crash or injure the kernel at +predefined crashpoints to evaluate the reliability of crash dumps obtained +using different dumping solutions. The module uses KPROBEs to instrument +crashing points, but can also crash the kernel directly without KRPOBE +support. + + +You can provide the way either through module arguments when inserting +the module, or through a debugfs interface. + +Usage:: + + insmod lkdtm.ko [recur_count={>0}] cpoint_name=<> cpoint_type=<> + [cpoint_count={>0}] + +recur_count + Recursion level for the stack overflow test. Default is 10. + +cpoint_name + Crash point where the kernel is to be crashed. It can be + one of INT_HARDWARE_ENTRY, INT_HW_IRQ_EN, INT_TASKLET_ENTRY, + FS_DEVRW, MEM_SWAPOUT, TIMERADD, SCSI_DISPATCH_CMD, + IDE_CORE_CP, DIRECT + +cpoint_type + Indicates the action to be taken on hitting the crash point. + It can be one of PANIC, BUG, EXCEPTION, LOOP, OVERFLOW, + CORRUPT_STACK, UNALIGNED_LOAD_STORE_WRITE, OVERWRITE_ALLOCATION, + WRITE_AFTER_FREE, + +cpoint_count + Indicates the number of times the crash point is to be hit + to trigger an action. The default is 10. + +You can also induce failures by mounting debugfs and writing the type to +/provoke-crash/. E.g.:: + + mount -t debugfs debugfs /mnt + echo EXCEPTION > /mnt/provoke-crash/INT_HARDWARE_ENTRY + + +A special file is `DIRECT` which will induce the crash directly without +KPROBE instrumentation. This mode is the only one available when the module +is built on a kernel without KPROBEs support. diff --git a/Documentation/fault-injection/provoke-crashes.txt b/Documentation/fault-injection/provoke-crashes.txt deleted file mode 100644 index 7a9d3d81525b..000000000000 --- a/Documentation/fault-injection/provoke-crashes.txt +++ /dev/null @@ -1,38 +0,0 @@ -The lkdtm module provides an interface to crash or injure the kernel at -predefined crashpoints to evaluate the reliability of crash dumps obtained -using different dumping solutions. The module uses KPROBEs to instrument -crashing points, but can also crash the kernel directly without KRPOBE -support. - - -You can provide the way either through module arguments when inserting -the module, or through a debugfs interface. - -Usage: insmod lkdtm.ko [recur_count={>0}] cpoint_name=<> cpoint_type=<> - [cpoint_count={>0}] - - recur_count : Recursion level for the stack overflow test. Default is 10. - - cpoint_name : Crash point where the kernel is to be crashed. It can be - one of INT_HARDWARE_ENTRY, INT_HW_IRQ_EN, INT_TASKLET_ENTRY, - FS_DEVRW, MEM_SWAPOUT, TIMERADD, SCSI_DISPATCH_CMD, - IDE_CORE_CP, DIRECT - - cpoint_type : Indicates the action to be taken on hitting the crash point. - It can be one of PANIC, BUG, EXCEPTION, LOOP, OVERFLOW, - CORRUPT_STACK, UNALIGNED_LOAD_STORE_WRITE, OVERWRITE_ALLOCATION, - WRITE_AFTER_FREE, - - cpoint_count : Indicates the number of times the crash point is to be hit - to trigger an action. The default is 10. - -You can also induce failures by mounting debugfs and writing the type to -/provoke-crash/. E.g., - - mount -t debugfs debugfs /mnt - echo EXCEPTION > /mnt/provoke-crash/INT_HARDWARE_ENTRY - - -A special file is `DIRECT' which will induce the crash directly without -KPROBE instrumentation. This mode is the only one available when the module -is built on a kernel without KPROBEs support. diff --git a/Documentation/process/4.Coding.rst b/Documentation/process/4.Coding.rst index 4b7a5ab3cec1..13dd893c9f88 100644 --- a/Documentation/process/4.Coding.rst +++ b/Documentation/process/4.Coding.rst @@ -298,7 +298,7 @@ enabled, a configurable percentage of memory allocations will be made to fail; these failures can be restricted to a specific range of code. Running with fault injection enabled allows the programmer to see how the code responds when things go badly. See -Documentation/fault-injection/fault-injection.txt for more information on +Documentation/fault-injection/fault-injection.rst for more information on how to use this facility. Other kinds of errors can be found with the "sparse" static analysis tool. diff --git a/Documentation/translations/it_IT/process/4.Coding.rst b/Documentation/translations/it_IT/process/4.Coding.rst index c05b89e616dd..a5e36aa60448 100644 --- a/Documentation/translations/it_IT/process/4.Coding.rst +++ b/Documentation/translations/it_IT/process/4.Coding.rst @@ -314,7 +314,7 @@ di allocazione di memoria sarà destinata al fallimento; questi fallimenti possono essere ridotti ad uno specifico pezzo di codice. Procedere con l'inserimento dei fallimenti attivo permette al programmatore di verificare come il codice risponde quando le cose vanno male. Consultate: -Documentation/fault-injection/fault-injection.txt per avere maggiori +Documentation/fault-injection/fault-injection.rst per avere maggiori informazioni su come utilizzare questo strumento. Altre tipologie di errori possono essere riscontrati con lo strumento di diff --git a/Documentation/translations/zh_CN/process/4.Coding.rst b/Documentation/translations/zh_CN/process/4.Coding.rst index 8bb777941394..b82b1dde3122 100644 --- a/Documentation/translations/zh_CN/process/4.Coding.rst +++ b/Documentation/translations/zh_CN/process/4.Coding.rst @@ -205,7 +205,7 @@ Linus对这个问题给出了最佳答案: 启用故障注入后,内存分配的可配置百分比将失败;这些失败可以限制在特定的代码 范围内。在启用了故障注入的情况下运行,程序员可以看到当情况恶化时代码如何响 应。有关如何使用此工具的详细信息,请参阅 -Documentation/fault-injection/fault-injection.txt。 +Documentation/fault-injection/fault-injection.rst。 使用“sparse”静态分析工具可以发现其他类型的错误。对于sparse,可以警告程序员 用户空间和内核空间地址之间的混淆、big endian和small endian数量的混合、在需 diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c index 8a1428d4f138..bba49abb6750 100644 --- a/drivers/misc/lkdtm/core.c +++ b/drivers/misc/lkdtm/core.c @@ -15,7 +15,7 @@ * * Debugfs support added by Simon Kagstrom * - * See Documentation/fault-injection/provoke-crashes.txt for instructions + * See Documentation/fault-injection/provoke-crashes.rst for instructions */ #include "lkdtm.h" #include diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 7e6c77740413..e525f6957c49 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -11,7 +11,7 @@ /* * For explanation of the elements of this struct, see - * Documentation/fault-injection/fault-injection.txt + * Documentation/fault-injection/fault-injection.rst */ struct fault_attr { unsigned long probability; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cbdfae379896..4d42a9a6006d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1701,7 +1701,7 @@ config LKDTM called lkdtm. Documentation on how to use the module can be found in - Documentation/fault-injection/provoke-crashes.txt + Documentation/fault-injection/provoke-crashes.rst config TEST_LIST_SORT tristate "Linked list sorting test" diff --git a/tools/testing/fault-injection/failcmd.sh b/tools/testing/fault-injection/failcmd.sh index 29a6c63c5a15..78dac34264be 100644 --- a/tools/testing/fault-injection/failcmd.sh +++ b/tools/testing/fault-injection/failcmd.sh @@ -42,7 +42,7 @@ OPTIONS --interval=value, --space=value, --verbose=value, --task-filter=value, --stacktrace-depth=value, --require-start=value, --require-end=value, --reject-start=value, --reject-end=value, --ignore-gfp-wait=value - See Documentation/fault-injection/fault-injection.txt for more + See Documentation/fault-injection/fault-injection.rst for more information failslab options: -- cgit v1.2.3 From 99c8b231ae6c6ca4ca2fd1c0b3701071f589661f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 12 Jun 2019 14:52:41 -0300 Subject: docs: cgroup-v1: convert docs to ReST and rename to *.rst Convert the cgroup-v1 files to ReST format, in order to allow a later addition to the admin-guide. The conversion is actually: - add blank lines and identation in order to identify paragraphs; - fix tables markups; - add some lists markups; - mark literal blocks; - adjust title markups. At its new index.rst, let's add a :orphan: while this is not linked to the main index.rst file, in order to avoid build warnings. Signed-off-by: Mauro Carvalho Chehab Acked-by: Tejun Heo Signed-off-by: Tejun Heo --- Documentation/admin-guide/hw-vuln/l1tf.rst | 2 +- Documentation/admin-guide/kernel-parameters.txt | 4 +- .../admin-guide/mm/numa_memory_policy.rst | 2 +- Documentation/block/bfq-iosched.txt | 2 +- Documentation/cgroup-v1/blkio-controller.rst | 391 ++++++++ Documentation/cgroup-v1/blkio-controller.txt | 375 -------- Documentation/cgroup-v1/cgroups.rst | 695 ++++++++++++++ Documentation/cgroup-v1/cgroups.txt | 677 ------------- Documentation/cgroup-v1/cpuacct.rst | 50 + Documentation/cgroup-v1/cpuacct.txt | 49 - Documentation/cgroup-v1/cpusets.rst | 866 +++++++++++++++++ Documentation/cgroup-v1/cpusets.txt | 839 ---------------- Documentation/cgroup-v1/devices.rst | 132 +++ Documentation/cgroup-v1/devices.txt | 116 --- Documentation/cgroup-v1/freezer-subsystem.rst | 127 +++ Documentation/cgroup-v1/freezer-subsystem.txt | 123 --- Documentation/cgroup-v1/hugetlb.rst | 50 + Documentation/cgroup-v1/hugetlb.txt | 49 - Documentation/cgroup-v1/index.rst | 30 + Documentation/cgroup-v1/memcg_test.rst | 355 +++++++ Documentation/cgroup-v1/memcg_test.txt | 280 ------ Documentation/cgroup-v1/memory.rst | 1003 ++++++++++++++++++++ Documentation/cgroup-v1/memory.txt | 892 ----------------- Documentation/cgroup-v1/net_cls.rst | 44 + Documentation/cgroup-v1/net_cls.txt | 39 - Documentation/cgroup-v1/net_prio.rst | 57 ++ Documentation/cgroup-v1/net_prio.txt | 55 -- Documentation/cgroup-v1/pids.rst | 92 ++ Documentation/cgroup-v1/pids.txt | 88 -- Documentation/cgroup-v1/rdma.rst | 117 +++ Documentation/cgroup-v1/rdma.txt | 109 --- Documentation/filesystems/tmpfs.txt | 2 +- Documentation/scheduler/sched-deadline.txt | 2 +- Documentation/scheduler/sched-design-CFS.txt | 2 +- Documentation/scheduler/sched-rt-group.txt | 2 +- Documentation/vm/numa.rst | 4 +- Documentation/vm/page_migration.rst | 2 +- Documentation/vm/unevictable-lru.rst | 2 +- Documentation/x86/x86_64/fake-numa-for-cpusets.rst | 4 +- MAINTAINERS | 2 +- block/Kconfig | 2 +- include/linux/cgroup-defs.h | 2 +- include/uapi/linux/bpf.h | 2 +- init/Kconfig | 2 +- kernel/cgroup/cpuset.c | 2 +- security/device_cgroup.c | 2 +- tools/include/uapi/linux/bpf.h | 2 +- 47 files changed, 4032 insertions(+), 3714 deletions(-) create mode 100644 Documentation/cgroup-v1/blkio-controller.rst delete mode 100644 Documentation/cgroup-v1/blkio-controller.txt create mode 100644 Documentation/cgroup-v1/cgroups.rst delete mode 100644 Documentation/cgroup-v1/cgroups.txt create mode 100644 Documentation/cgroup-v1/cpuacct.rst delete mode 100644 Documentation/cgroup-v1/cpuacct.txt create mode 100644 Documentation/cgroup-v1/cpusets.rst delete mode 100644 Documentation/cgroup-v1/cpusets.txt create mode 100644 Documentation/cgroup-v1/devices.rst delete mode 100644 Documentation/cgroup-v1/devices.txt create mode 100644 Documentation/cgroup-v1/freezer-subsystem.rst delete mode 100644 Documentation/cgroup-v1/freezer-subsystem.txt create mode 100644 Documentation/cgroup-v1/hugetlb.rst delete mode 100644 Documentation/cgroup-v1/hugetlb.txt create mode 100644 Documentation/cgroup-v1/index.rst create mode 100644 Documentation/cgroup-v1/memcg_test.rst delete mode 100644 Documentation/cgroup-v1/memcg_test.txt create mode 100644 Documentation/cgroup-v1/memory.rst delete mode 100644 Documentation/cgroup-v1/memory.txt create mode 100644 Documentation/cgroup-v1/net_cls.rst delete mode 100644 Documentation/cgroup-v1/net_cls.txt create mode 100644 Documentation/cgroup-v1/net_prio.rst delete mode 100644 Documentation/cgroup-v1/net_prio.txt create mode 100644 Documentation/cgroup-v1/pids.rst delete mode 100644 Documentation/cgroup-v1/pids.txt create mode 100644 Documentation/cgroup-v1/rdma.rst delete mode 100644 Documentation/cgroup-v1/rdma.txt (limited to 'include/linux') diff --git a/Documentation/admin-guide/hw-vuln/l1tf.rst b/Documentation/admin-guide/hw-vuln/l1tf.rst index 31653a9f0e1b..656aee262e23 100644 --- a/Documentation/admin-guide/hw-vuln/l1tf.rst +++ b/Documentation/admin-guide/hw-vuln/l1tf.rst @@ -241,7 +241,7 @@ Guest mitigation mechanisms For further information about confining guests to a single or to a group of cores consult the cpusets documentation: - https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt + https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.rst .. _interrupt_isolation: diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 138f6664b2e2..da0e84ecee32 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4078,7 +4078,7 @@ relax_domain_level= [KNL, SMP] Set scheduler's default relax_domain_level. - See Documentation/cgroup-v1/cpusets.txt. + See Documentation/cgroup-v1/cpusets.rst. reserve= [KNL,BUGS] Force kernel to ignore I/O ports or memory Format: ,[,,,...] @@ -4588,7 +4588,7 @@ swapaccount=[0|1] [KNL] Enable accounting of swap in memory resource controller if no parameter or 1 is given or disable - it if 0 is given (See Documentation/cgroup-v1/memory.txt) + it if 0 is given (See Documentation/cgroup-v1/memory.rst) swiotlb= [ARM,IA-64,PPC,MIPS,X86] Format: { | force | noforce } diff --git a/Documentation/admin-guide/mm/numa_memory_policy.rst b/Documentation/admin-guide/mm/numa_memory_policy.rst index d78c5b315f72..546f174e5d6a 100644 --- a/Documentation/admin-guide/mm/numa_memory_policy.rst +++ b/Documentation/admin-guide/mm/numa_memory_policy.rst @@ -15,7 +15,7 @@ document attempts to describe the concepts and APIs of the 2.6 memory policy support. Memory policies should not be confused with cpusets -(``Documentation/cgroup-v1/cpusets.txt``) +(``Documentation/cgroup-v1/cpusets.rst``) which is an administrative mechanism for restricting the nodes from which memory may be allocated by a set of processes. Memory policies are a programming interface that a NUMA-aware application can take advantage of. When diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt index 1a0f2ac02eb6..b2265cf6c9c3 100644 --- a/Documentation/block/bfq-iosched.txt +++ b/Documentation/block/bfq-iosched.txt @@ -539,7 +539,7 @@ As for cgroups-v1 (blkio controller), the exact set of stat files created, and kept up-to-date by bfq, depends on whether CONFIG_DEBUG_BLK_CGROUP is set. If it is set, then bfq creates all the stat files documented in -Documentation/cgroup-v1/blkio-controller.txt. If, instead, +Documentation/cgroup-v1/blkio-controller.rst. If, instead, CONFIG_DEBUG_BLK_CGROUP is not set, then bfq creates only the files blkio.bfq.io_service_bytes blkio.bfq.io_service_bytes_recursive diff --git a/Documentation/cgroup-v1/blkio-controller.rst b/Documentation/cgroup-v1/blkio-controller.rst new file mode 100644 index 000000000000..2c1b907afc14 --- /dev/null +++ b/Documentation/cgroup-v1/blkio-controller.rst @@ -0,0 +1,391 @@ +=================== +Block IO Controller +=================== + +Overview +======== +cgroup subsys "blkio" implements the block io controller. There seems to be +a need of various kinds of IO control policies (like proportional BW, max BW) +both at leaf nodes as well as at intermediate nodes in a storage hierarchy. +Plan is to use the same cgroup based management interface for blkio controller +and based on user options switch IO policies in the background. + +Currently two IO control policies are implemented. First one is proportional +weight time based division of disk policy. It is implemented in CFQ. Hence +this policy takes effect only on leaf nodes when CFQ is being used. The second +one is throttling policy which can be used to specify upper IO rate limits +on devices. This policy is implemented in generic block layer and can be +used on leaf nodes as well as higher level logical devices like device mapper. + +HOWTO +===== +Proportional Weight division of bandwidth +----------------------------------------- +You can do a very simple testing of running two dd threads in two different +cgroups. Here is what you can do. + +- Enable Block IO controller:: + + CONFIG_BLK_CGROUP=y + +- Enable group scheduling in CFQ: + + + CONFIG_CFQ_GROUP_IOSCHED=y + +- Compile and boot into kernel and mount IO controller (blkio); see + cgroups.txt, Why are cgroups needed?. + + :: + + mount -t tmpfs cgroup_root /sys/fs/cgroup + mkdir /sys/fs/cgroup/blkio + mount -t cgroup -o blkio none /sys/fs/cgroup/blkio + +- Create two cgroups:: + + mkdir -p /sys/fs/cgroup/blkio/test1/ /sys/fs/cgroup/blkio/test2 + +- Set weights of group test1 and test2:: + + echo 1000 > /sys/fs/cgroup/blkio/test1/blkio.weight + echo 500 > /sys/fs/cgroup/blkio/test2/blkio.weight + +- Create two same size files (say 512MB each) on same disk (file1, file2) and + launch two dd threads in different cgroup to read those files:: + + sync + echo 3 > /proc/sys/vm/drop_caches + + dd if=/mnt/sdb/zerofile1 of=/dev/null & + echo $! > /sys/fs/cgroup/blkio/test1/tasks + cat /sys/fs/cgroup/blkio/test1/tasks + + dd if=/mnt/sdb/zerofile2 of=/dev/null & + echo $! > /sys/fs/cgroup/blkio/test2/tasks + cat /sys/fs/cgroup/blkio/test2/tasks + +- At macro level, first dd should finish first. To get more precise data, keep + on looking at (with the help of script), at blkio.disk_time and + blkio.disk_sectors files of both test1 and test2 groups. This will tell how + much disk time (in milliseconds), each group got and how many sectors each + group dispatched to the disk. We provide fairness in terms of disk time, so + ideally io.disk_time of cgroups should be in proportion to the weight. + +Throttling/Upper Limit policy +----------------------------- +- Enable Block IO controller:: + + CONFIG_BLK_CGROUP=y + +- Enable throttling in block layer:: + + CONFIG_BLK_DEV_THROTTLING=y + +- Mount blkio controller (see cgroups.txt, Why are cgroups needed?):: + + mount -t cgroup -o blkio none /sys/fs/cgroup/blkio + +- Specify a bandwidth rate on particular device for root group. The format + for policy is ": ":: + + echo "8:16 1048576" > /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device + + Above will put a limit of 1MB/second on reads happening for root group + on device having major/minor number 8:16. + +- Run dd to read a file and see if rate is throttled to 1MB/s or not:: + + # dd iflag=direct if=/mnt/common/zerofile of=/dev/null bs=4K count=1024 + 1024+0 records in + 1024+0 records out + 4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s + + Limits for writes can be put using blkio.throttle.write_bps_device file. + +Hierarchical Cgroups +==================== + +Both CFQ and throttling implement hierarchy support; however, +throttling's hierarchy support is enabled iff "sane_behavior" is +enabled from cgroup side, which currently is a development option and +not publicly available. + +If somebody created a hierarchy like as follows:: + + root + / \ + test1 test2 + | + test3 + +CFQ by default and throttling with "sane_behavior" will handle the +hierarchy correctly. For details on CFQ hierarchy support, refer to +Documentation/block/cfq-iosched.txt. For throttling, all limits apply +to the whole subtree while all statistics are local to the IOs +directly generated by tasks in that cgroup. + +Throttling without "sane_behavior" enabled from cgroup side will +practically treat all groups at same level as if it looks like the +following:: + + pivot + / / \ \ + root test1 test2 test3 + +Various user visible config options +=================================== +CONFIG_BLK_CGROUP + - Block IO controller. + +CONFIG_DEBUG_BLK_CGROUP + - Debug help. Right now some additional stats file show up in cgroup + if this option is enabled. + +CONFIG_CFQ_GROUP_IOSCHED + - Enables group scheduling in CFQ. Currently only 1 level of group + creation is allowed. + +CONFIG_BLK_DEV_THROTTLING + - Enable block device throttling support in block layer. + +Details of cgroup files +======================= +Proportional weight policy files +-------------------------------- +- blkio.weight + - Specifies per cgroup weight. This is default weight of the group + on all the devices until and unless overridden by per device rule. + (See blkio.weight_device). + Currently allowed range of weights is from 10 to 1000. + +- blkio.weight_device + - One can specify per cgroup per device rules using this interface. + These rules override the default value of group weight as specified + by blkio.weight. + + Following is the format:: + + # echo dev_maj:dev_minor weight > blkio.weight_device + + Configure weight=300 on /dev/sdb (8:16) in this cgroup:: + + # echo 8:16 300 > blkio.weight_device + # cat blkio.weight_device + dev weight + 8:16 300 + + Configure weight=500 on /dev/sda (8:0) in this cgroup:: + + # echo 8:0 500 > blkio.weight_device + # cat blkio.weight_device + dev weight + 8:0 500 + 8:16 300 + + Remove specific weight for /dev/sda in this cgroup:: + + # echo 8:0 0 > blkio.weight_device + # cat blkio.weight_device + dev weight + 8:16 300 + +- blkio.leaf_weight[_device] + - Equivalents of blkio.weight[_device] for the purpose of + deciding how much weight tasks in the given cgroup has while + competing with the cgroup's child cgroups. For details, + please refer to Documentation/block/cfq-iosched.txt. + +- blkio.time + - disk time allocated to cgroup per device in milliseconds. First + two fields specify the major and minor number of the device and + third field specifies the disk time allocated to group in + milliseconds. + +- blkio.sectors + - number of sectors transferred to/from disk by the group. First + two fields specify the major and minor number of the device and + third field specifies the number of sectors transferred by the + group to/from the device. + +- blkio.io_service_bytes + - Number of bytes transferred to/from the disk by the group. These + are further divided by the type of operation - read or write, sync + or async. First two fields specify the major and minor number of the + device, third field specifies the operation type and the fourth field + specifies the number of bytes. + +- blkio.io_serviced + - Number of IOs (bio) issued to the disk by the group. These + are further divided by the type of operation - read or write, sync + or async. First two fields specify the major and minor number of the + device, third field specifies the operation type and the fourth field + specifies the number of IOs. + +- blkio.io_service_time + - Total amount of time between request dispatch and request completion + for the IOs done by this cgroup. This is in nanoseconds to make it + meaningful for flash devices too. For devices with queue depth of 1, + this time represents the actual service time. When queue_depth > 1, + that is no longer true as requests may be served out of order. This + may cause the service time for a given IO to include the service time + of multiple IOs when served out of order which may result in total + io_service_time > actual time elapsed. This time is further divided by + the type of operation - read or write, sync or async. First two fields + specify the major and minor number of the device, third field + specifies the operation type and the fourth field specifies the + io_service_time in ns. + +- blkio.io_wait_time + - Total amount of time the IOs for this cgroup spent waiting in the + scheduler queues for service. This can be greater than the total time + elapsed since it is cumulative io_wait_time for all IOs. It is not a + measure of total time the cgroup spent waiting but rather a measure of + the wait_time for its individual IOs. For devices with queue_depth > 1 + this metric does not include the time spent waiting for service once + the IO is dispatched to the device but till it actually gets serviced + (there might be a time lag here due to re-ordering of requests by the + device). This is in nanoseconds to make it meaningful for flash + devices too. This time is further divided by the type of operation - + read or write, sync or async. First two fields specify the major and + minor number of the device, third field specifies the operation type + and the fourth field specifies the io_wait_time in ns. + +- blkio.io_merged + - Total number of bios/requests merged into requests belonging to this + cgroup. This is further divided by the type of operation - read or + write, sync or async. + +- blkio.io_queued + - Total number of requests queued up at any given instant for this + cgroup. This is further divided by the type of operation - read or + write, sync or async. + +- blkio.avg_queue_size + - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. + The average queue size for this cgroup over the entire time of this + cgroup's existence. Queue size samples are taken each time one of the + queues of this cgroup gets a timeslice. + +- blkio.group_wait_time + - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. + This is the amount of time the cgroup had to wait since it became busy + (i.e., went from 0 to 1 request queued) to get a timeslice for one of + its queues. This is different from the io_wait_time which is the + cumulative total of the amount of time spent by each IO in that cgroup + waiting in the scheduler queue. This is in nanoseconds. If this is + read when the cgroup is in a waiting (for timeslice) state, the stat + will only report the group_wait_time accumulated till the last time it + got a timeslice and will not include the current delta. + +- blkio.empty_time + - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. + This is the amount of time a cgroup spends without any pending + requests when not being served, i.e., it does not include any time + spent idling for one of the queues of the cgroup. This is in + nanoseconds. If this is read when the cgroup is in an empty state, + the stat will only report the empty_time accumulated till the last + time it had a pending request and will not include the current delta. + +- blkio.idle_time + - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. + This is the amount of time spent by the IO scheduler idling for a + given cgroup in anticipation of a better request than the existing ones + from other queues/cgroups. This is in nanoseconds. If this is read + when the cgroup is in an idling state, the stat will only report the + idle_time accumulated till the last idle period and will not include + the current delta. + +- blkio.dequeue + - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. This + gives the statistics about how many a times a group was dequeued + from service tree of the device. First two fields specify the major + and minor number of the device and third field specifies the number + of times a group was dequeued from a particular device. + +- blkio.*_recursive + - Recursive version of various stats. These files show the + same information as their non-recursive counterparts but + include stats from all the descendant cgroups. + +Throttling/Upper limit policy files +----------------------------------- +- blkio.throttle.read_bps_device + - Specifies upper limit on READ rate from the device. IO rate is + specified in bytes per second. Rules are per device. Following is + the format:: + + echo ": " > /cgrp/blkio.throttle.read_bps_device + +- blkio.throttle.write_bps_device + - Specifies upper limit on WRITE rate to the device. IO rate is + specified in bytes per second. Rules are per device. Following is + the format:: + + echo ": " > /cgrp/blkio.throttle.write_bps_device + +- blkio.throttle.read_iops_device + - Specifies upper limit on READ rate from the device. IO rate is + specified in IO per second. Rules are per device. Following is + the format:: + + echo ": " > /cgrp/blkio.throttle.read_iops_device + +- blkio.throttle.write_iops_device + - Specifies upper limit on WRITE rate to the device. IO rate is + specified in io per second. Rules are per device. Following is + the format:: + + echo ": " > /cgrp/blkio.throttle.write_iops_device + +Note: If both BW and IOPS rules are specified for a device, then IO is + subjected to both the constraints. + +- blkio.throttle.io_serviced + - Number of IOs (bio) issued to the disk by the group. These + are further divided by the type of operation - read or write, sync + or async. First two fields specify the major and minor number of the + device, third field specifies the operation type and the fourth field + specifies the number of IOs. + +- blkio.throttle.io_service_bytes + - Number of bytes transferred to/from the disk by the group. These + are further divided by the type of operation - read or write, sync + or async. First two fields specify the major and minor number of the + device, third field specifies the operation type and the fourth field + specifies the number of bytes. + +Common files among various policies +----------------------------------- +- blkio.reset_stats + - Writing an int to this file will result in resetting all the stats + for that cgroup. + +CFQ sysfs tunable +================= +/sys/block//queue/iosched/slice_idle +------------------------------------------ +On a faster hardware CFQ can be slow, especially with sequential workload. +This happens because CFQ idles on a single queue and single queue might not +drive deeper request queue depths to keep the storage busy. In such scenarios +one can try setting slice_idle=0 and that would switch CFQ to IOPS +(IO operations per second) mode on NCQ supporting hardware. + +That means CFQ will not idle between cfq queues of a cfq group and hence be +able to driver higher queue depth and achieve better throughput. That also +means that cfq provides fairness among groups in terms of IOPS and not in +terms of disk time. + +/sys/block//queue/iosched/group_idle +------------------------------------------ +If one disables idling on individual cfq queues and cfq service trees by +setting slice_idle=0, group_idle kicks in. That means CFQ will still idle +on the group in an attempt to provide fairness among groups. + +By default group_idle is same as slice_idle and does not do anything if +slice_idle is enabled. + +One can experience an overall throughput drop if you have created multiple +groups and put applications in that group which are not driving enough +IO to keep disk busy. In that case set group_idle=0, and CFQ will not idle +on individual groups and throughput should improve. diff --git a/Documentation/cgroup-v1/blkio-controller.txt b/Documentation/cgroup-v1/blkio-controller.txt deleted file mode 100644 index 673dc34d3f78..000000000000 --- a/Documentation/cgroup-v1/blkio-controller.txt +++ /dev/null @@ -1,375 +0,0 @@ - Block IO Controller - =================== -Overview -======== -cgroup subsys "blkio" implements the block io controller. There seems to be -a need of various kinds of IO control policies (like proportional BW, max BW) -both at leaf nodes as well as at intermediate nodes in a storage hierarchy. -Plan is to use the same cgroup based management interface for blkio controller -and based on user options switch IO policies in the background. - -Currently two IO control policies are implemented. First one is proportional -weight time based division of disk policy. It is implemented in CFQ. Hence -this policy takes effect only on leaf nodes when CFQ is being used. The second -one is throttling policy which can be used to specify upper IO rate limits -on devices. This policy is implemented in generic block layer and can be -used on leaf nodes as well as higher level logical devices like device mapper. - -HOWTO -===== -Proportional Weight division of bandwidth ------------------------------------------ -You can do a very simple testing of running two dd threads in two different -cgroups. Here is what you can do. - -- Enable Block IO controller - CONFIG_BLK_CGROUP=y - -- Enable group scheduling in CFQ - CONFIG_CFQ_GROUP_IOSCHED=y - -- Compile and boot into kernel and mount IO controller (blkio); see - cgroups.txt, Why are cgroups needed?. - - mount -t tmpfs cgroup_root /sys/fs/cgroup - mkdir /sys/fs/cgroup/blkio - mount -t cgroup -o blkio none /sys/fs/cgroup/blkio - -- Create two cgroups - mkdir -p /sys/fs/cgroup/blkio/test1/ /sys/fs/cgroup/blkio/test2 - -- Set weights of group test1 and test2 - echo 1000 > /sys/fs/cgroup/blkio/test1/blkio.weight - echo 500 > /sys/fs/cgroup/blkio/test2/blkio.weight - -- Create two same size files (say 512MB each) on same disk (file1, file2) and - launch two dd threads in different cgroup to read those files. - - sync - echo 3 > /proc/sys/vm/drop_caches - - dd if=/mnt/sdb/zerofile1 of=/dev/null & - echo $! > /sys/fs/cgroup/blkio/test1/tasks - cat /sys/fs/cgroup/blkio/test1/tasks - - dd if=/mnt/sdb/zerofile2 of=/dev/null & - echo $! > /sys/fs/cgroup/blkio/test2/tasks - cat /sys/fs/cgroup/blkio/test2/tasks - -- At macro level, first dd should finish first. To get more precise data, keep - on looking at (with the help of script), at blkio.disk_time and - blkio.disk_sectors files of both test1 and test2 groups. This will tell how - much disk time (in milliseconds), each group got and how many sectors each - group dispatched to the disk. We provide fairness in terms of disk time, so - ideally io.disk_time of cgroups should be in proportion to the weight. - -Throttling/Upper Limit policy ------------------------------ -- Enable Block IO controller - CONFIG_BLK_CGROUP=y - -- Enable throttling in block layer - CONFIG_BLK_DEV_THROTTLING=y - -- Mount blkio controller (see cgroups.txt, Why are cgroups needed?) - mount -t cgroup -o blkio none /sys/fs/cgroup/blkio - -- Specify a bandwidth rate on particular device for root group. The format - for policy is ": ". - - echo "8:16 1048576" > /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device - - Above will put a limit of 1MB/second on reads happening for root group - on device having major/minor number 8:16. - -- Run dd to read a file and see if rate is throttled to 1MB/s or not. - - # dd iflag=direct if=/mnt/common/zerofile of=/dev/null bs=4K count=1024 - 1024+0 records in - 1024+0 records out - 4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s - - Limits for writes can be put using blkio.throttle.write_bps_device file. - -Hierarchical Cgroups -==================== - -Both CFQ and throttling implement hierarchy support; however, -throttling's hierarchy support is enabled iff "sane_behavior" is -enabled from cgroup side, which currently is a development option and -not publicly available. - -If somebody created a hierarchy like as follows. - - root - / \ - test1 test2 - | - test3 - -CFQ by default and throttling with "sane_behavior" will handle the -hierarchy correctly. For details on CFQ hierarchy support, refer to -Documentation/block/cfq-iosched.txt. For throttling, all limits apply -to the whole subtree while all statistics are local to the IOs -directly generated by tasks in that cgroup. - -Throttling without "sane_behavior" enabled from cgroup side will -practically treat all groups at same level as if it looks like the -following. - - pivot - / / \ \ - root test1 test2 test3 - -Various user visible config options -=================================== -CONFIG_BLK_CGROUP - - Block IO controller. - -CONFIG_DEBUG_BLK_CGROUP - - Debug help. Right now some additional stats file show up in cgroup - if this option is enabled. - -CONFIG_CFQ_GROUP_IOSCHED - - Enables group scheduling in CFQ. Currently only 1 level of group - creation is allowed. - -CONFIG_BLK_DEV_THROTTLING - - Enable block device throttling support in block layer. - -Details of cgroup files -======================= -Proportional weight policy files --------------------------------- -- blkio.weight - - Specifies per cgroup weight. This is default weight of the group - on all the devices until and unless overridden by per device rule. - (See blkio.weight_device). - Currently allowed range of weights is from 10 to 1000. - -- blkio.weight_device - - One can specify per cgroup per device rules using this interface. - These rules override the default value of group weight as specified - by blkio.weight. - - Following is the format. - - # echo dev_maj:dev_minor weight > blkio.weight_device - Configure weight=300 on /dev/sdb (8:16) in this cgroup - # echo 8:16 300 > blkio.weight_device - # cat blkio.weight_device - dev weight - 8:16 300 - - Configure weight=500 on /dev/sda (8:0) in this cgroup - # echo 8:0 500 > blkio.weight_device - # cat blkio.weight_device - dev weight - 8:0 500 - 8:16 300 - - Remove specific weight for /dev/sda in this cgroup - # echo 8:0 0 > blkio.weight_device - # cat blkio.weight_device - dev weight - 8:16 300 - -- blkio.leaf_weight[_device] - - Equivalents of blkio.weight[_device] for the purpose of - deciding how much weight tasks in the given cgroup has while - competing with the cgroup's child cgroups. For details, - please refer to Documentation/block/cfq-iosched.txt. - -- blkio.time - - disk time allocated to cgroup per device in milliseconds. First - two fields specify the major and minor number of the device and - third field specifies the disk time allocated to group in - milliseconds. - -- blkio.sectors - - number of sectors transferred to/from disk by the group. First - two fields specify the major and minor number of the device and - third field specifies the number of sectors transferred by the - group to/from the device. - -- blkio.io_service_bytes - - Number of bytes transferred to/from the disk by the group. These - are further divided by the type of operation - read or write, sync - or async. First two fields specify the major and minor number of the - device, third field specifies the operation type and the fourth field - specifies the number of bytes. - -- blkio.io_serviced - - Number of IOs (bio) issued to the disk by the group. These - are further divided by the type of operation - read or write, sync - or async. First two fields specify the major and minor number of the - device, third field specifies the operation type and the fourth field - specifies the number of IOs. - -- blkio.io_service_time - - Total amount of time between request dispatch and request completion - for the IOs done by this cgroup. This is in nanoseconds to make it - meaningful for flash devices too. For devices with queue depth of 1, - this time represents the actual service time. When queue_depth > 1, - that is no longer true as requests may be served out of order. This - may cause the service time for a given IO to include the service time - of multiple IOs when served out of order which may result in total - io_service_time > actual time elapsed. This time is further divided by - the type of operation - read or write, sync or async. First two fields - specify the major and minor number of the device, third field - specifies the operation type and the fourth field specifies the - io_service_time in ns. - -- blkio.io_wait_time - - Total amount of time the IOs for this cgroup spent waiting in the - scheduler queues for service. This can be greater than the total time - elapsed since it is cumulative io_wait_time for all IOs. It is not a - measure of total time the cgroup spent waiting but rather a measure of - the wait_time for its individual IOs. For devices with queue_depth > 1 - this metric does not include the time spent waiting for service once - the IO is dispatched to the device but till it actually gets serviced - (there might be a time lag here due to re-ordering of requests by the - device). This is in nanoseconds to make it meaningful for flash - devices too. This time is further divided by the type of operation - - read or write, sync or async. First two fields specify the major and - minor number of the device, third field specifies the operation type - and the fourth field specifies the io_wait_time in ns. - -- blkio.io_merged - - Total number of bios/requests merged into requests belonging to this - cgroup. This is further divided by the type of operation - read or - write, sync or async. - -- blkio.io_queued - - Total number of requests queued up at any given instant for this - cgroup. This is further divided by the type of operation - read or - write, sync or async. - -- blkio.avg_queue_size - - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. - The average queue size for this cgroup over the entire time of this - cgroup's existence. Queue size samples are taken each time one of the - queues of this cgroup gets a timeslice. - -- blkio.group_wait_time - - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. - This is the amount of time the cgroup had to wait since it became busy - (i.e., went from 0 to 1 request queued) to get a timeslice for one of - its queues. This is different from the io_wait_time which is the - cumulative total of the amount of time spent by each IO in that cgroup - waiting in the scheduler queue. This is in nanoseconds. If this is - read when the cgroup is in a waiting (for timeslice) state, the stat - will only report the group_wait_time accumulated till the last time it - got a timeslice and will not include the current delta. - -- blkio.empty_time - - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. - This is the amount of time a cgroup spends without any pending - requests when not being served, i.e., it does not include any time - spent idling for one of the queues of the cgroup. This is in - nanoseconds. If this is read when the cgroup is in an empty state, - the stat will only report the empty_time accumulated till the last - time it had a pending request and will not include the current delta. - -- blkio.idle_time - - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. - This is the amount of time spent by the IO scheduler idling for a - given cgroup in anticipation of a better request than the existing ones - from other queues/cgroups. This is in nanoseconds. If this is read - when the cgroup is in an idling state, the stat will only report the - idle_time accumulated till the last idle period and will not include - the current delta. - -- blkio.dequeue - - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. This - gives the statistics about how many a times a group was dequeued - from service tree of the device. First two fields specify the major - and minor number of the device and third field specifies the number - of times a group was dequeued from a particular device. - -- blkio.*_recursive - - Recursive version of various stats. These files show the - same information as their non-recursive counterparts but - include stats from all the descendant cgroups. - -Throttling/Upper limit policy files ------------------------------------ -- blkio.throttle.read_bps_device - - Specifies upper limit on READ rate from the device. IO rate is - specified in bytes per second. Rules are per device. Following is - the format. - - echo ": " > /cgrp/blkio.throttle.read_bps_device - -- blkio.throttle.write_bps_device - - Specifies upper limit on WRITE rate to the device. IO rate is - specified in bytes per second. Rules are per device. Following is - the format. - - echo ": " > /cgrp/blkio.throttle.write_bps_device - -- blkio.throttle.read_iops_device - - Specifies upper limit on READ rate from the device. IO rate is - specified in IO per second. Rules are per device. Following is - the format. - - echo ": " > /cgrp/blkio.throttle.read_iops_device - -- blkio.throttle.write_iops_device - - Specifies upper limit on WRITE rate to the device. IO rate is - specified in io per second. Rules are per device. Following is - the format. - - echo ": " > /cgrp/blkio.throttle.write_iops_device - -Note: If both BW and IOPS rules are specified for a device, then IO is - subjected to both the constraints. - -- blkio.throttle.io_serviced - - Number of IOs (bio) issued to the disk by the group. These - are further divided by the type of operation - read or write, sync - or async. First two fields specify the major and minor number of the - device, third field specifies the operation type and the fourth field - specifies the number of IOs. - -- blkio.throttle.io_service_bytes - - Number of bytes transferred to/from the disk by the group. These - are further divided by the type of operation - read or write, sync - or async. First two fields specify the major and minor number of the - device, third field specifies the operation type and the fourth field - specifies the number of bytes. - -Common files among various policies ------------------------------------ -- blkio.reset_stats - - Writing an int to this file will result in resetting all the stats - for that cgroup. - -CFQ sysfs tunable -================= -/sys/block//queue/iosched/slice_idle ------------------------------------------- -On a faster hardware CFQ can be slow, especially with sequential workload. -This happens because CFQ idles on a single queue and single queue might not -drive deeper request queue depths to keep the storage busy. In such scenarios -one can try setting slice_idle=0 and that would switch CFQ to IOPS -(IO operations per second) mode on NCQ supporting hardware. - -That means CFQ will not idle between cfq queues of a cfq group and hence be -able to driver higher queue depth and achieve better throughput. That also -means that cfq provides fairness among groups in terms of IOPS and not in -terms of disk time. - -/sys/block//queue/iosched/group_idle ------------------------------------------- -If one disables idling on individual cfq queues and cfq service trees by -setting slice_idle=0, group_idle kicks in. That means CFQ will still idle -on the group in an attempt to provide fairness among groups. - -By default group_idle is same as slice_idle and does not do anything if -slice_idle is enabled. - -One can experience an overall throughput drop if you have created multiple -groups and put applications in that group which are not driving enough -IO to keep disk busy. In that case set group_idle=0, and CFQ will not idle -on individual groups and throughput should improve. diff --git a/Documentation/cgroup-v1/cgroups.rst b/Documentation/cgroup-v1/cgroups.rst new file mode 100644 index 000000000000..46bbe7e022d4 --- /dev/null +++ b/Documentation/cgroup-v1/cgroups.rst @@ -0,0 +1,695 @@ +============== +Control Groups +============== + +Written by Paul Menage based on +Documentation/cgroup-v1/cpusets.rst + +Original copyright statements from cpusets.txt: + +Portions Copyright (C) 2004 BULL SA. + +Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. + +Modified by Paul Jackson + +Modified by Christoph Lameter + +.. CONTENTS: + + 1. Control Groups + 1.1 What are cgroups ? + 1.2 Why are cgroups needed ? + 1.3 How are cgroups implemented ? + 1.4 What does notify_on_release do ? + 1.5 What does clone_children do ? + 1.6 How do I use cgroups ? + 2. Usage Examples and Syntax + 2.1 Basic Usage + 2.2 Attaching processes + 2.3 Mounting hierarchies by name + 3. Kernel API + 3.1 Overview + 3.2 Synchronization + 3.3 Subsystem API + 4. Extended attributes usage + 5. Questions + +1. Control Groups +================= + +1.1 What are cgroups ? +---------------------- + +Control Groups provide a mechanism for aggregating/partitioning sets of +tasks, and all their future children, into hierarchical groups with +specialized behaviour. + +Definitions: + +A *cgroup* associates a set of tasks with a set of parameters for one +or more subsystems. + +A *subsystem* is a module that makes use of the task grouping +facilities provided by cgroups to treat groups of tasks in +particular ways. A subsystem is typically a "resource controller" that +schedules a resource or applies per-cgroup limits, but it may be +anything that wants to act on a group of processes, e.g. a +virtualization subsystem. + +A *hierarchy* is a set of cgroups arranged in a tree, such that +every task in the system is in exactly one of the cgroups in the +hierarchy, and a set of subsystems; each subsystem has system-specific +state attached to each cgroup in the hierarchy. Each hierarchy has +an instance of the cgroup virtual filesystem associated with it. + +At any one time there may be multiple active hierarchies of task +cgroups. Each hierarchy is a partition of all tasks in the system. + +User-level code may create and destroy cgroups by name in an +instance of the cgroup virtual file system, specify and query to +which cgroup a task is assigned, and list the task PIDs assigned to +a cgroup. Those creations and assignments only affect the hierarchy +associated with that instance of the cgroup file system. + +On their own, the only use for cgroups is for simple job +tracking. The intention is that other subsystems hook into the generic +cgroup support to provide new attributes for cgroups, such as +accounting/limiting the resources which processes in a cgroup can +access. For example, cpusets (see Documentation/cgroup-v1/cpusets.rst) allow +you to associate a set of CPUs and a set of memory nodes with the +tasks in each cgroup. + +1.2 Why are cgroups needed ? +---------------------------- + +There are multiple efforts to provide process aggregations in the +Linux kernel, mainly for resource-tracking purposes. Such efforts +include cpusets, CKRM/ResGroups, UserBeanCounters, and virtual server +namespaces. These all require the basic notion of a +grouping/partitioning of processes, with newly forked processes ending +up in the same group (cgroup) as their parent process. + +The kernel cgroup patch provides the minimum essential kernel +mechanisms required to efficiently implement such groups. It has +minimal impact on the system fast paths, and provides hooks for +specific subsystems such as cpusets to provide additional behaviour as +desired. + +Multiple hierarchy support is provided to allow for situations where +the division of tasks into cgroups is distinctly different for +different subsystems - having parallel hierarchies allows each +hierarchy to be a natural division of tasks, without having to handle +complex combinations of tasks that would be present if several +unrelated subsystems needed to be forced into the same tree of +cgroups. + +At one extreme, each resource controller or subsystem could be in a +separate hierarchy; at the other extreme, all subsystems +would be attached to the same hierarchy. + +As an example of a scenario (originally proposed by vatsa@in.ibm.com) +that can benefit from multiple hierarchies, consider a large +university server with various users - students, professors, system +tasks etc. The resource planning for this server could be along the +following lines:: + + CPU : "Top cpuset" + / \ + CPUSet1 CPUSet2 + | | + (Professors) (Students) + + In addition (system tasks) are attached to topcpuset (so + that they can run anywhere) with a limit of 20% + + Memory : Professors (50%), Students (30%), system (20%) + + Disk : Professors (50%), Students (30%), system (20%) + + Network : WWW browsing (20%), Network File System (60%), others (20%) + / \ + Professors (15%) students (5%) + +Browsers like Firefox/Lynx go into the WWW network class, while (k)nfsd goes +into the NFS network class. + +At the same time Firefox/Lynx will share an appropriate CPU/Memory class +depending on who launched it (prof/student). + +With the ability to classify tasks differently for different resources +(by putting those resource subsystems in different hierarchies), +the admin can easily set up a script which receives exec notifications +and depending on who is launching the browser he can:: + + # echo browser_pid > /sys/fs/cgroup///tasks + +With only a single hierarchy, he now would potentially have to create +a separate cgroup for every browser launched and associate it with +appropriate network and other resource class. This may lead to +proliferation of such cgroups. + +Also let's say that the administrator would like to give enhanced network +access temporarily to a student's browser (since it is night and the user +wants to do online gaming :)) OR give one of the student's simulation +apps enhanced CPU power. + +With ability to write PIDs directly to resource classes, it's just a +matter of:: + + # echo pid > /sys/fs/cgroup/network//tasks + (after some time) + # echo pid > /sys/fs/cgroup/network//tasks + +Without this ability, the administrator would have to split the cgroup into +multiple separate ones and then associate the new cgroups with the +new resource classes. + + + +1.3 How are cgroups implemented ? +--------------------------------- + +Control Groups extends the kernel as follows: + + - Each task in the system has a reference-counted pointer to a + css_set. + + - A css_set contains a set of reference-counted pointers to + cgroup_subsys_state objects, one for each cgroup subsystem + registered in the system. There is no direct link from a task to + the cgroup of which it's a member in each hierarchy, but this + can be determined by following pointers through the + cgroup_subsys_state objects. This is because accessing the + subsystem state is something that's expected to happen frequently + and in performance-critical code, whereas operations that require a + task's actual cgroup assignments (in particular, moving between + cgroups) are less common. A linked list runs through the cg_list + field of each task_struct using the css_set, anchored at + css_set->tasks. + + - A cgroup hierarchy filesystem can be mounted for browsing and + manipulation from user space. + + - You can list all the tasks (by PID) attached to any cgroup. + +The implementation of cgroups requires a few, simple hooks +into the rest of the kernel, none in performance-critical paths: + + - in init/main.c, to initialize the root cgroups and initial + css_set at system boot. + + - in fork and exit, to attach and detach a task from its css_set. + +In addition, a new file system of type "cgroup" may be mounted, to +enable browsing and modifying the cgroups presently known to the +kernel. When mounting a cgroup hierarchy, you may specify a +comma-separated list of subsystems to mount as the filesystem mount +options. By default, mounting the cgroup filesystem attempts to +mount a hierarchy containing all registered subsystems. + +If an active hierarchy with exactly the same set of subsystems already +exists, it will be reused for the new mount. If no existing hierarchy +matches, and any of the requested subsystems are in use in an existing +hierarchy, the mount will fail with -EBUSY. Otherwise, a new hierarchy +is activated, associated with the requested subsystems. + +It's not currently possible to bind a new subsystem to an active +cgroup hierarchy, or to unbind a subsystem from an active cgroup +hierarchy. This may be possible in future, but is fraught with nasty +error-recovery issues. + +When a cgroup filesystem is unmounted, if there are any +child cgroups created below the top-level cgroup, that hierarchy +will remain active even though unmounted; if there are no +child cgroups then the hierarchy will be deactivated. + +No new system calls are added for cgroups - all support for +querying and modifying cgroups is via this cgroup file system. + +Each task under /proc has an added file named 'cgroup' displaying, +for each active hierarchy, the subsystem names and the cgroup name +as the path relative to the root of the cgroup file system. + +Each cgroup is represented by a directory in the cgroup file system +containing the following files describing that cgroup: + + - tasks: list of tasks (by PID) attached to that cgroup. This list + is not guaranteed to be sorted. Writing a thread ID into this file + moves the thread into this cgroup. + - cgroup.procs: list of thread group IDs in the cgroup. This list is + not guaranteed to be sorted or free of duplicate TGIDs, and userspace + should sort/uniquify the list if this property is required. + Writing a thread group ID into this file moves all threads in that + group into this cgroup. + - notify_on_release flag: run the release agent on exit? + - release_agent: the path to use for release notifications (this file + exists in the top cgroup only) + +Other subsystems such as cpusets may add additional files in each +cgroup dir. + +New cgroups are created using the mkdir system call or shell +command. The properties of a cgroup, such as its flags, are +modified by writing to the appropriate file in that cgroups +directory, as listed above. + +The named hierarchical structure of nested cgroups allows partitioning +a large system into nested, dynamically changeable, "soft-partitions". + +The attachment of each task, automatically inherited at fork by any +children of that task, to a cgroup allows organizing the work load +on a system into related sets of tasks. A task may be re-attached to +any other cgroup, if allowed by the permissions on the necessary +cgroup file system directories. + +When a task is moved from one cgroup to another, it gets a new +css_set pointer - if there's an already existing css_set with the +desired collection of cgroups then that group is reused, otherwise a new +css_set is allocated. The appropriate existing css_set is located by +looking into a hash table. + +To allow access from a cgroup to the css_sets (and hence tasks) +that comprise it, a set of cg_cgroup_link objects form a lattice; +each cg_cgroup_link is linked into a list of cg_cgroup_links for +a single cgroup on its cgrp_link_list field, and a list of +cg_cgroup_links for a single css_set on its cg_link_list. + +Thus the set of tasks in a cgroup can be listed by iterating over +each css_set that references the cgroup, and sub-iterating over +each css_set's task set. + +The use of a Linux virtual file system (vfs) to represent the +cgroup hierarchy provides for a familiar permission and name space +for cgroups, with a minimum of additional kernel code. + +1.4 What does notify_on_release do ? +------------------------------------ + +If the notify_on_release flag is enabled (1) in a cgroup, then +whenever the last task in the cgroup leaves (exits or attaches to +some other cgroup) and the last child cgroup of that cgroup +is removed, then the kernel runs the command specified by the contents +of the "release_agent" file in that hierarchy's root directory, +supplying the pathname (relative to the mount point of the cgroup +file system) of the abandoned cgroup. This enables automatic +removal of abandoned cgroups. The default value of +notify_on_release in the root cgroup at system boot is disabled +(0). The default value of other cgroups at creation is the current +value of their parents' notify_on_release settings. The default value of +a cgroup hierarchy's release_agent path is empty. + +1.5 What does clone_children do ? +--------------------------------- + +This flag only affects the cpuset controller. If the clone_children +flag is enabled (1) in a cgroup, a new cpuset cgroup will copy its +configuration from the parent during initialization. + +1.6 How do I use cgroups ? +-------------------------- + +To start a new job that is to be contained within a cgroup, using +the "cpuset" cgroup subsystem, the steps are something like:: + + 1) mount -t tmpfs cgroup_root /sys/fs/cgroup + 2) mkdir /sys/fs/cgroup/cpuset + 3) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset + 4) Create the new cgroup by doing mkdir's and write's (or echo's) in + the /sys/fs/cgroup/cpuset virtual file system. + 5) Start a task that will be the "founding father" of the new job. + 6) Attach that task to the new cgroup by writing its PID to the + /sys/fs/cgroup/cpuset tasks file for that cgroup. + 7) fork, exec or clone the job tasks from this founding father task. + +For example, the following sequence of commands will setup a cgroup +named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, +and then start a subshell 'sh' in that cgroup:: + + mount -t tmpfs cgroup_root /sys/fs/cgroup + mkdir /sys/fs/cgroup/cpuset + mount -t cgroup cpuset -ocpuset /sys/fs/cgroup/cpuset + cd /sys/fs/cgroup/cpuset + mkdir Charlie + cd Charlie + /bin/echo 2-3 > cpuset.cpus + /bin/echo 1 > cpuset.mems + /bin/echo $$ > tasks + sh + # The subshell 'sh' is now running in cgroup Charlie + # The next line should display '/Charlie' + cat /proc/self/cgroup + +2. Usage Examples and Syntax +============================ + +2.1 Basic Usage +--------------- + +Creating, modifying, using cgroups can be done through the cgroup +virtual filesystem. + +To mount a cgroup hierarchy with all available subsystems, type:: + + # mount -t cgroup xxx /sys/fs/cgroup + +The "xxx" is not interpreted by the cgroup code, but will appear in +/proc/mounts so may be any useful identifying string that you like. + +Note: Some subsystems do not work without some user input first. For instance, +if cpusets are enabled the user will have to populate the cpus and mems files +for each new cgroup created before that group can be used. + +As explained in section `1.2 Why are cgroups needed?` you should create +different hierarchies of cgroups for each single resource or group of +resources you want to control. Therefore, you should mount a tmpfs on +/sys/fs/cgroup and create directories for each cgroup resource or resource +group:: + + # mount -t tmpfs cgroup_root /sys/fs/cgroup + # mkdir /sys/fs/cgroup/rg1 + +To mount a cgroup hierarchy with just the cpuset and memory +subsystems, type:: + + # mount -t cgroup -o cpuset,memory hier1 /sys/fs/cgroup/rg1 + +While remounting cgroups is currently supported, it is not recommend +to use it. Remounting allows changing bound subsystems and +release_agent. Rebinding is hardly useful as it only works when the +hierarchy is empty and release_agent itself should be replaced with +conventional fsnotify. The support for remounting will be removed in +the future. + +To Specify a hierarchy's release_agent:: + + # mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \ + xxx /sys/fs/cgroup/rg1 + +Note that specifying 'release_agent' more than once will return failure. + +Note that changing the set of subsystems is currently only supported +when the hierarchy consists of a single (root) cgroup. Supporting +the ability to arbitrarily bind/unbind subsystems from an existing +cgroup hierarchy is intended to be implemented in the future. + +Then under /sys/fs/cgroup/rg1 you can find a tree that corresponds to the +tree of the cgroups in the system. For instance, /sys/fs/cgroup/rg1 +is the cgroup that holds the whole system. + +If you want to change the value of release_agent:: + + # echo "/sbin/new_release_agent" > /sys/fs/cgroup/rg1/release_agent + +It can also be changed via remount. + +If you want to create a new cgroup under /sys/fs/cgroup/rg1:: + + # cd /sys/fs/cgroup/rg1 + # mkdir my_cgroup + +Now you want to do something with this cgroup: + + # cd my_cgroup + +In this directory you can find several files:: + + # ls + cgroup.procs notify_on_release tasks + (plus whatever files added by the attached subsystems) + +Now attach your shell to this cgroup:: + + # /bin/echo $$ > tasks + +You can also create cgroups inside your cgroup by using mkdir in this +directory:: + + # mkdir my_sub_cs + +To remove a cgroup, just use rmdir:: + + # rmdir my_sub_cs + +This will fail if the cgroup is in use (has cgroups inside, or +has processes attached, or is held alive by other subsystem-specific +reference). + +2.2 Attaching processes +----------------------- + +:: + + # /bin/echo PID > tasks + +Note that it is PID, not PIDs. You can only attach ONE task at a time. +If you have several tasks to attach, you have to do it one after another:: + + # /bin/echo PID1 > tasks + # /bin/echo PID2 > tasks + ... + # /bin/echo PIDn > tasks + +You can attach the current shell task by echoing 0:: + + # echo 0 > tasks + +You can use the cgroup.procs file instead of the tasks file to move all +threads in a threadgroup at once. Echoing the PID of any task in a +threadgroup to cgroup.procs causes all tasks in that threadgroup to be +attached to the cgroup. Writing 0 to cgroup.procs moves all tasks +in the writing task's threadgroup. + +Note: Since every task is always a member of exactly one cgroup in each +mounted hierarchy, to remove a task from its current cgroup you must +move it into a new cgroup (possibly the root cgroup) by writing to the +new cgroup's tasks file. + +Note: Due to some restrictions enforced by some cgroup subsystems, moving +a process to another cgroup can fail. + +2.3 Mounting hierarchies by name +-------------------------------- + +Passing the name= option when mounting a cgroups hierarchy +associates the given name with the hierarchy. This can be used when +mounting a pre-existing hierarchy, in order to refer to it by name +rather than by its set of active subsystems. Each hierarchy is either +nameless, or has a unique name. + +The name should match [\w.-]+ + +When passing a name= option for a new hierarchy, you need to +specify subsystems manually; the legacy behaviour of mounting all +subsystems when none are explicitly specified is not supported when +you give a subsystem a name. + +The name of the subsystem appears as part of the hierarchy description +in /proc/mounts and /proc//cgroups. + + +3. Kernel API +============= + +3.1 Overview +------------ + +Each kernel subsystem that wants to hook into the generic cgroup +system needs to create a cgroup_subsys object. This contains +various methods, which are callbacks from the cgroup system, along +with a subsystem ID which will be assigned by the cgroup system. + +Other fields in the cgroup_subsys object include: + +- subsys_id: a unique array index for the subsystem, indicating which + entry in cgroup->subsys[] this subsystem should be managing. + +- name: should be initialized to a unique subsystem name. Should be + no longer than MAX_CGROUP_TYPE_NAMELEN. + +- early_init: indicate if the subsystem needs early initialization + at system boot. + +Each cgroup object created by the system has an array of pointers, +indexed by subsystem ID; this pointer is entirely managed by the +subsystem; the generic cgroup code will never touch this pointer. + +3.2 Synchronization +------------------- + +There is a global mutex, cgroup_mutex, used by the cgroup +system. This should be taken by anything that wants to modify a +cgroup. It may also be taken to prevent cgroups from being +modified, but more specific locks may be more appropriate in that +situation. + +See kernel/cgroup.c for more details. + +Subsystems can take/release the cgroup_mutex via the functions +cgroup_lock()/cgroup_unlock(). + +Accessing a task's cgroup pointer may be done in the following ways: +- while holding cgroup_mutex +- while holding the task's alloc_lock (via task_lock()) +- inside an rcu_read_lock() section via rcu_dereference() + +3.3 Subsystem API +----------------- + +Each subsystem should: + +- add an entry in linux/cgroup_subsys.h +- define a cgroup_subsys object called _cgrp_subsys + +Each subsystem may export the following methods. The only mandatory +methods are css_alloc/free. Any others that are null are presumed to +be successful no-ops. + +``struct cgroup_subsys_state *css_alloc(struct cgroup *cgrp)`` +(cgroup_mutex held by caller) + +Called to allocate a subsystem state object for a cgroup. The +subsystem should allocate its subsystem state object for the passed +cgroup, returning a pointer to the new object on success or a +ERR_PTR() value. On success, the subsystem pointer should point to +a structure of type cgroup_subsys_state (typically embedded in a +larger subsystem-specific object), which will be initialized by the +cgroup system. Note that this will be called at initialization to +create the root subsystem state for this subsystem; this case can be +identified by the passed cgroup object having a NULL parent (since +it's the root of the hierarchy) and may be an appropriate place for +initialization code. + +``int css_online(struct cgroup *cgrp)`` +(cgroup_mutex held by caller) + +Called after @cgrp successfully completed all allocations and made +visible to cgroup_for_each_child/descendant_*() iterators. The +subsystem may choose to fail creation by returning -errno. This +callback can be used to implement reliable state sharing and +propagation along the hierarchy. See the comment on +cgroup_for_each_descendant_pre() for details. + +``void css_offline(struct cgroup *cgrp);`` +(cgroup_mutex held by caller) + +This is the counterpart of css_online() and called iff css_online() +has succeeded on @cgrp. This signifies the beginning of the end of +@cgrp. @cgrp is being removed and the subsystem should start dropping +all references it's holding on @cgrp. When all references are dropped, +cgroup removal will proceed to the next step - css_free(). After this +callback, @cgrp should be considered dead to the subsystem. + +``void css_free(struct cgroup *cgrp)`` +(cgroup_mutex held by caller) + +The cgroup system is about to free @cgrp; the subsystem should free +its subsystem state object. By the time this method is called, @cgrp +is completely unused; @cgrp->parent is still valid. (Note - can also +be called for a newly-created cgroup if an error occurs after this +subsystem's create() method has been called for the new cgroup). + +``int can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)`` +(cgroup_mutex held by caller) + +Called prior to moving one or more tasks into a cgroup; if the +subsystem returns an error, this will abort the attach operation. +@tset contains the tasks to be attached and is guaranteed to have at +least one task in it. + +If there are multiple tasks in the taskset, then: + - it's guaranteed that all are from the same thread group + - @tset contains all tasks from the thread group whether or not + they're switching cgroups + - the first task is the leader + +Each @tset entry also contains the task's old cgroup and tasks which +aren't switching cgroup can be skipped easily using the +cgroup_taskset_for_each() iterator. Note that this isn't called on a +fork. If this method returns 0 (success) then this should remain valid +while the caller holds cgroup_mutex and it is ensured that either +attach() or cancel_attach() will be called in future. + +``void css_reset(struct cgroup_subsys_state *css)`` +(cgroup_mutex held by caller) + +An optional operation which should restore @css's configuration to the +initial state. This is currently only used on the unified hierarchy +when a subsystem is disabled on a cgroup through +"cgroup.subtree_control" but should remain enabled because other +subsystems depend on it. cgroup core makes such a css invisible by +removing the associated interface files and invokes this callback so +that the hidden subsystem can return to the initial neutral state. +This prevents unexpected resource control from a hidden css and +ensures that the configuration is in the initial state when it is made +visible again later. + +``void cancel_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)`` +(cgroup_mutex held by caller) + +Called when a task attach operation has failed after can_attach() has succeeded. +A subsystem whose can_attach() has some side-effects should provide this +function, so that the subsystem can implement a rollback. If not, not necessary. +This will be called only about subsystems whose can_attach() operation have +succeeded. The parameters are identical to can_attach(). + +``void attach(struct cgroup *cgrp, struct cgroup_taskset *tset)`` +(cgroup_mutex held by caller) + +Called after the task has been attached to the cgroup, to allow any +post-attachment activity that requires memory allocations or blocking. +The parameters are identical to can_attach(). + +``void fork(struct task_struct *task)`` + +Called when a task is forked into a cgroup. + +``void exit(struct task_struct *task)`` + +Called during task exit. + +``void free(struct task_struct *task)`` + +Called when the task_struct is freed. + +``void bind(struct cgroup *root)`` +(cgroup_mutex held by caller) + +Called when a cgroup subsystem is rebound to a different hierarchy +and root cgroup. Currently this will only involve movement between +the default hierarchy (which never has sub-cgroups) and a hierarchy +that is being created/destroyed (and hence has no sub-cgroups). + +4. Extended attribute usage +=========================== + +cgroup filesystem supports certain types of extended attributes in its +directories and files. The current supported types are: + + - Trusted (XATTR_TRUSTED) + - Security (XATTR_SECURITY) + +Both require CAP_SYS_ADMIN capability to set. + +Like in tmpfs, the extended attributes in cgroup filesystem are stored +using kernel memory and it's advised to keep the usage at minimum. This +is the reason why user defined extended attributes are not supported, since +any user can do it and there's no limit in the value size. + +The current known users for this feature are SELinux to limit cgroup usage +in containers and systemd for assorted meta data like main PID in a cgroup +(systemd creates a cgroup per service). + +5. Questions +============ + +:: + + Q: what's up with this '/bin/echo' ? + A: bash's builtin 'echo' command does not check calls to write() against + errors. If you use it in the cgroup file system, you won't be + able to tell whether a command succeeded or failed. + + Q: When I attach processes, only the first of the line gets really attached ! + A: We can only return one error code per call to write(). So you should also + put only ONE PID. diff --git a/Documentation/cgroup-v1/cgroups.txt b/Documentation/cgroup-v1/cgroups.txt deleted file mode 100644 index 059f7063eea6..000000000000 --- a/Documentation/cgroup-v1/cgroups.txt +++ /dev/null @@ -1,677 +0,0 @@ - CGROUPS - ------- - -Written by Paul Menage based on -Documentation/cgroup-v1/cpusets.txt - -Original copyright statements from cpusets.txt: -Portions Copyright (C) 2004 BULL SA. -Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. -Modified by Paul Jackson -Modified by Christoph Lameter - -CONTENTS: -========= - -1. Control Groups - 1.1 What are cgroups ? - 1.2 Why are cgroups needed ? - 1.3 How are cgroups implemented ? - 1.4 What does notify_on_release do ? - 1.5 What does clone_children do ? - 1.6 How do I use cgroups ? -2. Usage Examples and Syntax - 2.1 Basic Usage - 2.2 Attaching processes - 2.3 Mounting hierarchies by name -3. Kernel API - 3.1 Overview - 3.2 Synchronization - 3.3 Subsystem API -4. Extended attributes usage -5. Questions - -1. Control Groups -================= - -1.1 What are cgroups ? ----------------------- - -Control Groups provide a mechanism for aggregating/partitioning sets of -tasks, and all their future children, into hierarchical groups with -specialized behaviour. - -Definitions: - -A *cgroup* associates a set of tasks with a set of parameters for one -or more subsystems. - -A *subsystem* is a module that makes use of the task grouping -facilities provided by cgroups to treat groups of tasks in -particular ways. A subsystem is typically a "resource controller" that -schedules a resource or applies per-cgroup limits, but it may be -anything that wants to act on a group of processes, e.g. a -virtualization subsystem. - -A *hierarchy* is a set of cgroups arranged in a tree, such that -every task in the system is in exactly one of the cgroups in the -hierarchy, and a set of subsystems; each subsystem has system-specific -state attached to each cgroup in the hierarchy. Each hierarchy has -an instance of the cgroup virtual filesystem associated with it. - -At any one time there may be multiple active hierarchies of task -cgroups. Each hierarchy is a partition of all tasks in the system. - -User-level code may create and destroy cgroups by name in an -instance of the cgroup virtual file system, specify and query to -which cgroup a task is assigned, and list the task PIDs assigned to -a cgroup. Those creations and assignments only affect the hierarchy -associated with that instance of the cgroup file system. - -On their own, the only use for cgroups is for simple job -tracking. The intention is that other subsystems hook into the generic -cgroup support to provide new attributes for cgroups, such as -accounting/limiting the resources which processes in a cgroup can -access. For example, cpusets (see Documentation/cgroup-v1/cpusets.txt) allow -you to associate a set of CPUs and a set of memory nodes with the -tasks in each cgroup. - -1.2 Why are cgroups needed ? ----------------------------- - -There are multiple efforts to provide process aggregations in the -Linux kernel, mainly for resource-tracking purposes. Such efforts -include cpusets, CKRM/ResGroups, UserBeanCounters, and virtual server -namespaces. These all require the basic notion of a -grouping/partitioning of processes, with newly forked processes ending -up in the same group (cgroup) as their parent process. - -The kernel cgroup patch provides the minimum essential kernel -mechanisms required to efficiently implement such groups. It has -minimal impact on the system fast paths, and provides hooks for -specific subsystems such as cpusets to provide additional behaviour as -desired. - -Multiple hierarchy support is provided to allow for situations where -the division of tasks into cgroups is distinctly different for -different subsystems - having parallel hierarchies allows each -hierarchy to be a natural division of tasks, without having to handle -complex combinations of tasks that would be present if several -unrelated subsystems needed to be forced into the same tree of -cgroups. - -At one extreme, each resource controller or subsystem could be in a -separate hierarchy; at the other extreme, all subsystems -would be attached to the same hierarchy. - -As an example of a scenario (originally proposed by vatsa@in.ibm.com) -that can benefit from multiple hierarchies, consider a large -university server with various users - students, professors, system -tasks etc. The resource planning for this server could be along the -following lines: - - CPU : "Top cpuset" - / \ - CPUSet1 CPUSet2 - | | - (Professors) (Students) - - In addition (system tasks) are attached to topcpuset (so - that they can run anywhere) with a limit of 20% - - Memory : Professors (50%), Students (30%), system (20%) - - Disk : Professors (50%), Students (30%), system (20%) - - Network : WWW browsing (20%), Network File System (60%), others (20%) - / \ - Professors (15%) students (5%) - -Browsers like Firefox/Lynx go into the WWW network class, while (k)nfsd goes -into the NFS network class. - -At the same time Firefox/Lynx will share an appropriate CPU/Memory class -depending on who launched it (prof/student). - -With the ability to classify tasks differently for different resources -(by putting those resource subsystems in different hierarchies), -the admin can easily set up a script which receives exec notifications -and depending on who is launching the browser he can - - # echo browser_pid > /sys/fs/cgroup///tasks - -With only a single hierarchy, he now would potentially have to create -a separate cgroup for every browser launched and associate it with -appropriate network and other resource class. This may lead to -proliferation of such cgroups. - -Also let's say that the administrator would like to give enhanced network -access temporarily to a student's browser (since it is night and the user -wants to do online gaming :)) OR give one of the student's simulation -apps enhanced CPU power. - -With ability to write PIDs directly to resource classes, it's just a -matter of: - - # echo pid > /sys/fs/cgroup/network//tasks - (after some time) - # echo pid > /sys/fs/cgroup/network//tasks - -Without this ability, the administrator would have to split the cgroup into -multiple separate ones and then associate the new cgroups with the -new resource classes. - - - -1.3 How are cgroups implemented ? ---------------------------------- - -Control Groups extends the kernel as follows: - - - Each task in the system has a reference-counted pointer to a - css_set. - - - A css_set contains a set of reference-counted pointers to - cgroup_subsys_state objects, one for each cgroup subsystem - registered in the system. There is no direct link from a task to - the cgroup of which it's a member in each hierarchy, but this - can be determined by following pointers through the - cgroup_subsys_state objects. This is because accessing the - subsystem state is something that's expected to happen frequently - and in performance-critical code, whereas operations that require a - task's actual cgroup assignments (in particular, moving between - cgroups) are less common. A linked list runs through the cg_list - field of each task_struct using the css_set, anchored at - css_set->tasks. - - - A cgroup hierarchy filesystem can be mounted for browsing and - manipulation from user space. - - - You can list all the tasks (by PID) attached to any cgroup. - -The implementation of cgroups requires a few, simple hooks -into the rest of the kernel, none in performance-critical paths: - - - in init/main.c, to initialize the root cgroups and initial - css_set at system boot. - - - in fork and exit, to attach and detach a task from its css_set. - -In addition, a new file system of type "cgroup" may be mounted, to -enable browsing and modifying the cgroups presently known to the -kernel. When mounting a cgroup hierarchy, you may specify a -comma-separated list of subsystems to mount as the filesystem mount -options. By default, mounting the cgroup filesystem attempts to -mount a hierarchy containing all registered subsystems. - -If an active hierarchy with exactly the same set of subsystems already -exists, it will be reused for the new mount. If no existing hierarchy -matches, and any of the requested subsystems are in use in an existing -hierarchy, the mount will fail with -EBUSY. Otherwise, a new hierarchy -is activated, associated with the requested subsystems. - -It's not currently possible to bind a new subsystem to an active -cgroup hierarchy, or to unbind a subsystem from an active cgroup -hierarchy. This may be possible in future, but is fraught with nasty -error-recovery issues. - -When a cgroup filesystem is unmounted, if there are any -child cgroups created below the top-level cgroup, that hierarchy -will remain active even though unmounted; if there are no -child cgroups then the hierarchy will be deactivated. - -No new system calls are added for cgroups - all support for -querying and modifying cgroups is via this cgroup file system. - -Each task under /proc has an added file named 'cgroup' displaying, -for each active hierarchy, the subsystem names and the cgroup name -as the path relative to the root of the cgroup file system. - -Each cgroup is represented by a directory in the cgroup file system -containing the following files describing that cgroup: - - - tasks: list of tasks (by PID) attached to that cgroup. This list - is not guaranteed to be sorted. Writing a thread ID into this file - moves the thread into this cgroup. - - cgroup.procs: list of thread group IDs in the cgroup. This list is - not guaranteed to be sorted or free of duplicate TGIDs, and userspace - should sort/uniquify the list if this property is required. - Writing a thread group ID into this file moves all threads in that - group into this cgroup. - - notify_on_release flag: run the release agent on exit? - - release_agent: the path to use for release notifications (this file - exists in the top cgroup only) - -Other subsystems such as cpusets may add additional files in each -cgroup dir. - -New cgroups are created using the mkdir system call or shell -command. The properties of a cgroup, such as its flags, are -modified by writing to the appropriate file in that cgroups -directory, as listed above. - -The named hierarchical structure of nested cgroups allows partitioning -a large system into nested, dynamically changeable, "soft-partitions". - -The attachment of each task, automatically inherited at fork by any -children of that task, to a cgroup allows organizing the work load -on a system into related sets of tasks. A task may be re-attached to -any other cgroup, if allowed by the permissions on the necessary -cgroup file system directories. - -When a task is moved from one cgroup to another, it gets a new -css_set pointer - if there's an already existing css_set with the -desired collection of cgroups then that group is reused, otherwise a new -css_set is allocated. The appropriate existing css_set is located by -looking into a hash table. - -To allow access from a cgroup to the css_sets (and hence tasks) -that comprise it, a set of cg_cgroup_link objects form a lattice; -each cg_cgroup_link is linked into a list of cg_cgroup_links for -a single cgroup on its cgrp_link_list field, and a list of -cg_cgroup_links for a single css_set on its cg_link_list. - -Thus the set of tasks in a cgroup can be listed by iterating over -each css_set that references the cgroup, and sub-iterating over -each css_set's task set. - -The use of a Linux virtual file system (vfs) to represent the -cgroup hierarchy provides for a familiar permission and name space -for cgroups, with a minimum of additional kernel code. - -1.4 What does notify_on_release do ? ------------------------------------- - -If the notify_on_release flag is enabled (1) in a cgroup, then -whenever the last task in the cgroup leaves (exits or attaches to -some other cgroup) and the last child cgroup of that cgroup -is removed, then the kernel runs the command specified by the contents -of the "release_agent" file in that hierarchy's root directory, -supplying the pathname (relative to the mount point of the cgroup -file system) of the abandoned cgroup. This enables automatic -removal of abandoned cgroups. The default value of -notify_on_release in the root cgroup at system boot is disabled -(0). The default value of other cgroups at creation is the current -value of their parents' notify_on_release settings. The default value of -a cgroup hierarchy's release_agent path is empty. - -1.5 What does clone_children do ? ---------------------------------- - -This flag only affects the cpuset controller. If the clone_children -flag is enabled (1) in a cgroup, a new cpuset cgroup will copy its -configuration from the parent during initialization. - -1.6 How do I use cgroups ? --------------------------- - -To start a new job that is to be contained within a cgroup, using -the "cpuset" cgroup subsystem, the steps are something like: - - 1) mount -t tmpfs cgroup_root /sys/fs/cgroup - 2) mkdir /sys/fs/cgroup/cpuset - 3) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset - 4) Create the new cgroup by doing mkdir's and write's (or echo's) in - the /sys/fs/cgroup/cpuset virtual file system. - 5) Start a task that will be the "founding father" of the new job. - 6) Attach that task to the new cgroup by writing its PID to the - /sys/fs/cgroup/cpuset tasks file for that cgroup. - 7) fork, exec or clone the job tasks from this founding father task. - -For example, the following sequence of commands will setup a cgroup -named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, -and then start a subshell 'sh' in that cgroup: - - mount -t tmpfs cgroup_root /sys/fs/cgroup - mkdir /sys/fs/cgroup/cpuset - mount -t cgroup cpuset -ocpuset /sys/fs/cgroup/cpuset - cd /sys/fs/cgroup/cpuset - mkdir Charlie - cd Charlie - /bin/echo 2-3 > cpuset.cpus - /bin/echo 1 > cpuset.mems - /bin/echo $$ > tasks - sh - # The subshell 'sh' is now running in cgroup Charlie - # The next line should display '/Charlie' - cat /proc/self/cgroup - -2. Usage Examples and Syntax -============================ - -2.1 Basic Usage ---------------- - -Creating, modifying, using cgroups can be done through the cgroup -virtual filesystem. - -To mount a cgroup hierarchy with all available subsystems, type: -# mount -t cgroup xxx /sys/fs/cgroup - -The "xxx" is not interpreted by the cgroup code, but will appear in -/proc/mounts so may be any useful identifying string that you like. - -Note: Some subsystems do not work without some user input first. For instance, -if cpusets are enabled the user will have to populate the cpus and mems files -for each new cgroup created before that group can be used. - -As explained in section `1.2 Why are cgroups needed?' you should create -different hierarchies of cgroups for each single resource or group of -resources you want to control. Therefore, you should mount a tmpfs on -/sys/fs/cgroup and create directories for each cgroup resource or resource -group. - -# mount -t tmpfs cgroup_root /sys/fs/cgroup -# mkdir /sys/fs/cgroup/rg1 - -To mount a cgroup hierarchy with just the cpuset and memory -subsystems, type: -# mount -t cgroup -o cpuset,memory hier1 /sys/fs/cgroup/rg1 - -While remounting cgroups is currently supported, it is not recommend -to use it. Remounting allows changing bound subsystems and -release_agent. Rebinding is hardly useful as it only works when the -hierarchy is empty and release_agent itself should be replaced with -conventional fsnotify. The support for remounting will be removed in -the future. - -To Specify a hierarchy's release_agent: -# mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \ - xxx /sys/fs/cgroup/rg1 - -Note that specifying 'release_agent' more than once will return failure. - -Note that changing the set of subsystems is currently only supported -when the hierarchy consists of a single (root) cgroup. Supporting -the ability to arbitrarily bind/unbind subsystems from an existing -cgroup hierarchy is intended to be implemented in the future. - -Then under /sys/fs/cgroup/rg1 you can find a tree that corresponds to the -tree of the cgroups in the system. For instance, /sys/fs/cgroup/rg1 -is the cgroup that holds the whole system. - -If you want to change the value of release_agent: -# echo "/sbin/new_release_agent" > /sys/fs/cgroup/rg1/release_agent - -It can also be changed via remount. - -If you want to create a new cgroup under /sys/fs/cgroup/rg1: -# cd /sys/fs/cgroup/rg1 -# mkdir my_cgroup - -Now you want to do something with this cgroup. -# cd my_cgroup - -In this directory you can find several files: -# ls -cgroup.procs notify_on_release tasks -(plus whatever files added by the attached subsystems) - -Now attach your shell to this cgroup: -# /bin/echo $$ > tasks - -You can also create cgroups inside your cgroup by using mkdir in this -directory. -# mkdir my_sub_cs - -To remove a cgroup, just use rmdir: -# rmdir my_sub_cs - -This will fail if the cgroup is in use (has cgroups inside, or -has processes attached, or is held alive by other subsystem-specific -reference). - -2.2 Attaching processes ------------------------ - -# /bin/echo PID > tasks - -Note that it is PID, not PIDs. You can only attach ONE task at a time. -If you have several tasks to attach, you have to do it one after another: - -# /bin/echo PID1 > tasks -# /bin/echo PID2 > tasks - ... -# /bin/echo PIDn > tasks - -You can attach the current shell task by echoing 0: - -# echo 0 > tasks - -You can use the cgroup.procs file instead of the tasks file to move all -threads in a threadgroup at once. Echoing the PID of any task in a -threadgroup to cgroup.procs causes all tasks in that threadgroup to be -attached to the cgroup. Writing 0 to cgroup.procs moves all tasks -in the writing task's threadgroup. - -Note: Since every task is always a member of exactly one cgroup in each -mounted hierarchy, to remove a task from its current cgroup you must -move it into a new cgroup (possibly the root cgroup) by writing to the -new cgroup's tasks file. - -Note: Due to some restrictions enforced by some cgroup subsystems, moving -a process to another cgroup can fail. - -2.3 Mounting hierarchies by name --------------------------------- - -Passing the name= option when mounting a cgroups hierarchy -associates the given name with the hierarchy. This can be used when -mounting a pre-existing hierarchy, in order to refer to it by name -rather than by its set of active subsystems. Each hierarchy is either -nameless, or has a unique name. - -The name should match [\w.-]+ - -When passing a name= option for a new hierarchy, you need to -specify subsystems manually; the legacy behaviour of mounting all -subsystems when none are explicitly specified is not supported when -you give a subsystem a name. - -The name of the subsystem appears as part of the hierarchy description -in /proc/mounts and /proc//cgroups. - - -3. Kernel API -============= - -3.1 Overview ------------- - -Each kernel subsystem that wants to hook into the generic cgroup -system needs to create a cgroup_subsys object. This contains -various methods, which are callbacks from the cgroup system, along -with a subsystem ID which will be assigned by the cgroup system. - -Other fields in the cgroup_subsys object include: - -- subsys_id: a unique array index for the subsystem, indicating which - entry in cgroup->subsys[] this subsystem should be managing. - -- name: should be initialized to a unique subsystem name. Should be - no longer than MAX_CGROUP_TYPE_NAMELEN. - -- early_init: indicate if the subsystem needs early initialization - at system boot. - -Each cgroup object created by the system has an array of pointers, -indexed by subsystem ID; this pointer is entirely managed by the -subsystem; the generic cgroup code will never touch this pointer. - -3.2 Synchronization -------------------- - -There is a global mutex, cgroup_mutex, used by the cgroup -system. This should be taken by anything that wants to modify a -cgroup. It may also be taken to prevent cgroups from being -modified, but more specific locks may be more appropriate in that -situation. - -See kernel/cgroup.c for more details. - -Subsystems can take/release the cgroup_mutex via the functions -cgroup_lock()/cgroup_unlock(). - -Accessing a task's cgroup pointer may be done in the following ways: -- while holding cgroup_mutex -- while holding the task's alloc_lock (via task_lock()) -- inside an rcu_read_lock() section via rcu_dereference() - -3.3 Subsystem API ------------------ - -Each subsystem should: - -- add an entry in linux/cgroup_subsys.h -- define a cgroup_subsys object called _cgrp_subsys - -Each subsystem may export the following methods. The only mandatory -methods are css_alloc/free. Any others that are null are presumed to -be successful no-ops. - -struct cgroup_subsys_state *css_alloc(struct cgroup *cgrp) -(cgroup_mutex held by caller) - -Called to allocate a subsystem state object for a cgroup. The -subsystem should allocate its subsystem state object for the passed -cgroup, returning a pointer to the new object on success or a -ERR_PTR() value. On success, the subsystem pointer should point to -a structure of type cgroup_subsys_state (typically embedded in a -larger subsystem-specific object), which will be initialized by the -cgroup system. Note that this will be called at initialization to -create the root subsystem state for this subsystem; this case can be -identified by the passed cgroup object having a NULL parent (since -it's the root of the hierarchy) and may be an appropriate place for -initialization code. - -int css_online(struct cgroup *cgrp) -(cgroup_mutex held by caller) - -Called after @cgrp successfully completed all allocations and made -visible to cgroup_for_each_child/descendant_*() iterators. The -subsystem may choose to fail creation by returning -errno. This -callback can be used to implement reliable state sharing and -propagation along the hierarchy. See the comment on -cgroup_for_each_descendant_pre() for details. - -void css_offline(struct cgroup *cgrp); -(cgroup_mutex held by caller) - -This is the counterpart of css_online() and called iff css_online() -has succeeded on @cgrp. This signifies the beginning of the end of -@cgrp. @cgrp is being removed and the subsystem should start dropping -all references it's holding on @cgrp. When all references are dropped, -cgroup removal will proceed to the next step - css_free(). After this -callback, @cgrp should be considered dead to the subsystem. - -void css_free(struct cgroup *cgrp) -(cgroup_mutex held by caller) - -The cgroup system is about to free @cgrp; the subsystem should free -its subsystem state object. By the time this method is called, @cgrp -is completely unused; @cgrp->parent is still valid. (Note - can also -be called for a newly-created cgroup if an error occurs after this -subsystem's create() method has been called for the new cgroup). - -int can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) -(cgroup_mutex held by caller) - -Called prior to moving one or more tasks into a cgroup; if the -subsystem returns an error, this will abort the attach operation. -@tset contains the tasks to be attached and is guaranteed to have at -least one task in it. - -If there are multiple tasks in the taskset, then: - - it's guaranteed that all are from the same thread group - - @tset contains all tasks from the thread group whether or not - they're switching cgroups - - the first task is the leader - -Each @tset entry also contains the task's old cgroup and tasks which -aren't switching cgroup can be skipped easily using the -cgroup_taskset_for_each() iterator. Note that this isn't called on a -fork. If this method returns 0 (success) then this should remain valid -while the caller holds cgroup_mutex and it is ensured that either -attach() or cancel_attach() will be called in future. - -void css_reset(struct cgroup_subsys_state *css) -(cgroup_mutex held by caller) - -An optional operation which should restore @css's configuration to the -initial state. This is currently only used on the unified hierarchy -when a subsystem is disabled on a cgroup through -"cgroup.subtree_control" but should remain enabled because other -subsystems depend on it. cgroup core makes such a css invisible by -removing the associated interface files and invokes this callback so -that the hidden subsystem can return to the initial neutral state. -This prevents unexpected resource control from a hidden css and -ensures that the configuration is in the initial state when it is made -visible again later. - -void cancel_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) -(cgroup_mutex held by caller) - -Called when a task attach operation has failed after can_attach() has succeeded. -A subsystem whose can_attach() has some side-effects should provide this -function, so that the subsystem can implement a rollback. If not, not necessary. -This will be called only about subsystems whose can_attach() operation have -succeeded. The parameters are identical to can_attach(). - -void attach(struct cgroup *cgrp, struct cgroup_taskset *tset) -(cgroup_mutex held by caller) - -Called after the task has been attached to the cgroup, to allow any -post-attachment activity that requires memory allocations or blocking. -The parameters are identical to can_attach(). - -void fork(struct task_struct *task) - -Called when a task is forked into a cgroup. - -void exit(struct task_struct *task) - -Called during task exit. - -void free(struct task_struct *task) - -Called when the task_struct is freed. - -void bind(struct cgroup *root) -(cgroup_mutex held by caller) - -Called when a cgroup subsystem is rebound to a different hierarchy -and root cgroup. Currently this will only involve movement between -the default hierarchy (which never has sub-cgroups) and a hierarchy -that is being created/destroyed (and hence has no sub-cgroups). - -4. Extended attribute usage -=========================== - -cgroup filesystem supports certain types of extended attributes in its -directories and files. The current supported types are: - - Trusted (XATTR_TRUSTED) - - Security (XATTR_SECURITY) - -Both require CAP_SYS_ADMIN capability to set. - -Like in tmpfs, the extended attributes in cgroup filesystem are stored -using kernel memory and it's advised to keep the usage at minimum. This -is the reason why user defined extended attributes are not supported, since -any user can do it and there's no limit in the value size. - -The current known users for this feature are SELinux to limit cgroup usage -in containers and systemd for assorted meta data like main PID in a cgroup -(systemd creates a cgroup per service). - -5. Questions -============ - -Q: what's up with this '/bin/echo' ? -A: bash's builtin 'echo' command does not check calls to write() against - errors. If you use it in the cgroup file system, you won't be - able to tell whether a command succeeded or failed. - -Q: When I attach processes, only the first of the line gets really attached ! -A: We can only return one error code per call to write(). So you should also - put only ONE PID. - diff --git a/Documentation/cgroup-v1/cpuacct.rst b/Documentation/cgroup-v1/cpuacct.rst new file mode 100644 index 000000000000..d30ed81d2ad7 --- /dev/null +++ b/Documentation/cgroup-v1/cpuacct.rst @@ -0,0 +1,50 @@ +========================= +CPU Accounting Controller +========================= + +The CPU accounting controller is used to group tasks using cgroups and +account the CPU usage of these groups of tasks. + +The CPU accounting controller supports multi-hierarchy groups. An accounting +group accumulates the CPU usage of all of its child groups and the tasks +directly present in its group. + +Accounting groups can be created by first mounting the cgroup filesystem:: + + # mount -t cgroup -ocpuacct none /sys/fs/cgroup + +With the above step, the initial or the parent accounting group becomes +visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in +the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup. +/sys/fs/cgroup/cpuacct.usage gives the CPU time (in nanoseconds) obtained +by this group which is essentially the CPU time obtained by all the tasks +in the system. + +New accounting groups can be created under the parent group /sys/fs/cgroup:: + + # cd /sys/fs/cgroup + # mkdir g1 + # echo $$ > g1/tasks + +The above steps create a new group g1 and move the current shell +process (bash) into it. CPU time consumed by this bash and its children +can be obtained from g1/cpuacct.usage and the same is accumulated in +/sys/fs/cgroup/cpuacct.usage also. + +cpuacct.stat file lists a few statistics which further divide the +CPU time obtained by the cgroup into user and system times. Currently +the following statistics are supported: + +user: Time spent by tasks of the cgroup in user mode. +system: Time spent by tasks of the cgroup in kernel mode. + +user and system are in USER_HZ unit. + +cpuacct controller uses percpu_counter interface to collect user and +system times. This has two side effects: + +- It is theoretically possible to see wrong values for user and system times. + This is because percpu_counter_read() on 32bit systems isn't safe + against concurrent writes. +- It is possible to see slightly outdated values for user and system times + due to the batch processing nature of percpu_counter. diff --git a/Documentation/cgroup-v1/cpuacct.txt b/Documentation/cgroup-v1/cpuacct.txt deleted file mode 100644 index 9d73cc0cadb9..000000000000 --- a/Documentation/cgroup-v1/cpuacct.txt +++ /dev/null @@ -1,49 +0,0 @@ -CPU Accounting Controller -------------------------- - -The CPU accounting controller is used to group tasks using cgroups and -account the CPU usage of these groups of tasks. - -The CPU accounting controller supports multi-hierarchy groups. An accounting -group accumulates the CPU usage of all of its child groups and the tasks -directly present in its group. - -Accounting groups can be created by first mounting the cgroup filesystem. - -# mount -t cgroup -ocpuacct none /sys/fs/cgroup - -With the above step, the initial or the parent accounting group becomes -visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in -the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup. -/sys/fs/cgroup/cpuacct.usage gives the CPU time (in nanoseconds) obtained -by this group which is essentially the CPU time obtained by all the tasks -in the system. - -New accounting groups can be created under the parent group /sys/fs/cgroup. - -# cd /sys/fs/cgroup -# mkdir g1 -# echo $$ > g1/tasks - -The above steps create a new group g1 and move the current shell -process (bash) into it. CPU time consumed by this bash and its children -can be obtained from g1/cpuacct.usage and the same is accumulated in -/sys/fs/cgroup/cpuacct.usage also. - -cpuacct.stat file lists a few statistics which further divide the -CPU time obtained by the cgroup into user and system times. Currently -the following statistics are supported: - -user: Time spent by tasks of the cgroup in user mode. -system: Time spent by tasks of the cgroup in kernel mode. - -user and system are in USER_HZ unit. - -cpuacct controller uses percpu_counter interface to collect user and -system times. This has two side effects: - -- It is theoretically possible to see wrong values for user and system times. - This is because percpu_counter_read() on 32bit systems isn't safe - against concurrent writes. -- It is possible to see slightly outdated values for user and system times - due to the batch processing nature of percpu_counter. diff --git a/Documentation/cgroup-v1/cpusets.rst b/Documentation/cgroup-v1/cpusets.rst new file mode 100644 index 000000000000..b6a42cdea72b --- /dev/null +++ b/Documentation/cgroup-v1/cpusets.rst @@ -0,0 +1,866 @@ +======= +CPUSETS +======= + +Copyright (C) 2004 BULL SA. + +Written by Simon.Derr@bull.net + +- Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. +- Modified by Paul Jackson +- Modified by Christoph Lameter +- Modified by Paul Menage +- Modified by Hidetoshi Seto + +.. CONTENTS: + + 1. Cpusets + 1.1 What are cpusets ? + 1.2 Why are cpusets needed ? + 1.3 How are cpusets implemented ? + 1.4 What are exclusive cpusets ? + 1.5 What is memory_pressure ? + 1.6 What is memory spread ? + 1.7 What is sched_load_balance ? + 1.8 What is sched_relax_domain_level ? + 1.9 How do I use cpusets ? + 2. Usage Examples and Syntax + 2.1 Basic Usage + 2.2 Adding/removing cpus + 2.3 Setting flags + 2.4 Attaching processes + 3. Questions + 4. Contact + +1. Cpusets +========== + +1.1 What are cpusets ? +---------------------- + +Cpusets provide a mechanism for assigning a set of CPUs and Memory +Nodes to a set of tasks. In this document "Memory Node" refers to +an on-line node that contains memory. + +Cpusets constrain the CPU and Memory placement of tasks to only +the resources within a task's current cpuset. They form a nested +hierarchy visible in a virtual file system. These are the essential +hooks, beyond what is already present, required to manage dynamic +job placement on large systems. + +Cpusets use the generic cgroup subsystem described in +Documentation/cgroup-v1/cgroups.rst. + +Requests by a task, using the sched_setaffinity(2) system call to +include CPUs in its CPU affinity mask, and using the mbind(2) and +set_mempolicy(2) system calls to include Memory Nodes in its memory +policy, are both filtered through that task's cpuset, filtering out any +CPUs or Memory Nodes not in that cpuset. The scheduler will not +schedule a task on a CPU that is not allowed in its cpus_allowed +vector, and the kernel page allocator will not allocate a page on a +node that is not allowed in the requesting task's mems_allowed vector. + +User level code may create and destroy cpusets by name in the cgroup +virtual file system, manage the attributes and permissions of these +cpusets and which CPUs and Memory Nodes are assigned to each cpuset, +specify and query to which cpuset a task is assigned, and list the +task pids assigned to a cpuset. + + +1.2 Why are cpusets needed ? +---------------------------- + +The management of large computer systems, with many processors (CPUs), +complex memory cache hierarchies and multiple Memory Nodes having +non-uniform access times (NUMA) presents additional challenges for +the efficient scheduling and memory placement of processes. + +Frequently more modest sized systems can be operated with adequate +efficiency just by letting the operating system automatically share +the available CPU and Memory resources amongst the requesting tasks. + +But larger systems, which benefit more from careful processor and +memory placement to reduce memory access times and contention, +and which typically represent a larger investment for the customer, +can benefit from explicitly placing jobs on properly sized subsets of +the system. + +This can be especially valuable on: + + * Web Servers running multiple instances of the same web application, + * Servers running different applications (for instance, a web server + and a database), or + * NUMA systems running large HPC applications with demanding + performance characteristics. + +These subsets, or "soft partitions" must be able to be dynamically +adjusted, as the job mix changes, without impacting other concurrently +executing jobs. The location of the running jobs pages may also be moved +when the memory locations are changed. + +The kernel cpuset patch provides the minimum essential kernel +mechanisms required to efficiently implement such subsets. It +leverages existing CPU and Memory Placement facilities in the Linux +kernel to avoid any additional impact on the critical scheduler or +memory allocator code. + + +1.3 How are cpusets implemented ? +--------------------------------- + +Cpusets provide a Linux kernel mechanism to constrain which CPUs and +Memory Nodes are used by a process or set of processes. + +The Linux kernel already has a pair of mechanisms to specify on which +CPUs a task may be scheduled (sched_setaffinity) and on which Memory +Nodes it may obtain memory (mbind, set_mempolicy). + +Cpusets extends these two mechanisms as follows: + + - Cpusets are sets of allowed CPUs and Memory Nodes, known to the + kernel. + - Each task in the system is attached to a cpuset, via a pointer + in the task structure to a reference counted cgroup structure. + - Calls to sched_setaffinity are filtered to just those CPUs + allowed in that task's cpuset. + - Calls to mbind and set_mempolicy are filtered to just + those Memory Nodes allowed in that task's cpuset. + - The root cpuset contains all the systems CPUs and Memory + Nodes. + - For any cpuset, one can define child cpusets containing a subset + of the parents CPU and Memory Node resources. + - The hierarchy of cpusets can be mounted at /dev/cpuset, for + browsing and manipulation from user space. + - A cpuset may be marked exclusive, which ensures that no other + cpuset (except direct ancestors and descendants) may contain + any overlapping CPUs or Memory Nodes. + - You can list all the tasks (by pid) attached to any cpuset. + +The implementation of cpusets requires a few, simple hooks +into the rest of the kernel, none in performance critical paths: + + - in init/main.c, to initialize the root cpuset at system boot. + - in fork and exit, to attach and detach a task from its cpuset. + - in sched_setaffinity, to mask the requested CPUs by what's + allowed in that task's cpuset. + - in sched.c migrate_live_tasks(), to keep migrating tasks within + the CPUs allowed by their cpuset, if possible. + - in the mbind and set_mempolicy system calls, to mask the requested + Memory Nodes by what's allowed in that task's cpuset. + - in page_alloc.c, to restrict memory to allowed nodes. + - in vmscan.c, to restrict page recovery to the current cpuset. + +You should mount the "cgroup" filesystem type in order to enable +browsing and modifying the cpusets presently known to the kernel. No +new system calls are added for cpusets - all support for querying and +modifying cpusets is via this cpuset file system. + +The /proc//status file for each task has four added lines, +displaying the task's cpus_allowed (on which CPUs it may be scheduled) +and mems_allowed (on which Memory Nodes it may obtain memory), +in the two formats seen in the following example:: + + Cpus_allowed: ffffffff,ffffffff,ffffffff,ffffffff + Cpus_allowed_list: 0-127 + Mems_allowed: ffffffff,ffffffff + Mems_allowed_list: 0-63 + +Each cpuset is represented by a directory in the cgroup file system +containing (on top of the standard cgroup files) the following +files describing that cpuset: + + - cpuset.cpus: list of CPUs in that cpuset + - cpuset.mems: list of Memory Nodes in that cpuset + - cpuset.memory_migrate flag: if set, move pages to cpusets nodes + - cpuset.cpu_exclusive flag: is cpu placement exclusive? + - cpuset.mem_exclusive flag: is memory placement exclusive? + - cpuset.mem_hardwall flag: is memory allocation hardwalled + - cpuset.memory_pressure: measure of how much paging pressure in cpuset + - cpuset.memory_spread_page flag: if set, spread page cache evenly on allowed nodes + - cpuset.memory_spread_slab flag: if set, spread slab cache evenly on allowed nodes + - cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset + - cpuset.sched_relax_domain_level: the searching range when migrating tasks + +In addition, only the root cpuset has the following file: + + - cpuset.memory_pressure_enabled flag: compute memory_pressure? + +New cpusets are created using the mkdir system call or shell +command. The properties of a cpuset, such as its flags, allowed +CPUs and Memory Nodes, and attached tasks, are modified by writing +to the appropriate file in that cpusets directory, as listed above. + +The named hierarchical structure of nested cpusets allows partitioning +a large system into nested, dynamically changeable, "soft-partitions". + +The attachment of each task, automatically inherited at fork by any +children of that task, to a cpuset allows organizing the work load +on a system into related sets of tasks such that each set is constrained +to using the CPUs and Memory Nodes of a particular cpuset. A task +may be re-attached to any other cpuset, if allowed by the permissions +on the necessary cpuset file system directories. + +Such management of a system "in the large" integrates smoothly with +the detailed placement done on individual tasks and memory regions +using the sched_setaffinity, mbind and set_mempolicy system calls. + +The following rules apply to each cpuset: + + - Its CPUs and Memory Nodes must be a subset of its parents. + - It can't be marked exclusive unless its parent is. + - If its cpu or memory is exclusive, they may not overlap any sibling. + +These rules, and the natural hierarchy of cpusets, enable efficient +enforcement of the exclusive guarantee, without having to scan all +cpusets every time any of them change to ensure nothing overlaps a +exclusive cpuset. Also, the use of a Linux virtual file system (vfs) +to represent the cpuset hierarchy provides for a familiar permission +and name space for cpusets, with a minimum of additional kernel code. + +The cpus and mems files in the root (top_cpuset) cpuset are +read-only. The cpus file automatically tracks the value of +cpu_online_mask using a CPU hotplug notifier, and the mems file +automatically tracks the value of node_states[N_MEMORY]--i.e., +nodes with memory--using the cpuset_track_online_nodes() hook. + + +1.4 What are exclusive cpusets ? +-------------------------------- + +If a cpuset is cpu or mem exclusive, no other cpuset, other than +a direct ancestor or descendant, may share any of the same CPUs or +Memory Nodes. + +A cpuset that is cpuset.mem_exclusive *or* cpuset.mem_hardwall is "hardwalled", +i.e. it restricts kernel allocations for page, buffer and other data +commonly shared by the kernel across multiple users. All cpusets, +whether hardwalled or not, restrict allocations of memory for user +space. This enables configuring a system so that several independent +jobs can share common kernel data, such as file system pages, while +isolating each job's user allocation in its own cpuset. To do this, +construct a large mem_exclusive cpuset to hold all the jobs, and +construct child, non-mem_exclusive cpusets for each individual job. +Only a small amount of typical kernel memory, such as requests from +interrupt handlers, is allowed to be taken outside even a +mem_exclusive cpuset. + + +1.5 What is memory_pressure ? +----------------------------- +The memory_pressure of a cpuset provides a simple per-cpuset metric +of the rate that the tasks in a cpuset are attempting to free up in +use memory on the nodes of the cpuset to satisfy additional memory +requests. + +This enables batch managers monitoring jobs running in dedicated +cpusets to efficiently detect what level of memory pressure that job +is causing. + +This is useful both on tightly managed systems running a wide mix of +submitted jobs, which may choose to terminate or re-prioritize jobs that +are trying to use more memory than allowed on the nodes assigned to them, +and with tightly coupled, long running, massively parallel scientific +computing jobs that will dramatically fail to meet required performance +goals if they start to use more memory than allowed to them. + +This mechanism provides a very economical way for the batch manager +to monitor a cpuset for signs of memory pressure. It's up to the +batch manager or other user code to decide what to do about it and +take action. + +==> + Unless this feature is enabled by writing "1" to the special file + /dev/cpuset/memory_pressure_enabled, the hook in the rebalance + code of __alloc_pages() for this metric reduces to simply noticing + that the cpuset_memory_pressure_enabled flag is zero. So only + systems that enable this feature will compute the metric. + +Why a per-cpuset, running average: + + Because this meter is per-cpuset, rather than per-task or mm, + the system load imposed by a batch scheduler monitoring this + metric is sharply reduced on large systems, because a scan of + the tasklist can be avoided on each set of queries. + + Because this meter is a running average, instead of an accumulating + counter, a batch scheduler can detect memory pressure with a + single read, instead of having to read and accumulate results + for a period of time. + + Because this meter is per-cpuset rather than per-task or mm, + the batch scheduler can obtain the key information, memory + pressure in a cpuset, with a single read, rather than having to + query and accumulate results over all the (dynamically changing) + set of tasks in the cpuset. + +A per-cpuset simple digital filter (requires a spinlock and 3 words +of data per-cpuset) is kept, and updated by any task attached to that +cpuset, if it enters the synchronous (direct) page reclaim code. + +A per-cpuset file provides an integer number representing the recent +(half-life of 10 seconds) rate of direct page reclaims caused by +the tasks in the cpuset, in units of reclaims attempted per second, +times 1000. + + +1.6 What is memory spread ? +--------------------------- +There are two boolean flag files per cpuset that control where the +kernel allocates pages for the file system buffers and related in +kernel data structures. They are called 'cpuset.memory_spread_page' and +'cpuset.memory_spread_slab'. + +If the per-cpuset boolean flag file 'cpuset.memory_spread_page' is set, then +the kernel will spread the file system buffers (page cache) evenly +over all the nodes that the faulting task is allowed to use, instead +of preferring to put those pages on the node where the task is running. + +If the per-cpuset boolean flag file 'cpuset.memory_spread_slab' is set, +then the kernel will spread some file system related slab caches, +such as for inodes and dentries evenly over all the nodes that the +faulting task is allowed to use, instead of preferring to put those +pages on the node where the task is running. + +The setting of these flags does not affect anonymous data segment or +stack segment pages of a task. + +By default, both kinds of memory spreading are off, and memory +pages are allocated on the node local to where the task is running, +except perhaps as modified by the task's NUMA mempolicy or cpuset +configuration, so long as sufficient free memory pages are available. + +When new cpusets are created, they inherit the memory spread settings +of their parent. + +Setting memory spreading causes allocations for the affected page +or slab caches to ignore the task's NUMA mempolicy and be spread +instead. Tasks using mbind() or set_mempolicy() calls to set NUMA +mempolicies will not notice any change in these calls as a result of +their containing task's memory spread settings. If memory spreading +is turned off, then the currently specified NUMA mempolicy once again +applies to memory page allocations. + +Both 'cpuset.memory_spread_page' and 'cpuset.memory_spread_slab' are boolean flag +files. By default they contain "0", meaning that the feature is off +for that cpuset. If a "1" is written to that file, then that turns +the named feature on. + +The implementation is simple. + +Setting the flag 'cpuset.memory_spread_page' turns on a per-process flag +PFA_SPREAD_PAGE for each task that is in that cpuset or subsequently +joins that cpuset. The page allocation calls for the page cache +is modified to perform an inline check for this PFA_SPREAD_PAGE task +flag, and if set, a call to a new routine cpuset_mem_spread_node() +returns the node to prefer for the allocation. + +Similarly, setting 'cpuset.memory_spread_slab' turns on the flag +PFA_SPREAD_SLAB, and appropriately marked slab caches will allocate +pages from the node returned by cpuset_mem_spread_node(). + +The cpuset_mem_spread_node() routine is also simple. It uses the +value of a per-task rotor cpuset_mem_spread_rotor to select the next +node in the current task's mems_allowed to prefer for the allocation. + +This memory placement policy is also known (in other contexts) as +round-robin or interleave. + +This policy can provide substantial improvements for jobs that need +to place thread local data on the corresponding node, but that need +to access large file system data sets that need to be spread across +the several nodes in the jobs cpuset in order to fit. Without this +policy, especially for jobs that might have one thread reading in the +data set, the memory allocation across the nodes in the jobs cpuset +can become very uneven. + +1.7 What is sched_load_balance ? +-------------------------------- + +The kernel scheduler (kernel/sched/core.c) automatically load balances +tasks. If one CPU is underutilized, kernel code running on that +CPU will look for tasks on other more overloaded CPUs and move those +tasks to itself, within the constraints of such placement mechanisms +as cpusets and sched_setaffinity. + +The algorithmic cost of load balancing and its impact on key shared +kernel data structures such as the task list increases more than +linearly with the number of CPUs being balanced. So the scheduler +has support to partition the systems CPUs into a number of sched +domains such that it only load balances within each sched domain. +Each sched domain covers some subset of the CPUs in the system; +no two sched domains overlap; some CPUs might not be in any sched +domain and hence won't be load balanced. + +Put simply, it costs less to balance between two smaller sched domains +than one big one, but doing so means that overloads in one of the +two domains won't be load balanced to the other one. + +By default, there is one sched domain covering all CPUs, including those +marked isolated using the kernel boot time "isolcpus=" argument. However, +the isolated CPUs will not participate in load balancing, and will not +have tasks running on them unless explicitly assigned. + +This default load balancing across all CPUs is not well suited for +the following two situations: + + 1) On large systems, load balancing across many CPUs is expensive. + If the system is managed using cpusets to place independent jobs + on separate sets of CPUs, full load balancing is unnecessary. + 2) Systems supporting realtime on some CPUs need to minimize + system overhead on those CPUs, including avoiding task load + balancing if that is not needed. + +When the per-cpuset flag "cpuset.sched_load_balance" is enabled (the default +setting), it requests that all the CPUs in that cpusets allowed 'cpuset.cpus' +be contained in a single sched domain, ensuring that load balancing +can move a task (not otherwised pinned, as by sched_setaffinity) +from any CPU in that cpuset to any other. + +When the per-cpuset flag "cpuset.sched_load_balance" is disabled, then the +scheduler will avoid load balancing across the CPUs in that cpuset, +--except-- in so far as is necessary because some overlapping cpuset +has "sched_load_balance" enabled. + +So, for example, if the top cpuset has the flag "cpuset.sched_load_balance" +enabled, then the scheduler will have one sched domain covering all +CPUs, and the setting of the "cpuset.sched_load_balance" flag in any other +cpusets won't matter, as we're already fully load balancing. + +Therefore in the above two situations, the top cpuset flag +"cpuset.sched_load_balance" should be disabled, and only some of the smaller, +child cpusets have this flag enabled. + +When doing this, you don't usually want to leave any unpinned tasks in +the top cpuset that might use non-trivial amounts of CPU, as such tasks +may be artificially constrained to some subset of CPUs, depending on +the particulars of this flag setting in descendant cpusets. Even if +such a task could use spare CPU cycles in some other CPUs, the kernel +scheduler might not consider the possibility of load balancing that +task to that underused CPU. + +Of course, tasks pinned to a particular CPU can be left in a cpuset +that disables "cpuset.sched_load_balance" as those tasks aren't going anywhere +else anyway. + +There is an impedance mismatch here, between cpusets and sched domains. +Cpusets are hierarchical and nest. Sched domains are flat; they don't +overlap and each CPU is in at most one sched domain. + +It is necessary for sched domains to be flat because load balancing +across partially overlapping sets of CPUs would risk unstable dynamics +that would be beyond our understanding. So if each of two partially +overlapping cpusets enables the flag 'cpuset.sched_load_balance', then we +form a single sched domain that is a superset of both. We won't move +a task to a CPU outside its cpuset, but the scheduler load balancing +code might waste some compute cycles considering that possibility. + +This mismatch is why there is not a simple one-to-one relation +between which cpusets have the flag "cpuset.sched_load_balance" enabled, +and the sched domain configuration. If a cpuset enables the flag, it +will get balancing across all its CPUs, but if it disables the flag, +it will only be assured of no load balancing if no other overlapping +cpuset enables the flag. + +If two cpusets have partially overlapping 'cpuset.cpus' allowed, and only +one of them has this flag enabled, then the other may find its +tasks only partially load balanced, just on the overlapping CPUs. +This is just the general case of the top_cpuset example given a few +paragraphs above. In the general case, as in the top cpuset case, +don't leave tasks that might use non-trivial amounts of CPU in +such partially load balanced cpusets, as they may be artificially +constrained to some subset of the CPUs allowed to them, for lack of +load balancing to the other CPUs. + +CPUs in "cpuset.isolcpus" were excluded from load balancing by the +isolcpus= kernel boot option, and will never be load balanced regardless +of the value of "cpuset.sched_load_balance" in any cpuset. + +1.7.1 sched_load_balance implementation details. +------------------------------------------------ + +The per-cpuset flag 'cpuset.sched_load_balance' defaults to enabled (contrary +to most cpuset flags.) When enabled for a cpuset, the kernel will +ensure that it can load balance across all the CPUs in that cpuset +(makes sure that all the CPUs in the cpus_allowed of that cpuset are +in the same sched domain.) + +If two overlapping cpusets both have 'cpuset.sched_load_balance' enabled, +then they will be (must be) both in the same sched domain. + +If, as is the default, the top cpuset has 'cpuset.sched_load_balance' enabled, +then by the above that means there is a single sched domain covering +the whole system, regardless of any other cpuset settings. + +The kernel commits to user space that it will avoid load balancing +where it can. It will pick as fine a granularity partition of sched +domains as it can while still providing load balancing for any set +of CPUs allowed to a cpuset having 'cpuset.sched_load_balance' enabled. + +The internal kernel cpuset to scheduler interface passes from the +cpuset code to the scheduler code a partition of the load balanced +CPUs in the system. This partition is a set of subsets (represented +as an array of struct cpumask) of CPUs, pairwise disjoint, that cover +all the CPUs that must be load balanced. + +The cpuset code builds a new such partition and passes it to the +scheduler sched domain setup code, to have the sched domains rebuilt +as necessary, whenever: + + - the 'cpuset.sched_load_balance' flag of a cpuset with non-empty CPUs changes, + - or CPUs come or go from a cpuset with this flag enabled, + - or 'cpuset.sched_relax_domain_level' value of a cpuset with non-empty CPUs + and with this flag enabled changes, + - or a cpuset with non-empty CPUs and with this flag enabled is removed, + - or a cpu is offlined/onlined. + +This partition exactly defines what sched domains the scheduler should +setup - one sched domain for each element (struct cpumask) in the +partition. + +The scheduler remembers the currently active sched domain partitions. +When the scheduler routine partition_sched_domains() is invoked from +the cpuset code to update these sched domains, it compares the new +partition requested with the current, and updates its sched domains, +removing the old and adding the new, for each change. + + +1.8 What is sched_relax_domain_level ? +-------------------------------------- + +In sched domain, the scheduler migrates tasks in 2 ways; periodic load +balance on tick, and at time of some schedule events. + +When a task is woken up, scheduler try to move the task on idle CPU. +For example, if a task A running on CPU X activates another task B +on the same CPU X, and if CPU Y is X's sibling and performing idle, +then scheduler migrate task B to CPU Y so that task B can start on +CPU Y without waiting task A on CPU X. + +And if a CPU run out of tasks in its runqueue, the CPU try to pull +extra tasks from other busy CPUs to help them before it is going to +be idle. + +Of course it takes some searching cost to find movable tasks and/or +idle CPUs, the scheduler might not search all CPUs in the domain +every time. In fact, in some architectures, the searching ranges on +events are limited in the same socket or node where the CPU locates, +while the load balance on tick searches all. + +For example, assume CPU Z is relatively far from CPU X. Even if CPU Z +is idle while CPU X and the siblings are busy, scheduler can't migrate +woken task B from X to Z since it is out of its searching range. +As the result, task B on CPU X need to wait task A or wait load balance +on the next tick. For some applications in special situation, waiting +1 tick may be too long. + +The 'cpuset.sched_relax_domain_level' file allows you to request changing +this searching range as you like. This file takes int value which +indicates size of searching range in levels ideally as follows, +otherwise initial value -1 that indicates the cpuset has no request. + +====== =========================================================== + -1 no request. use system default or follow request of others. + 0 no search. + 1 search siblings (hyperthreads in a core). + 2 search cores in a package. + 3 search cpus in a node [= system wide on non-NUMA system] + 4 search nodes in a chunk of node [on NUMA system] + 5 search system wide [on NUMA system] +====== =========================================================== + +The system default is architecture dependent. The system default +can be changed using the relax_domain_level= boot parameter. + +This file is per-cpuset and affect the sched domain where the cpuset +belongs to. Therefore if the flag 'cpuset.sched_load_balance' of a cpuset +is disabled, then 'cpuset.sched_relax_domain_level' have no effect since +there is no sched domain belonging the cpuset. + +If multiple cpusets are overlapping and hence they form a single sched +domain, the largest value among those is used. Be careful, if one +requests 0 and others are -1 then 0 is used. + +Note that modifying this file will have both good and bad effects, +and whether it is acceptable or not depends on your situation. +Don't modify this file if you are not sure. + +If your situation is: + + - The migration costs between each cpu can be assumed considerably + small(for you) due to your special application's behavior or + special hardware support for CPU cache etc. + - The searching cost doesn't have impact(for you) or you can make + the searching cost enough small by managing cpuset to compact etc. + - The latency is required even it sacrifices cache hit rate etc. + then increasing 'sched_relax_domain_level' would benefit you. + + +1.9 How do I use cpusets ? +-------------------------- + +In order to minimize the impact of cpusets on critical kernel +code, such as the scheduler, and due to the fact that the kernel +does not support one task updating the memory placement of another +task directly, the impact on a task of changing its cpuset CPU +or Memory Node placement, or of changing to which cpuset a task +is attached, is subtle. + +If a cpuset has its Memory Nodes modified, then for each task attached +to that cpuset, the next time that the kernel attempts to allocate +a page of memory for that task, the kernel will notice the change +in the task's cpuset, and update its per-task memory placement to +remain within the new cpusets memory placement. If the task was using +mempolicy MPOL_BIND, and the nodes to which it was bound overlap with +its new cpuset, then the task will continue to use whatever subset +of MPOL_BIND nodes are still allowed in the new cpuset. If the task +was using MPOL_BIND and now none of its MPOL_BIND nodes are allowed +in the new cpuset, then the task will be essentially treated as if it +was MPOL_BIND bound to the new cpuset (even though its NUMA placement, +as queried by get_mempolicy(), doesn't change). If a task is moved +from one cpuset to another, then the kernel will adjust the task's +memory placement, as above, the next time that the kernel attempts +to allocate a page of memory for that task. + +If a cpuset has its 'cpuset.cpus' modified, then each task in that cpuset +will have its allowed CPU placement changed immediately. Similarly, +if a task's pid is written to another cpuset's 'tasks' file, then its +allowed CPU placement is changed immediately. If such a task had been +bound to some subset of its cpuset using the sched_setaffinity() call, +the task will be allowed to run on any CPU allowed in its new cpuset, +negating the effect of the prior sched_setaffinity() call. + +In summary, the memory placement of a task whose cpuset is changed is +updated by the kernel, on the next allocation of a page for that task, +and the processor placement is updated immediately. + +Normally, once a page is allocated (given a physical page +of main memory) then that page stays on whatever node it +was allocated, so long as it remains allocated, even if the +cpusets memory placement policy 'cpuset.mems' subsequently changes. +If the cpuset flag file 'cpuset.memory_migrate' is set true, then when +tasks are attached to that cpuset, any pages that task had +allocated to it on nodes in its previous cpuset are migrated +to the task's new cpuset. The relative placement of the page within +the cpuset is preserved during these migration operations if possible. +For example if the page was on the second valid node of the prior cpuset +then the page will be placed on the second valid node of the new cpuset. + +Also if 'cpuset.memory_migrate' is set true, then if that cpuset's +'cpuset.mems' file is modified, pages allocated to tasks in that +cpuset, that were on nodes in the previous setting of 'cpuset.mems', +will be moved to nodes in the new setting of 'mems.' +Pages that were not in the task's prior cpuset, or in the cpuset's +prior 'cpuset.mems' setting, will not be moved. + +There is an exception to the above. If hotplug functionality is used +to remove all the CPUs that are currently assigned to a cpuset, +then all the tasks in that cpuset will be moved to the nearest ancestor +with non-empty cpus. But the moving of some (or all) tasks might fail if +cpuset is bound with another cgroup subsystem which has some restrictions +on task attaching. In this failing case, those tasks will stay +in the original cpuset, and the kernel will automatically update +their cpus_allowed to allow all online CPUs. When memory hotplug +functionality for removing Memory Nodes is available, a similar exception +is expected to apply there as well. In general, the kernel prefers to +violate cpuset placement, over starving a task that has had all +its allowed CPUs or Memory Nodes taken offline. + +There is a second exception to the above. GFP_ATOMIC requests are +kernel internal allocations that must be satisfied, immediately. +The kernel may drop some request, in rare cases even panic, if a +GFP_ATOMIC alloc fails. If the request cannot be satisfied within +the current task's cpuset, then we relax the cpuset, and look for +memory anywhere we can find it. It's better to violate the cpuset +than stress the kernel. + +To start a new job that is to be contained within a cpuset, the steps are: + + 1) mkdir /sys/fs/cgroup/cpuset + 2) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset + 3) Create the new cpuset by doing mkdir's and write's (or echo's) in + the /sys/fs/cgroup/cpuset virtual file system. + 4) Start a task that will be the "founding father" of the new job. + 5) Attach that task to the new cpuset by writing its pid to the + /sys/fs/cgroup/cpuset tasks file for that cpuset. + 6) fork, exec or clone the job tasks from this founding father task. + +For example, the following sequence of commands will setup a cpuset +named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, +and then start a subshell 'sh' in that cpuset:: + + mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset + cd /sys/fs/cgroup/cpuset + mkdir Charlie + cd Charlie + /bin/echo 2-3 > cpuset.cpus + /bin/echo 1 > cpuset.mems + /bin/echo $$ > tasks + sh + # The subshell 'sh' is now running in cpuset Charlie + # The next line should display '/Charlie' + cat /proc/self/cpuset + +There are ways to query or modify cpusets: + + - via the cpuset file system directly, using the various cd, mkdir, echo, + cat, rmdir commands from the shell, or their equivalent from C. + - via the C library libcpuset. + - via the C library libcgroup. + (http://sourceforge.net/projects/libcg/) + - via the python application cset. + (http://code.google.com/p/cpuset/) + +The sched_setaffinity calls can also be done at the shell prompt using +SGI's runon or Robert Love's taskset. The mbind and set_mempolicy +calls can be done at the shell prompt using the numactl command +(part of Andi Kleen's numa package). + +2. Usage Examples and Syntax +============================ + +2.1 Basic Usage +--------------- + +Creating, modifying, using the cpusets can be done through the cpuset +virtual filesystem. + +To mount it, type: +# mount -t cgroup -o cpuset cpuset /sys/fs/cgroup/cpuset + +Then under /sys/fs/cgroup/cpuset you can find a tree that corresponds to the +tree of the cpusets in the system. For instance, /sys/fs/cgroup/cpuset +is the cpuset that holds the whole system. + +If you want to create a new cpuset under /sys/fs/cgroup/cpuset:: + + # cd /sys/fs/cgroup/cpuset + # mkdir my_cpuset + +Now you want to do something with this cpuset:: + + # cd my_cpuset + +In this directory you can find several files:: + + # ls + cgroup.clone_children cpuset.memory_pressure + cgroup.event_control cpuset.memory_spread_page + cgroup.procs cpuset.memory_spread_slab + cpuset.cpu_exclusive cpuset.mems + cpuset.cpus cpuset.sched_load_balance + cpuset.mem_exclusive cpuset.sched_relax_domain_level + cpuset.mem_hardwall notify_on_release + cpuset.memory_migrate tasks + +Reading them will give you information about the state of this cpuset: +the CPUs and Memory Nodes it can use, the processes that are using +it, its properties. By writing to these files you can manipulate +the cpuset. + +Set some flags:: + + # /bin/echo 1 > cpuset.cpu_exclusive + +Add some cpus:: + + # /bin/echo 0-7 > cpuset.cpus + +Add some mems:: + + # /bin/echo 0-7 > cpuset.mems + +Now attach your shell to this cpuset:: + + # /bin/echo $$ > tasks + +You can also create cpusets inside your cpuset by using mkdir in this +directory:: + + # mkdir my_sub_cs + +To remove a cpuset, just use rmdir:: + + # rmdir my_sub_cs + +This will fail if the cpuset is in use (has cpusets inside, or has +processes attached). + +Note that for legacy reasons, the "cpuset" filesystem exists as a +wrapper around the cgroup filesystem. + +The command:: + + mount -t cpuset X /sys/fs/cgroup/cpuset + +is equivalent to:: + + mount -t cgroup -ocpuset,noprefix X /sys/fs/cgroup/cpuset + echo "/sbin/cpuset_release_agent" > /sys/fs/cgroup/cpuset/release_agent + +2.2 Adding/removing cpus +------------------------ + +This is the syntax to use when writing in the cpus or mems files +in cpuset directories:: + + # /bin/echo 1-4 > cpuset.cpus -> set cpus list to cpus 1,2,3,4 + # /bin/echo 1,2,3,4 > cpuset.cpus -> set cpus list to cpus 1,2,3,4 + +To add a CPU to a cpuset, write the new list of CPUs including the +CPU to be added. To add 6 to the above cpuset:: + + # /bin/echo 1-4,6 > cpuset.cpus -> set cpus list to cpus 1,2,3,4,6 + +Similarly to remove a CPU from a cpuset, write the new list of CPUs +without the CPU to be removed. + +To remove all the CPUs:: + + # /bin/echo "" > cpuset.cpus -> clear cpus list + +2.3 Setting flags +----------------- + +The syntax is very simple:: + + # /bin/echo 1 > cpuset.cpu_exclusive -> set flag 'cpuset.cpu_exclusive' + # /bin/echo 0 > cpuset.cpu_exclusive -> unset flag 'cpuset.cpu_exclusive' + +2.4 Attaching processes +----------------------- + +:: + + # /bin/echo PID > tasks + +Note that it is PID, not PIDs. You can only attach ONE task at a time. +If you have several tasks to attach, you have to do it one after another:: + + # /bin/echo PID1 > tasks + # /bin/echo PID2 > tasks + ... + # /bin/echo PIDn > tasks + + +3. Questions +============ + +Q: + what's up with this '/bin/echo' ? + +A: + bash's builtin 'echo' command does not check calls to write() against + errors. If you use it in the cpuset file system, you won't be + able to tell whether a command succeeded or failed. + +Q: + When I attach processes, only the first of the line gets really attached ! + +A: + We can only return one error code per call to write(). So you should also + put only ONE pid. + +4. Contact +========== + +Web: http://www.bullopensource.org/cpuset diff --git a/Documentation/cgroup-v1/cpusets.txt b/Documentation/cgroup-v1/cpusets.txt deleted file mode 100644 index 8402dd6de8df..000000000000 --- a/Documentation/cgroup-v1/cpusets.txt +++ /dev/null @@ -1,839 +0,0 @@ - CPUSETS - ------- - -Copyright (C) 2004 BULL SA. -Written by Simon.Derr@bull.net - -Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. -Modified by Paul Jackson -Modified by Christoph Lameter -Modified by Paul Menage -Modified by Hidetoshi Seto - -CONTENTS: -========= - -1. Cpusets - 1.1 What are cpusets ? - 1.2 Why are cpusets needed ? - 1.3 How are cpusets implemented ? - 1.4 What are exclusive cpusets ? - 1.5 What is memory_pressure ? - 1.6 What is memory spread ? - 1.7 What is sched_load_balance ? - 1.8 What is sched_relax_domain_level ? - 1.9 How do I use cpusets ? -2. Usage Examples and Syntax - 2.1 Basic Usage - 2.2 Adding/removing cpus - 2.3 Setting flags - 2.4 Attaching processes -3. Questions -4. Contact - -1. Cpusets -========== - -1.1 What are cpusets ? ----------------------- - -Cpusets provide a mechanism for assigning a set of CPUs and Memory -Nodes to a set of tasks. In this document "Memory Node" refers to -an on-line node that contains memory. - -Cpusets constrain the CPU and Memory placement of tasks to only -the resources within a task's current cpuset. They form a nested -hierarchy visible in a virtual file system. These are the essential -hooks, beyond what is already present, required to manage dynamic -job placement on large systems. - -Cpusets use the generic cgroup subsystem described in -Documentation/cgroup-v1/cgroups.txt. - -Requests by a task, using the sched_setaffinity(2) system call to -include CPUs in its CPU affinity mask, and using the mbind(2) and -set_mempolicy(2) system calls to include Memory Nodes in its memory -policy, are both filtered through that task's cpuset, filtering out any -CPUs or Memory Nodes not in that cpuset. The scheduler will not -schedule a task on a CPU that is not allowed in its cpus_allowed -vector, and the kernel page allocator will not allocate a page on a -node that is not allowed in the requesting task's mems_allowed vector. - -User level code may create and destroy cpusets by name in the cgroup -virtual file system, manage the attributes and permissions of these -cpusets and which CPUs and Memory Nodes are assigned to each cpuset, -specify and query to which cpuset a task is assigned, and list the -task pids assigned to a cpuset. - - -1.2 Why are cpusets needed ? ----------------------------- - -The management of large computer systems, with many processors (CPUs), -complex memory cache hierarchies and multiple Memory Nodes having -non-uniform access times (NUMA) presents additional challenges for -the efficient scheduling and memory placement of processes. - -Frequently more modest sized systems can be operated with adequate -efficiency just by letting the operating system automatically share -the available CPU and Memory resources amongst the requesting tasks. - -But larger systems, which benefit more from careful processor and -memory placement to reduce memory access times and contention, -and which typically represent a larger investment for the customer, -can benefit from explicitly placing jobs on properly sized subsets of -the system. - -This can be especially valuable on: - - * Web Servers running multiple instances of the same web application, - * Servers running different applications (for instance, a web server - and a database), or - * NUMA systems running large HPC applications with demanding - performance characteristics. - -These subsets, or "soft partitions" must be able to be dynamically -adjusted, as the job mix changes, without impacting other concurrently -executing jobs. The location of the running jobs pages may also be moved -when the memory locations are changed. - -The kernel cpuset patch provides the minimum essential kernel -mechanisms required to efficiently implement such subsets. It -leverages existing CPU and Memory Placement facilities in the Linux -kernel to avoid any additional impact on the critical scheduler or -memory allocator code. - - -1.3 How are cpusets implemented ? ---------------------------------- - -Cpusets provide a Linux kernel mechanism to constrain which CPUs and -Memory Nodes are used by a process or set of processes. - -The Linux kernel already has a pair of mechanisms to specify on which -CPUs a task may be scheduled (sched_setaffinity) and on which Memory -Nodes it may obtain memory (mbind, set_mempolicy). - -Cpusets extends these two mechanisms as follows: - - - Cpusets are sets of allowed CPUs and Memory Nodes, known to the - kernel. - - Each task in the system is attached to a cpuset, via a pointer - in the task structure to a reference counted cgroup structure. - - Calls to sched_setaffinity are filtered to just those CPUs - allowed in that task's cpuset. - - Calls to mbind and set_mempolicy are filtered to just - those Memory Nodes allowed in that task's cpuset. - - The root cpuset contains all the systems CPUs and Memory - Nodes. - - For any cpuset, one can define child cpusets containing a subset - of the parents CPU and Memory Node resources. - - The hierarchy of cpusets can be mounted at /dev/cpuset, for - browsing and manipulation from user space. - - A cpuset may be marked exclusive, which ensures that no other - cpuset (except direct ancestors and descendants) may contain - any overlapping CPUs or Memory Nodes. - - You can list all the tasks (by pid) attached to any cpuset. - -The implementation of cpusets requires a few, simple hooks -into the rest of the kernel, none in performance critical paths: - - - in init/main.c, to initialize the root cpuset at system boot. - - in fork and exit, to attach and detach a task from its cpuset. - - in sched_setaffinity, to mask the requested CPUs by what's - allowed in that task's cpuset. - - in sched.c migrate_live_tasks(), to keep migrating tasks within - the CPUs allowed by their cpuset, if possible. - - in the mbind and set_mempolicy system calls, to mask the requested - Memory Nodes by what's allowed in that task's cpuset. - - in page_alloc.c, to restrict memory to allowed nodes. - - in vmscan.c, to restrict page recovery to the current cpuset. - -You should mount the "cgroup" filesystem type in order to enable -browsing and modifying the cpusets presently known to the kernel. No -new system calls are added for cpusets - all support for querying and -modifying cpusets is via this cpuset file system. - -The /proc//status file for each task has four added lines, -displaying the task's cpus_allowed (on which CPUs it may be scheduled) -and mems_allowed (on which Memory Nodes it may obtain memory), -in the two formats seen in the following example: - - Cpus_allowed: ffffffff,ffffffff,ffffffff,ffffffff - Cpus_allowed_list: 0-127 - Mems_allowed: ffffffff,ffffffff - Mems_allowed_list: 0-63 - -Each cpuset is represented by a directory in the cgroup file system -containing (on top of the standard cgroup files) the following -files describing that cpuset: - - - cpuset.cpus: list of CPUs in that cpuset - - cpuset.mems: list of Memory Nodes in that cpuset - - cpuset.memory_migrate flag: if set, move pages to cpusets nodes - - cpuset.cpu_exclusive flag: is cpu placement exclusive? - - cpuset.mem_exclusive flag: is memory placement exclusive? - - cpuset.mem_hardwall flag: is memory allocation hardwalled - - cpuset.memory_pressure: measure of how much paging pressure in cpuset - - cpuset.memory_spread_page flag: if set, spread page cache evenly on allowed nodes - - cpuset.memory_spread_slab flag: if set, spread slab cache evenly on allowed nodes - - cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset - - cpuset.sched_relax_domain_level: the searching range when migrating tasks - -In addition, only the root cpuset has the following file: - - cpuset.memory_pressure_enabled flag: compute memory_pressure? - -New cpusets are created using the mkdir system call or shell -command. The properties of a cpuset, such as its flags, allowed -CPUs and Memory Nodes, and attached tasks, are modified by writing -to the appropriate file in that cpusets directory, as listed above. - -The named hierarchical structure of nested cpusets allows partitioning -a large system into nested, dynamically changeable, "soft-partitions". - -The attachment of each task, automatically inherited at fork by any -children of that task, to a cpuset allows organizing the work load -on a system into related sets of tasks such that each set is constrained -to using the CPUs and Memory Nodes of a particular cpuset. A task -may be re-attached to any other cpuset, if allowed by the permissions -on the necessary cpuset file system directories. - -Such management of a system "in the large" integrates smoothly with -the detailed placement done on individual tasks and memory regions -using the sched_setaffinity, mbind and set_mempolicy system calls. - -The following rules apply to each cpuset: - - - Its CPUs and Memory Nodes must be a subset of its parents. - - It can't be marked exclusive unless its parent is. - - If its cpu or memory is exclusive, they may not overlap any sibling. - -These rules, and the natural hierarchy of cpusets, enable efficient -enforcement of the exclusive guarantee, without having to scan all -cpusets every time any of them change to ensure nothing overlaps a -exclusive cpuset. Also, the use of a Linux virtual file system (vfs) -to represent the cpuset hierarchy provides for a familiar permission -and name space for cpusets, with a minimum of additional kernel code. - -The cpus and mems files in the root (top_cpuset) cpuset are -read-only. The cpus file automatically tracks the value of -cpu_online_mask using a CPU hotplug notifier, and the mems file -automatically tracks the value of node_states[N_MEMORY]--i.e., -nodes with memory--using the cpuset_track_online_nodes() hook. - - -1.4 What are exclusive cpusets ? --------------------------------- - -If a cpuset is cpu or mem exclusive, no other cpuset, other than -a direct ancestor or descendant, may share any of the same CPUs or -Memory Nodes. - -A cpuset that is cpuset.mem_exclusive *or* cpuset.mem_hardwall is "hardwalled", -i.e. it restricts kernel allocations for page, buffer and other data -commonly shared by the kernel across multiple users. All cpusets, -whether hardwalled or not, restrict allocations of memory for user -space. This enables configuring a system so that several independent -jobs can share common kernel data, such as file system pages, while -isolating each job's user allocation in its own cpuset. To do this, -construct a large mem_exclusive cpuset to hold all the jobs, and -construct child, non-mem_exclusive cpusets for each individual job. -Only a small amount of typical kernel memory, such as requests from -interrupt handlers, is allowed to be taken outside even a -mem_exclusive cpuset. - - -1.5 What is memory_pressure ? ------------------------------ -The memory_pressure of a cpuset provides a simple per-cpuset metric -of the rate that the tasks in a cpuset are attempting to free up in -use memory on the nodes of the cpuset to satisfy additional memory -requests. - -This enables batch managers monitoring jobs running in dedicated -cpusets to efficiently detect what level of memory pressure that job -is causing. - -This is useful both on tightly managed systems running a wide mix of -submitted jobs, which may choose to terminate or re-prioritize jobs that -are trying to use more memory than allowed on the nodes assigned to them, -and with tightly coupled, long running, massively parallel scientific -computing jobs that will dramatically fail to meet required performance -goals if they start to use more memory than allowed to them. - -This mechanism provides a very economical way for the batch manager -to monitor a cpuset for signs of memory pressure. It's up to the -batch manager or other user code to decide what to do about it and -take action. - -==> Unless this feature is enabled by writing "1" to the special file - /dev/cpuset/memory_pressure_enabled, the hook in the rebalance - code of __alloc_pages() for this metric reduces to simply noticing - that the cpuset_memory_pressure_enabled flag is zero. So only - systems that enable this feature will compute the metric. - -Why a per-cpuset, running average: - - Because this meter is per-cpuset, rather than per-task or mm, - the system load imposed by a batch scheduler monitoring this - metric is sharply reduced on large systems, because a scan of - the tasklist can be avoided on each set of queries. - - Because this meter is a running average, instead of an accumulating - counter, a batch scheduler can detect memory pressure with a - single read, instead of having to read and accumulate results - for a period of time. - - Because this meter is per-cpuset rather than per-task or mm, - the batch scheduler can obtain the key information, memory - pressure in a cpuset, with a single read, rather than having to - query and accumulate results over all the (dynamically changing) - set of tasks in the cpuset. - -A per-cpuset simple digital filter (requires a spinlock and 3 words -of data per-cpuset) is kept, and updated by any task attached to that -cpuset, if it enters the synchronous (direct) page reclaim code. - -A per-cpuset file provides an integer number representing the recent -(half-life of 10 seconds) rate of direct page reclaims caused by -the tasks in the cpuset, in units of reclaims attempted per second, -times 1000. - - -1.6 What is memory spread ? ---------------------------- -There are two boolean flag files per cpuset that control where the -kernel allocates pages for the file system buffers and related in -kernel data structures. They are called 'cpuset.memory_spread_page' and -'cpuset.memory_spread_slab'. - -If the per-cpuset boolean flag file 'cpuset.memory_spread_page' is set, then -the kernel will spread the file system buffers (page cache) evenly -over all the nodes that the faulting task is allowed to use, instead -of preferring to put those pages on the node where the task is running. - -If the per-cpuset boolean flag file 'cpuset.memory_spread_slab' is set, -then the kernel will spread some file system related slab caches, -such as for inodes and dentries evenly over all the nodes that the -faulting task is allowed to use, instead of preferring to put those -pages on the node where the task is running. - -The setting of these flags does not affect anonymous data segment or -stack segment pages of a task. - -By default, both kinds of memory spreading are off, and memory -pages are allocated on the node local to where the task is running, -except perhaps as modified by the task's NUMA mempolicy or cpuset -configuration, so long as sufficient free memory pages are available. - -When new cpusets are created, they inherit the memory spread settings -of their parent. - -Setting memory spreading causes allocations for the affected page -or slab caches to ignore the task's NUMA mempolicy and be spread -instead. Tasks using mbind() or set_mempolicy() calls to set NUMA -mempolicies will not notice any change in these calls as a result of -their containing task's memory spread settings. If memory spreading -is turned off, then the currently specified NUMA mempolicy once again -applies to memory page allocations. - -Both 'cpuset.memory_spread_page' and 'cpuset.memory_spread_slab' are boolean flag -files. By default they contain "0", meaning that the feature is off -for that cpuset. If a "1" is written to that file, then that turns -the named feature on. - -The implementation is simple. - -Setting the flag 'cpuset.memory_spread_page' turns on a per-process flag -PFA_SPREAD_PAGE for each task that is in that cpuset or subsequently -joins that cpuset. The page allocation calls for the page cache -is modified to perform an inline check for this PFA_SPREAD_PAGE task -flag, and if set, a call to a new routine cpuset_mem_spread_node() -returns the node to prefer for the allocation. - -Similarly, setting 'cpuset.memory_spread_slab' turns on the flag -PFA_SPREAD_SLAB, and appropriately marked slab caches will allocate -pages from the node returned by cpuset_mem_spread_node(). - -The cpuset_mem_spread_node() routine is also simple. It uses the -value of a per-task rotor cpuset_mem_spread_rotor to select the next -node in the current task's mems_allowed to prefer for the allocation. - -This memory placement policy is also known (in other contexts) as -round-robin or interleave. - -This policy can provide substantial improvements for jobs that need -to place thread local data on the corresponding node, but that need -to access large file system data sets that need to be spread across -the several nodes in the jobs cpuset in order to fit. Without this -policy, especially for jobs that might have one thread reading in the -data set, the memory allocation across the nodes in the jobs cpuset -can become very uneven. - -1.7 What is sched_load_balance ? --------------------------------- - -The kernel scheduler (kernel/sched/core.c) automatically load balances -tasks. If one CPU is underutilized, kernel code running on that -CPU will look for tasks on other more overloaded CPUs and move those -tasks to itself, within the constraints of such placement mechanisms -as cpusets and sched_setaffinity. - -The algorithmic cost of load balancing and its impact on key shared -kernel data structures such as the task list increases more than -linearly with the number of CPUs being balanced. So the scheduler -has support to partition the systems CPUs into a number of sched -domains such that it only load balances within each sched domain. -Each sched domain covers some subset of the CPUs in the system; -no two sched domains overlap; some CPUs might not be in any sched -domain and hence won't be load balanced. - -Put simply, it costs less to balance between two smaller sched domains -than one big one, but doing so means that overloads in one of the -two domains won't be load balanced to the other one. - -By default, there is one sched domain covering all CPUs, including those -marked isolated using the kernel boot time "isolcpus=" argument. However, -the isolated CPUs will not participate in load balancing, and will not -have tasks running on them unless explicitly assigned. - -This default load balancing across all CPUs is not well suited for -the following two situations: - 1) On large systems, load balancing across many CPUs is expensive. - If the system is managed using cpusets to place independent jobs - on separate sets of CPUs, full load balancing is unnecessary. - 2) Systems supporting realtime on some CPUs need to minimize - system overhead on those CPUs, including avoiding task load - balancing if that is not needed. - -When the per-cpuset flag "cpuset.sched_load_balance" is enabled (the default -setting), it requests that all the CPUs in that cpusets allowed 'cpuset.cpus' -be contained in a single sched domain, ensuring that load balancing -can move a task (not otherwised pinned, as by sched_setaffinity) -from any CPU in that cpuset to any other. - -When the per-cpuset flag "cpuset.sched_load_balance" is disabled, then the -scheduler will avoid load balancing across the CPUs in that cpuset, ---except-- in so far as is necessary because some overlapping cpuset -has "sched_load_balance" enabled. - -So, for example, if the top cpuset has the flag "cpuset.sched_load_balance" -enabled, then the scheduler will have one sched domain covering all -CPUs, and the setting of the "cpuset.sched_load_balance" flag in any other -cpusets won't matter, as we're already fully load balancing. - -Therefore in the above two situations, the top cpuset flag -"cpuset.sched_load_balance" should be disabled, and only some of the smaller, -child cpusets have this flag enabled. - -When doing this, you don't usually want to leave any unpinned tasks in -the top cpuset that might use non-trivial amounts of CPU, as such tasks -may be artificially constrained to some subset of CPUs, depending on -the particulars of this flag setting in descendant cpusets. Even if -such a task could use spare CPU cycles in some other CPUs, the kernel -scheduler might not consider the possibility of load balancing that -task to that underused CPU. - -Of course, tasks pinned to a particular CPU can be left in a cpuset -that disables "cpuset.sched_load_balance" as those tasks aren't going anywhere -else anyway. - -There is an impedance mismatch here, between cpusets and sched domains. -Cpusets are hierarchical and nest. Sched domains are flat; they don't -overlap and each CPU is in at most one sched domain. - -It is necessary for sched domains to be flat because load balancing -across partially overlapping sets of CPUs would risk unstable dynamics -that would be beyond our understanding. So if each of two partially -overlapping cpusets enables the flag 'cpuset.sched_load_balance', then we -form a single sched domain that is a superset of both. We won't move -a task to a CPU outside its cpuset, but the scheduler load balancing -code might waste some compute cycles considering that possibility. - -This mismatch is why there is not a simple one-to-one relation -between which cpusets have the flag "cpuset.sched_load_balance" enabled, -and the sched domain configuration. If a cpuset enables the flag, it -will get balancing across all its CPUs, but if it disables the flag, -it will only be assured of no load balancing if no other overlapping -cpuset enables the flag. - -If two cpusets have partially overlapping 'cpuset.cpus' allowed, and only -one of them has this flag enabled, then the other may find its -tasks only partially load balanced, just on the overlapping CPUs. -This is just the general case of the top_cpuset example given a few -paragraphs above. In the general case, as in the top cpuset case, -don't leave tasks that might use non-trivial amounts of CPU in -such partially load balanced cpusets, as they may be artificially -constrained to some subset of the CPUs allowed to them, for lack of -load balancing to the other CPUs. - -CPUs in "cpuset.isolcpus" were excluded from load balancing by the -isolcpus= kernel boot option, and will never be load balanced regardless -of the value of "cpuset.sched_load_balance" in any cpuset. - -1.7.1 sched_load_balance implementation details. ------------------------------------------------- - -The per-cpuset flag 'cpuset.sched_load_balance' defaults to enabled (contrary -to most cpuset flags.) When enabled for a cpuset, the kernel will -ensure that it can load balance across all the CPUs in that cpuset -(makes sure that all the CPUs in the cpus_allowed of that cpuset are -in the same sched domain.) - -If two overlapping cpusets both have 'cpuset.sched_load_balance' enabled, -then they will be (must be) both in the same sched domain. - -If, as is the default, the top cpuset has 'cpuset.sched_load_balance' enabled, -then by the above that means there is a single sched domain covering -the whole system, regardless of any other cpuset settings. - -The kernel commits to user space that it will avoid load balancing -where it can. It will pick as fine a granularity partition of sched -domains as it can while still providing load balancing for any set -of CPUs allowed to a cpuset having 'cpuset.sched_load_balance' enabled. - -The internal kernel cpuset to scheduler interface passes from the -cpuset code to the scheduler code a partition of the load balanced -CPUs in the system. This partition is a set of subsets (represented -as an array of struct cpumask) of CPUs, pairwise disjoint, that cover -all the CPUs that must be load balanced. - -The cpuset code builds a new such partition and passes it to the -scheduler sched domain setup code, to have the sched domains rebuilt -as necessary, whenever: - - the 'cpuset.sched_load_balance' flag of a cpuset with non-empty CPUs changes, - - or CPUs come or go from a cpuset with this flag enabled, - - or 'cpuset.sched_relax_domain_level' value of a cpuset with non-empty CPUs - and with this flag enabled changes, - - or a cpuset with non-empty CPUs and with this flag enabled is removed, - - or a cpu is offlined/onlined. - -This partition exactly defines what sched domains the scheduler should -setup - one sched domain for each element (struct cpumask) in the -partition. - -The scheduler remembers the currently active sched domain partitions. -When the scheduler routine partition_sched_domains() is invoked from -the cpuset code to update these sched domains, it compares the new -partition requested with the current, and updates its sched domains, -removing the old and adding the new, for each change. - - -1.8 What is sched_relax_domain_level ? --------------------------------------- - -In sched domain, the scheduler migrates tasks in 2 ways; periodic load -balance on tick, and at time of some schedule events. - -When a task is woken up, scheduler try to move the task on idle CPU. -For example, if a task A running on CPU X activates another task B -on the same CPU X, and if CPU Y is X's sibling and performing idle, -then scheduler migrate task B to CPU Y so that task B can start on -CPU Y without waiting task A on CPU X. - -And if a CPU run out of tasks in its runqueue, the CPU try to pull -extra tasks from other busy CPUs to help them before it is going to -be idle. - -Of course it takes some searching cost to find movable tasks and/or -idle CPUs, the scheduler might not search all CPUs in the domain -every time. In fact, in some architectures, the searching ranges on -events are limited in the same socket or node where the CPU locates, -while the load balance on tick searches all. - -For example, assume CPU Z is relatively far from CPU X. Even if CPU Z -is idle while CPU X and the siblings are busy, scheduler can't migrate -woken task B from X to Z since it is out of its searching range. -As the result, task B on CPU X need to wait task A or wait load balance -on the next tick. For some applications in special situation, waiting -1 tick may be too long. - -The 'cpuset.sched_relax_domain_level' file allows you to request changing -this searching range as you like. This file takes int value which -indicates size of searching range in levels ideally as follows, -otherwise initial value -1 that indicates the cpuset has no request. - - -1 : no request. use system default or follow request of others. - 0 : no search. - 1 : search siblings (hyperthreads in a core). - 2 : search cores in a package. - 3 : search cpus in a node [= system wide on non-NUMA system] - 4 : search nodes in a chunk of node [on NUMA system] - 5 : search system wide [on NUMA system] - -The system default is architecture dependent. The system default -can be changed using the relax_domain_level= boot parameter. - -This file is per-cpuset and affect the sched domain where the cpuset -belongs to. Therefore if the flag 'cpuset.sched_load_balance' of a cpuset -is disabled, then 'cpuset.sched_relax_domain_level' have no effect since -there is no sched domain belonging the cpuset. - -If multiple cpusets are overlapping and hence they form a single sched -domain, the largest value among those is used. Be careful, if one -requests 0 and others are -1 then 0 is used. - -Note that modifying this file will have both good and bad effects, -and whether it is acceptable or not depends on your situation. -Don't modify this file if you are not sure. - -If your situation is: - - The migration costs between each cpu can be assumed considerably - small(for you) due to your special application's behavior or - special hardware support for CPU cache etc. - - The searching cost doesn't have impact(for you) or you can make - the searching cost enough small by managing cpuset to compact etc. - - The latency is required even it sacrifices cache hit rate etc. -then increasing 'sched_relax_domain_level' would benefit you. - - -1.9 How do I use cpusets ? --------------------------- - -In order to minimize the impact of cpusets on critical kernel -code, such as the scheduler, and due to the fact that the kernel -does not support one task updating the memory placement of another -task directly, the impact on a task of changing its cpuset CPU -or Memory Node placement, or of changing to which cpuset a task -is attached, is subtle. - -If a cpuset has its Memory Nodes modified, then for each task attached -to that cpuset, the next time that the kernel attempts to allocate -a page of memory for that task, the kernel will notice the change -in the task's cpuset, and update its per-task memory placement to -remain within the new cpusets memory placement. If the task was using -mempolicy MPOL_BIND, and the nodes to which it was bound overlap with -its new cpuset, then the task will continue to use whatever subset -of MPOL_BIND nodes are still allowed in the new cpuset. If the task -was using MPOL_BIND and now none of its MPOL_BIND nodes are allowed -in the new cpuset, then the task will be essentially treated as if it -was MPOL_BIND bound to the new cpuset (even though its NUMA placement, -as queried by get_mempolicy(), doesn't change). If a task is moved -from one cpuset to another, then the kernel will adjust the task's -memory placement, as above, the next time that the kernel attempts -to allocate a page of memory for that task. - -If a cpuset has its 'cpuset.cpus' modified, then each task in that cpuset -will have its allowed CPU placement changed immediately. Similarly, -if a task's pid is written to another cpuset's 'tasks' file, then its -allowed CPU placement is changed immediately. If such a task had been -bound to some subset of its cpuset using the sched_setaffinity() call, -the task will be allowed to run on any CPU allowed in its new cpuset, -negating the effect of the prior sched_setaffinity() call. - -In summary, the memory placement of a task whose cpuset is changed is -updated by the kernel, on the next allocation of a page for that task, -and the processor placement is updated immediately. - -Normally, once a page is allocated (given a physical page -of main memory) then that page stays on whatever node it -was allocated, so long as it remains allocated, even if the -cpusets memory placement policy 'cpuset.mems' subsequently changes. -If the cpuset flag file 'cpuset.memory_migrate' is set true, then when -tasks are attached to that cpuset, any pages that task had -allocated to it on nodes in its previous cpuset are migrated -to the task's new cpuset. The relative placement of the page within -the cpuset is preserved during these migration operations if possible. -For example if the page was on the second valid node of the prior cpuset -then the page will be placed on the second valid node of the new cpuset. - -Also if 'cpuset.memory_migrate' is set true, then if that cpuset's -'cpuset.mems' file is modified, pages allocated to tasks in that -cpuset, that were on nodes in the previous setting of 'cpuset.mems', -will be moved to nodes in the new setting of 'mems.' -Pages that were not in the task's prior cpuset, or in the cpuset's -prior 'cpuset.mems' setting, will not be moved. - -There is an exception to the above. If hotplug functionality is used -to remove all the CPUs that are currently assigned to a cpuset, -then all the tasks in that cpuset will be moved to the nearest ancestor -with non-empty cpus. But the moving of some (or all) tasks might fail if -cpuset is bound with another cgroup subsystem which has some restrictions -on task attaching. In this failing case, those tasks will stay -in the original cpuset, and the kernel will automatically update -their cpus_allowed to allow all online CPUs. When memory hotplug -functionality for removing Memory Nodes is available, a similar exception -is expected to apply there as well. In general, the kernel prefers to -violate cpuset placement, over starving a task that has had all -its allowed CPUs or Memory Nodes taken offline. - -There is a second exception to the above. GFP_ATOMIC requests are -kernel internal allocations that must be satisfied, immediately. -The kernel may drop some request, in rare cases even panic, if a -GFP_ATOMIC alloc fails. If the request cannot be satisfied within -the current task's cpuset, then we relax the cpuset, and look for -memory anywhere we can find it. It's better to violate the cpuset -than stress the kernel. - -To start a new job that is to be contained within a cpuset, the steps are: - - 1) mkdir /sys/fs/cgroup/cpuset - 2) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset - 3) Create the new cpuset by doing mkdir's and write's (or echo's) in - the /sys/fs/cgroup/cpuset virtual file system. - 4) Start a task that will be the "founding father" of the new job. - 5) Attach that task to the new cpuset by writing its pid to the - /sys/fs/cgroup/cpuset tasks file for that cpuset. - 6) fork, exec or clone the job tasks from this founding father task. - -For example, the following sequence of commands will setup a cpuset -named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, -and then start a subshell 'sh' in that cpuset: - - mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset - cd /sys/fs/cgroup/cpuset - mkdir Charlie - cd Charlie - /bin/echo 2-3 > cpuset.cpus - /bin/echo 1 > cpuset.mems - /bin/echo $$ > tasks - sh - # The subshell 'sh' is now running in cpuset Charlie - # The next line should display '/Charlie' - cat /proc/self/cpuset - -There are ways to query or modify cpusets: - - via the cpuset file system directly, using the various cd, mkdir, echo, - cat, rmdir commands from the shell, or their equivalent from C. - - via the C library libcpuset. - - via the C library libcgroup. - (http://sourceforge.net/projects/libcg/) - - via the python application cset. - (http://code.google.com/p/cpuset/) - -The sched_setaffinity calls can also be done at the shell prompt using -SGI's runon or Robert Love's taskset. The mbind and set_mempolicy -calls can be done at the shell prompt using the numactl command -(part of Andi Kleen's numa package). - -2. Usage Examples and Syntax -============================ - -2.1 Basic Usage ---------------- - -Creating, modifying, using the cpusets can be done through the cpuset -virtual filesystem. - -To mount it, type: -# mount -t cgroup -o cpuset cpuset /sys/fs/cgroup/cpuset - -Then under /sys/fs/cgroup/cpuset you can find a tree that corresponds to the -tree of the cpusets in the system. For instance, /sys/fs/cgroup/cpuset -is the cpuset that holds the whole system. - -If you want to create a new cpuset under /sys/fs/cgroup/cpuset: -# cd /sys/fs/cgroup/cpuset -# mkdir my_cpuset - -Now you want to do something with this cpuset. -# cd my_cpuset - -In this directory you can find several files: -# ls -cgroup.clone_children cpuset.memory_pressure -cgroup.event_control cpuset.memory_spread_page -cgroup.procs cpuset.memory_spread_slab -cpuset.cpu_exclusive cpuset.mems -cpuset.cpus cpuset.sched_load_balance -cpuset.mem_exclusive cpuset.sched_relax_domain_level -cpuset.mem_hardwall notify_on_release -cpuset.memory_migrate tasks - -Reading them will give you information about the state of this cpuset: -the CPUs and Memory Nodes it can use, the processes that are using -it, its properties. By writing to these files you can manipulate -the cpuset. - -Set some flags: -# /bin/echo 1 > cpuset.cpu_exclusive - -Add some cpus: -# /bin/echo 0-7 > cpuset.cpus - -Add some mems: -# /bin/echo 0-7 > cpuset.mems - -Now attach your shell to this cpuset: -# /bin/echo $$ > tasks - -You can also create cpusets inside your cpuset by using mkdir in this -directory. -# mkdir my_sub_cs - -To remove a cpuset, just use rmdir: -# rmdir my_sub_cs -This will fail if the cpuset is in use (has cpusets inside, or has -processes attached). - -Note that for legacy reasons, the "cpuset" filesystem exists as a -wrapper around the cgroup filesystem. - -The command - -mount -t cpuset X /sys/fs/cgroup/cpuset - -is equivalent to - -mount -t cgroup -ocpuset,noprefix X /sys/fs/cgroup/cpuset -echo "/sbin/cpuset_release_agent" > /sys/fs/cgroup/cpuset/release_agent - -2.2 Adding/removing cpus ------------------------- - -This is the syntax to use when writing in the cpus or mems files -in cpuset directories: - -# /bin/echo 1-4 > cpuset.cpus -> set cpus list to cpus 1,2,3,4 -# /bin/echo 1,2,3,4 > cpuset.cpus -> set cpus list to cpus 1,2,3,4 - -To add a CPU to a cpuset, write the new list of CPUs including the -CPU to be added. To add 6 to the above cpuset: - -# /bin/echo 1-4,6 > cpuset.cpus -> set cpus list to cpus 1,2,3,4,6 - -Similarly to remove a CPU from a cpuset, write the new list of CPUs -without the CPU to be removed. - -To remove all the CPUs: - -# /bin/echo "" > cpuset.cpus -> clear cpus list - -2.3 Setting flags ------------------ - -The syntax is very simple: - -# /bin/echo 1 > cpuset.cpu_exclusive -> set flag 'cpuset.cpu_exclusive' -# /bin/echo 0 > cpuset.cpu_exclusive -> unset flag 'cpuset.cpu_exclusive' - -2.4 Attaching processes ------------------------ - -# /bin/echo PID > tasks - -Note that it is PID, not PIDs. You can only attach ONE task at a time. -If you have several tasks to attach, you have to do it one after another: - -# /bin/echo PID1 > tasks -# /bin/echo PID2 > tasks - ... -# /bin/echo PIDn > tasks - - -3. Questions -============ - -Q: what's up with this '/bin/echo' ? -A: bash's builtin 'echo' command does not check calls to write() against - errors. If you use it in the cpuset file system, you won't be - able to tell whether a command succeeded or failed. - -Q: When I attach processes, only the first of the line gets really attached ! -A: We can only return one error code per call to write(). So you should also - put only ONE pid. - -4. Contact -========== - -Web: http://www.bullopensource.org/cpuset diff --git a/Documentation/cgroup-v1/devices.rst b/Documentation/cgroup-v1/devices.rst new file mode 100644 index 000000000000..e1886783961e --- /dev/null +++ b/Documentation/cgroup-v1/devices.rst @@ -0,0 +1,132 @@ +=========================== +Device Whitelist Controller +=========================== + +1. Description +============== + +Implement a cgroup to track and enforce open and mknod restrictions +on device files. A device cgroup associates a device access +whitelist with each cgroup. A whitelist entry has 4 fields. +'type' is a (all), c (char), or b (block). 'all' means it applies +to all types and all major and minor numbers. Major and minor are +either an integer or * for all. Access is a composition of r +(read), w (write), and m (mknod). + +The root device cgroup starts with rwm to 'all'. A child device +cgroup gets a copy of the parent. Administrators can then remove +devices from the whitelist or add new entries. A child cgroup can +never receive a device access which is denied by its parent. + +2. User Interface +================= + +An entry is added using devices.allow, and removed using +devices.deny. For instance:: + + echo 'c 1:3 mr' > /sys/fs/cgroup/1/devices.allow + +allows cgroup 1 to read and mknod the device usually known as +/dev/null. Doing:: + + echo a > /sys/fs/cgroup/1/devices.deny + +will remove the default 'a *:* rwm' entry. Doing:: + + echo a > /sys/fs/cgroup/1/devices.allow + +will add the 'a *:* rwm' entry to the whitelist. + +3. Security +=========== + +Any task can move itself between cgroups. This clearly won't +suffice, but we can decide the best way to adequately restrict +movement as people get some experience with this. We may just want +to require CAP_SYS_ADMIN, which at least is a separate bit from +CAP_MKNOD. We may want to just refuse moving to a cgroup which +isn't a descendant of the current one. Or we may want to use +CAP_MAC_ADMIN, since we really are trying to lock down root. + +CAP_SYS_ADMIN is needed to modify the whitelist or move another +task to a new cgroup. (Again we'll probably want to change that). + +A cgroup may not be granted more permissions than the cgroup's +parent has. + +4. Hierarchy +============ + +device cgroups maintain hierarchy by making sure a cgroup never has more +access permissions than its parent. Every time an entry is written to +a cgroup's devices.deny file, all its children will have that entry removed +from their whitelist and all the locally set whitelist entries will be +re-evaluated. In case one of the locally set whitelist entries would provide +more access than the cgroup's parent, it'll be removed from the whitelist. + +Example:: + + A + / \ + B + + group behavior exceptions + A allow "b 8:* rwm", "c 116:1 rw" + B deny "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm" + +If a device is denied in group A:: + + # echo "c 116:* r" > A/devices.deny + +it'll propagate down and after revalidating B's entries, the whitelist entry +"c 116:2 rwm" will be removed:: + + group whitelist entries denied devices + A all "b 8:* rwm", "c 116:* rw" + B "c 1:3 rwm", "b 3:* rwm" all the rest + +In case parent's exceptions change and local exceptions are not allowed +anymore, they'll be deleted. + +Notice that new whitelist entries will not be propagated:: + + A + / \ + B + + group whitelist entries denied devices + A "c 1:3 rwm", "c 1:5 r" all the rest + B "c 1:3 rwm", "c 1:5 r" all the rest + +when adding ``c *:3 rwm``:: + + # echo "c *:3 rwm" >A/devices.allow + +the result:: + + group whitelist entries denied devices + A "c *:3 rwm", "c 1:5 r" all the rest + B "c 1:3 rwm", "c 1:5 r" all the rest + +but now it'll be possible to add new entries to B:: + + # echo "c 2:3 rwm" >B/devices.allow + # echo "c 50:3 r" >B/devices.allow + +or even:: + + # echo "c *:3 rwm" >B/devices.allow + +Allowing or denying all by writing 'a' to devices.allow or devices.deny will +not be possible once the device cgroups has children. + +4.1 Hierarchy (internal implementation) +--------------------------------------- + +device cgroups is implemented internally using a behavior (ALLOW, DENY) and a +list of exceptions. The internal state is controlled using the same user +interface to preserve compatibility with the previous whitelist-only +implementation. Removal or addition of exceptions that will reduce the access +to devices will be propagated down the hierarchy. +For every propagated exception, the effective rules will be re-evaluated based +on current parent's access rules. diff --git a/Documentation/cgroup-v1/devices.txt b/Documentation/cgroup-v1/devices.txt deleted file mode 100644 index 3c1095ca02ea..000000000000 --- a/Documentation/cgroup-v1/devices.txt +++ /dev/null @@ -1,116 +0,0 @@ -Device Whitelist Controller - -1. Description: - -Implement a cgroup to track and enforce open and mknod restrictions -on device files. A device cgroup associates a device access -whitelist with each cgroup. A whitelist entry has 4 fields. -'type' is a (all), c (char), or b (block). 'all' means it applies -to all types and all major and minor numbers. Major and minor are -either an integer or * for all. Access is a composition of r -(read), w (write), and m (mknod). - -The root device cgroup starts with rwm to 'all'. A child device -cgroup gets a copy of the parent. Administrators can then remove -devices from the whitelist or add new entries. A child cgroup can -never receive a device access which is denied by its parent. - -2. User Interface - -An entry is added using devices.allow, and removed using -devices.deny. For instance - - echo 'c 1:3 mr' > /sys/fs/cgroup/1/devices.allow - -allows cgroup 1 to read and mknod the device usually known as -/dev/null. Doing - - echo a > /sys/fs/cgroup/1/devices.deny - -will remove the default 'a *:* rwm' entry. Doing - - echo a > /sys/fs/cgroup/1/devices.allow - -will add the 'a *:* rwm' entry to the whitelist. - -3. Security - -Any task can move itself between cgroups. This clearly won't -suffice, but we can decide the best way to adequately restrict -movement as people get some experience with this. We may just want -to require CAP_SYS_ADMIN, which at least is a separate bit from -CAP_MKNOD. We may want to just refuse moving to a cgroup which -isn't a descendant of the current one. Or we may want to use -CAP_MAC_ADMIN, since we really are trying to lock down root. - -CAP_SYS_ADMIN is needed to modify the whitelist or move another -task to a new cgroup. (Again we'll probably want to change that). - -A cgroup may not be granted more permissions than the cgroup's -parent has. - -4. Hierarchy - -device cgroups maintain hierarchy by making sure a cgroup never has more -access permissions than its parent. Every time an entry is written to -a cgroup's devices.deny file, all its children will have that entry removed -from their whitelist and all the locally set whitelist entries will be -re-evaluated. In case one of the locally set whitelist entries would provide -more access than the cgroup's parent, it'll be removed from the whitelist. - -Example: - A - / \ - B - - group behavior exceptions - A allow "b 8:* rwm", "c 116:1 rw" - B deny "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm" - -If a device is denied in group A: - # echo "c 116:* r" > A/devices.deny -it'll propagate down and after revalidating B's entries, the whitelist entry -"c 116:2 rwm" will be removed: - - group whitelist entries denied devices - A all "b 8:* rwm", "c 116:* rw" - B "c 1:3 rwm", "b 3:* rwm" all the rest - -In case parent's exceptions change and local exceptions are not allowed -anymore, they'll be deleted. - -Notice that new whitelist entries will not be propagated: - A - / \ - B - - group whitelist entries denied devices - A "c 1:3 rwm", "c 1:5 r" all the rest - B "c 1:3 rwm", "c 1:5 r" all the rest - -when adding "c *:3 rwm": - # echo "c *:3 rwm" >A/devices.allow - -the result: - group whitelist entries denied devices - A "c *:3 rwm", "c 1:5 r" all the rest - B "c 1:3 rwm", "c 1:5 r" all the rest - -but now it'll be possible to add new entries to B: - # echo "c 2:3 rwm" >B/devices.allow - # echo "c 50:3 r" >B/devices.allow -or even - # echo "c *:3 rwm" >B/devices.allow - -Allowing or denying all by writing 'a' to devices.allow or devices.deny will -not be possible once the device cgroups has children. - -4.1 Hierarchy (internal implementation) - -device cgroups is implemented internally using a behavior (ALLOW, DENY) and a -list of exceptions. The internal state is controlled using the same user -interface to preserve compatibility with the previous whitelist-only -implementation. Removal or addition of exceptions that will reduce the access -to devices will be propagated down the hierarchy. -For every propagated exception, the effective rules will be re-evaluated based -on current parent's access rules. diff --git a/Documentation/cgroup-v1/freezer-subsystem.rst b/Documentation/cgroup-v1/freezer-subsystem.rst new file mode 100644 index 000000000000..582d3427de3f --- /dev/null +++ b/Documentation/cgroup-v1/freezer-subsystem.rst @@ -0,0 +1,127 @@ +============== +Cgroup Freezer +============== + +The cgroup freezer is useful to batch job management system which start +and stop sets of tasks in order to schedule the resources of a machine +according to the desires of a system administrator. This sort of program +is often used on HPC clusters to schedule access to the cluster as a +whole. The cgroup freezer uses cgroups to describe the set of tasks to +be started/stopped by the batch job management system. It also provides +a means to start and stop the tasks composing the job. + +The cgroup freezer will also be useful for checkpointing running groups +of tasks. The freezer allows the checkpoint code to obtain a consistent +image of the tasks by attempting to force the tasks in a cgroup into a +quiescent state. Once the tasks are quiescent another task can +walk /proc or invoke a kernel interface to gather information about the +quiesced tasks. Checkpointed tasks can be restarted later should a +recoverable error occur. This also allows the checkpointed tasks to be +migrated between nodes in a cluster by copying the gathered information +to another node and restarting the tasks there. + +Sequences of SIGSTOP and SIGCONT are not always sufficient for stopping +and resuming tasks in userspace. Both of these signals are observable +from within the tasks we wish to freeze. While SIGSTOP cannot be caught, +blocked, or ignored it can be seen by waiting or ptracing parent tasks. +SIGCONT is especially unsuitable since it can be caught by the task. Any +programs designed to watch for SIGSTOP and SIGCONT could be broken by +attempting to use SIGSTOP and SIGCONT to stop and resume tasks. We can +demonstrate this problem using nested bash shells:: + + $ echo $$ + 16644 + $ bash + $ echo $$ + 16690 + + From a second, unrelated bash shell: + $ kill -SIGSTOP 16690 + $ kill -SIGCONT 16690 + + + +This happens because bash can observe both signals and choose how it +responds to them. + +Another example of a program which catches and responds to these +signals is gdb. In fact any program designed to use ptrace is likely to +have a problem with this method of stopping and resuming tasks. + +In contrast, the cgroup freezer uses the kernel freezer code to +prevent the freeze/unfreeze cycle from becoming visible to the tasks +being frozen. This allows the bash example above and gdb to run as +expected. + +The cgroup freezer is hierarchical. Freezing a cgroup freezes all +tasks belonging to the cgroup and all its descendant cgroups. Each +cgroup has its own state (self-state) and the state inherited from the +parent (parent-state). Iff both states are THAWED, the cgroup is +THAWED. + +The following cgroupfs files are created by cgroup freezer. + +* freezer.state: Read-write. + + When read, returns the effective state of the cgroup - "THAWED", + "FREEZING" or "FROZEN". This is the combined self and parent-states. + If any is freezing, the cgroup is freezing (FREEZING or FROZEN). + + FREEZING cgroup transitions into FROZEN state when all tasks + belonging to the cgroup and its descendants become frozen. Note that + a cgroup reverts to FREEZING from FROZEN after a new task is added + to the cgroup or one of its descendant cgroups until the new task is + frozen. + + When written, sets the self-state of the cgroup. Two values are + allowed - "FROZEN" and "THAWED". If FROZEN is written, the cgroup, + if not already freezing, enters FREEZING state along with all its + descendant cgroups. + + If THAWED is written, the self-state of the cgroup is changed to + THAWED. Note that the effective state may not change to THAWED if + the parent-state is still freezing. If a cgroup's effective state + becomes THAWED, all its descendants which are freezing because of + the cgroup also leave the freezing state. + +* freezer.self_freezing: Read only. + + Shows the self-state. 0 if the self-state is THAWED; otherwise, 1. + This value is 1 iff the last write to freezer.state was "FROZEN". + +* freezer.parent_freezing: Read only. + + Shows the parent-state. 0 if none of the cgroup's ancestors is + frozen; otherwise, 1. + +The root cgroup is non-freezable and the above interface files don't +exist. + +* Examples of usage:: + + # mkdir /sys/fs/cgroup/freezer + # mount -t cgroup -ofreezer freezer /sys/fs/cgroup/freezer + # mkdir /sys/fs/cgroup/freezer/0 + # echo $some_pid > /sys/fs/cgroup/freezer/0/tasks + +to get status of the freezer subsystem:: + + # cat /sys/fs/cgroup/freezer/0/freezer.state + THAWED + +to freeze all tasks in the container:: + + # echo FROZEN > /sys/fs/cgroup/freezer/0/freezer.state + # cat /sys/fs/cgroup/freezer/0/freezer.state + FREEZING + # cat /sys/fs/cgroup/freezer/0/freezer.state + FROZEN + +to unfreeze all tasks in the container:: + + # echo THAWED > /sys/fs/cgroup/freezer/0/freezer.state + # cat /sys/fs/cgroup/freezer/0/freezer.state + THAWED + +This is the basic mechanism which should do the right thing for user space task +in a simple scenario. diff --git a/Documentation/cgroup-v1/freezer-subsystem.txt b/Documentation/cgroup-v1/freezer-subsystem.txt deleted file mode 100644 index e831cb2b8394..000000000000 --- a/Documentation/cgroup-v1/freezer-subsystem.txt +++ /dev/null @@ -1,123 +0,0 @@ -The cgroup freezer is useful to batch job management system which start -and stop sets of tasks in order to schedule the resources of a machine -according to the desires of a system administrator. This sort of program -is often used on HPC clusters to schedule access to the cluster as a -whole. The cgroup freezer uses cgroups to describe the set of tasks to -be started/stopped by the batch job management system. It also provides -a means to start and stop the tasks composing the job. - -The cgroup freezer will also be useful for checkpointing running groups -of tasks. The freezer allows the checkpoint code to obtain a consistent -image of the tasks by attempting to force the tasks in a cgroup into a -quiescent state. Once the tasks are quiescent another task can -walk /proc or invoke a kernel interface to gather information about the -quiesced tasks. Checkpointed tasks can be restarted later should a -recoverable error occur. This also allows the checkpointed tasks to be -migrated between nodes in a cluster by copying the gathered information -to another node and restarting the tasks there. - -Sequences of SIGSTOP and SIGCONT are not always sufficient for stopping -and resuming tasks in userspace. Both of these signals are observable -from within the tasks we wish to freeze. While SIGSTOP cannot be caught, -blocked, or ignored it can be seen by waiting or ptracing parent tasks. -SIGCONT is especially unsuitable since it can be caught by the task. Any -programs designed to watch for SIGSTOP and SIGCONT could be broken by -attempting to use SIGSTOP and SIGCONT to stop and resume tasks. We can -demonstrate this problem using nested bash shells: - - $ echo $$ - 16644 - $ bash - $ echo $$ - 16690 - - From a second, unrelated bash shell: - $ kill -SIGSTOP 16690 - $ kill -SIGCONT 16690 - - - -This happens because bash can observe both signals and choose how it -responds to them. - -Another example of a program which catches and responds to these -signals is gdb. In fact any program designed to use ptrace is likely to -have a problem with this method of stopping and resuming tasks. - -In contrast, the cgroup freezer uses the kernel freezer code to -prevent the freeze/unfreeze cycle from becoming visible to the tasks -being frozen. This allows the bash example above and gdb to run as -expected. - -The cgroup freezer is hierarchical. Freezing a cgroup freezes all -tasks belonging to the cgroup and all its descendant cgroups. Each -cgroup has its own state (self-state) and the state inherited from the -parent (parent-state). Iff both states are THAWED, the cgroup is -THAWED. - -The following cgroupfs files are created by cgroup freezer. - -* freezer.state: Read-write. - - When read, returns the effective state of the cgroup - "THAWED", - "FREEZING" or "FROZEN". This is the combined self and parent-states. - If any is freezing, the cgroup is freezing (FREEZING or FROZEN). - - FREEZING cgroup transitions into FROZEN state when all tasks - belonging to the cgroup and its descendants become frozen. Note that - a cgroup reverts to FREEZING from FROZEN after a new task is added - to the cgroup or one of its descendant cgroups until the new task is - frozen. - - When written, sets the self-state of the cgroup. Two values are - allowed - "FROZEN" and "THAWED". If FROZEN is written, the cgroup, - if not already freezing, enters FREEZING state along with all its - descendant cgroups. - - If THAWED is written, the self-state of the cgroup is changed to - THAWED. Note that the effective state may not change to THAWED if - the parent-state is still freezing. If a cgroup's effective state - becomes THAWED, all its descendants which are freezing because of - the cgroup also leave the freezing state. - -* freezer.self_freezing: Read only. - - Shows the self-state. 0 if the self-state is THAWED; otherwise, 1. - This value is 1 iff the last write to freezer.state was "FROZEN". - -* freezer.parent_freezing: Read only. - - Shows the parent-state. 0 if none of the cgroup's ancestors is - frozen; otherwise, 1. - -The root cgroup is non-freezable and the above interface files don't -exist. - -* Examples of usage : - - # mkdir /sys/fs/cgroup/freezer - # mount -t cgroup -ofreezer freezer /sys/fs/cgroup/freezer - # mkdir /sys/fs/cgroup/freezer/0 - # echo $some_pid > /sys/fs/cgroup/freezer/0/tasks - -to get status of the freezer subsystem : - - # cat /sys/fs/cgroup/freezer/0/freezer.state - THAWED - -to freeze all tasks in the container : - - # echo FROZEN > /sys/fs/cgroup/freezer/0/freezer.state - # cat /sys/fs/cgroup/freezer/0/freezer.state - FREEZING - # cat /sys/fs/cgroup/freezer/0/freezer.state - FROZEN - -to unfreeze all tasks in the container : - - # echo THAWED > /sys/fs/cgroup/freezer/0/freezer.state - # cat /sys/fs/cgroup/freezer/0/freezer.state - THAWED - -This is the basic mechanism which should do the right thing for user space task -in a simple scenario. diff --git a/Documentation/cgroup-v1/hugetlb.rst b/Documentation/cgroup-v1/hugetlb.rst new file mode 100644 index 000000000000..a3902aa253a9 --- /dev/null +++ b/Documentation/cgroup-v1/hugetlb.rst @@ -0,0 +1,50 @@ +================== +HugeTLB Controller +================== + +The HugeTLB controller allows to limit the HugeTLB usage per control group and +enforces the controller limit during page fault. Since HugeTLB doesn't +support page reclaim, enforcing the limit at page fault time implies that, +the application will get SIGBUS signal if it tries to access HugeTLB pages +beyond its limit. This requires the application to know beforehand how much +HugeTLB pages it would require for its use. + +HugeTLB controller can be created by first mounting the cgroup filesystem. + +# mount -t cgroup -o hugetlb none /sys/fs/cgroup + +With the above step, the initial or the parent HugeTLB group becomes +visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in +the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup. + +New groups can be created under the parent group /sys/fs/cgroup:: + + # cd /sys/fs/cgroup + # mkdir g1 + # echo $$ > g1/tasks + +The above steps create a new group g1 and move the current shell +process (bash) into it. + +Brief summary of control files:: + + hugetlb..limit_in_bytes # set/show limit of "hugepagesize" hugetlb usage + hugetlb..max_usage_in_bytes # show max "hugepagesize" hugetlb usage recorded + hugetlb..usage_in_bytes # show current usage for "hugepagesize" hugetlb + hugetlb..failcnt # show the number of allocation failure due to HugeTLB limit + +For a system supporting three hugepage sizes (64k, 32M and 1G), the control +files include:: + + hugetlb.1GB.limit_in_bytes + hugetlb.1GB.max_usage_in_bytes + hugetlb.1GB.usage_in_bytes + hugetlb.1GB.failcnt + hugetlb.64KB.limit_in_bytes + hugetlb.64KB.max_usage_in_bytes + hugetlb.64KB.usage_in_bytes + hugetlb.64KB.failcnt + hugetlb.32MB.limit_in_bytes + hugetlb.32MB.max_usage_in_bytes + hugetlb.32MB.usage_in_bytes + hugetlb.32MB.failcnt diff --git a/Documentation/cgroup-v1/hugetlb.txt b/Documentation/cgroup-v1/hugetlb.txt deleted file mode 100644 index 1260e5369b9b..000000000000 --- a/Documentation/cgroup-v1/hugetlb.txt +++ /dev/null @@ -1,49 +0,0 @@ -HugeTLB Controller -------------------- - -The HugeTLB controller allows to limit the HugeTLB usage per control group and -enforces the controller limit during page fault. Since HugeTLB doesn't -support page reclaim, enforcing the limit at page fault time implies that, -the application will get SIGBUS signal if it tries to access HugeTLB pages -beyond its limit. This requires the application to know beforehand how much -HugeTLB pages it would require for its use. - -HugeTLB controller can be created by first mounting the cgroup filesystem. - -# mount -t cgroup -o hugetlb none /sys/fs/cgroup - -With the above step, the initial or the parent HugeTLB group becomes -visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in -the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup. - -New groups can be created under the parent group /sys/fs/cgroup. - -# cd /sys/fs/cgroup -# mkdir g1 -# echo $$ > g1/tasks - -The above steps create a new group g1 and move the current shell -process (bash) into it. - -Brief summary of control files - - hugetlb..limit_in_bytes # set/show limit of "hugepagesize" hugetlb usage - hugetlb..max_usage_in_bytes # show max "hugepagesize" hugetlb usage recorded - hugetlb..usage_in_bytes # show current usage for "hugepagesize" hugetlb - hugetlb..failcnt # show the number of allocation failure due to HugeTLB limit - -For a system supporting three hugepage sizes (64k, 32M and 1G), the control -files include: - -hugetlb.1GB.limit_in_bytes -hugetlb.1GB.max_usage_in_bytes -hugetlb.1GB.usage_in_bytes -hugetlb.1GB.failcnt -hugetlb.64KB.limit_in_bytes -hugetlb.64KB.max_usage_in_bytes -hugetlb.64KB.usage_in_bytes -hugetlb.64KB.failcnt -hugetlb.32MB.limit_in_bytes -hugetlb.32MB.max_usage_in_bytes -hugetlb.32MB.usage_in_bytes -hugetlb.32MB.failcnt diff --git a/Documentation/cgroup-v1/index.rst b/Documentation/cgroup-v1/index.rst new file mode 100644 index 000000000000..fe76d42edc11 --- /dev/null +++ b/Documentation/cgroup-v1/index.rst @@ -0,0 +1,30 @@ +:orphan: + +======================== +Control Groups version 1 +======================== + +.. toctree:: + :maxdepth: 1 + + cgroups + + blkio-controller + cpuacct + cpusets + devices + freezer-subsystem + hugetlb + memcg_test + memory + net_cls + net_prio + pids + rdma + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/cgroup-v1/memcg_test.rst b/Documentation/cgroup-v1/memcg_test.rst new file mode 100644 index 000000000000..91bd18c6a514 --- /dev/null +++ b/Documentation/cgroup-v1/memcg_test.rst @@ -0,0 +1,355 @@ +===================================================== +Memory Resource Controller(Memcg) Implementation Memo +===================================================== + +Last Updated: 2010/2 + +Base Kernel Version: based on 2.6.33-rc7-mm(candidate for 34). + +Because VM is getting complex (one of reasons is memcg...), memcg's behavior +is complex. This is a document for memcg's internal behavior. +Please note that implementation details can be changed. + +(*) Topics on API should be in Documentation/cgroup-v1/memory.rst) + +0. How to record usage ? +======================== + + 2 objects are used. + + page_cgroup ....an object per page. + + Allocated at boot or memory hotplug. Freed at memory hot removal. + + swap_cgroup ... an entry per swp_entry. + + Allocated at swapon(). Freed at swapoff(). + + The page_cgroup has USED bit and double count against a page_cgroup never + occurs. swap_cgroup is used only when a charged page is swapped-out. + +1. Charge +========= + + a page/swp_entry may be charged (usage += PAGE_SIZE) at + + mem_cgroup_try_charge() + +2. Uncharge +=========== + + a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by + + mem_cgroup_uncharge() + Called when a page's refcount goes down to 0. + + mem_cgroup_uncharge_swap() + Called when swp_entry's refcnt goes down to 0. A charge against swap + disappears. + +3. charge-commit-cancel +======================= + + Memcg pages are charged in two steps: + + - mem_cgroup_try_charge() + - mem_cgroup_commit_charge() or mem_cgroup_cancel_charge() + + At try_charge(), there are no flags to say "this page is charged". + at this point, usage += PAGE_SIZE. + + At commit(), the page is associated with the memcg. + + At cancel(), simply usage -= PAGE_SIZE. + +Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. + +4. Anonymous +============ + + Anonymous page is newly allocated at + - page fault into MAP_ANONYMOUS mapping. + - Copy-On-Write. + + 4.1 Swap-in. + At swap-in, the page is taken from swap-cache. There are 2 cases. + + (a) If the SwapCache is newly allocated and read, it has no charges. + (b) If the SwapCache has been mapped by processes, it has been + charged already. + + 4.2 Swap-out. + At swap-out, typical state transition is below. + + (a) add to swap cache. (marked as SwapCache) + swp_entry's refcnt += 1. + (b) fully unmapped. + swp_entry's refcnt += # of ptes. + (c) write back to swap. + (d) delete from swap cache. (remove from SwapCache) + swp_entry's refcnt -= 1. + + + Finally, at task exit, + (e) zap_pte() is called and swp_entry's refcnt -=1 -> 0. + +5. Page Cache +============= + + Page Cache is charged at + - add_to_page_cache_locked(). + + The logic is very clear. (About migration, see below) + + Note: + __remove_from_page_cache() is called by remove_from_page_cache() + and __remove_mapping(). + +6. Shmem(tmpfs) Page Cache +=========================== + + The best way to understand shmem's page state transition is to read + mm/shmem.c. + + But brief explanation of the behavior of memcg around shmem will be + helpful to understand the logic. + + Shmem's page (just leaf page, not direct/indirect block) can be on + + - radix-tree of shmem's inode. + - SwapCache. + - Both on radix-tree and SwapCache. This happens at swap-in + and swap-out, + + It's charged when... + + - A new page is added to shmem's radix-tree. + - A swp page is read. (move a charge from swap_cgroup to page_cgroup) + +7. Page Migration +================= + + mem_cgroup_migrate() + +8. LRU +====== + Each memcg has its own private LRU. Now, its handling is under global + VM's control (means that it's handled under global pgdat->lru_lock). + Almost all routines around memcg's LRU is called by global LRU's + list management functions under pgdat->lru_lock. + + A special function is mem_cgroup_isolate_pages(). This scans + memcg's private LRU and call __isolate_lru_page() to extract a page + from LRU. + + (By __isolate_lru_page(), the page is removed from both of global and + private LRU.) + + +9. Typical Tests. +================= + + Tests for racy cases. + +9.1 Small limit to memcg. +------------------------- + + When you do test to do racy case, it's good test to set memcg's limit + to be very small rather than GB. Many races found in the test under + xKB or xxMB limits. + + (Memory behavior under GB and Memory behavior under MB shows very + different situation.) + +9.2 Shmem +--------- + + Historically, memcg's shmem handling was poor and we saw some amount + of troubles here. This is because shmem is page-cache but can be + SwapCache. Test with shmem/tmpfs is always good test. + +9.3 Migration +------------- + + For NUMA, migration is an another special case. To do easy test, cpuset + is useful. Following is a sample script to do migration:: + + mount -t cgroup -o cpuset none /opt/cpuset + + mkdir /opt/cpuset/01 + echo 1 > /opt/cpuset/01/cpuset.cpus + echo 0 > /opt/cpuset/01/cpuset.mems + echo 1 > /opt/cpuset/01/cpuset.memory_migrate + mkdir /opt/cpuset/02 + echo 1 > /opt/cpuset/02/cpuset.cpus + echo 1 > /opt/cpuset/02/cpuset.mems + echo 1 > /opt/cpuset/02/cpuset.memory_migrate + + In above set, when you moves a task from 01 to 02, page migration to + node 0 to node 1 will occur. Following is a script to migrate all + under cpuset.:: + + -- + move_task() + { + for pid in $1 + do + /bin/echo $pid >$2/tasks 2>/dev/null + echo -n $pid + echo -n " " + done + echo END + } + + G1_TASK=`cat ${G1}/tasks` + G2_TASK=`cat ${G2}/tasks` + move_task "${G1_TASK}" ${G2} & + -- + +9.4 Memory hotplug +------------------ + + memory hotplug test is one of good test. + + to offline memory, do following:: + + # echo offline > /sys/devices/system/memory/memoryXXX/state + + (XXX is the place of memory) + + This is an easy way to test page migration, too. + +9.5 mkdir/rmdir +--------------- + + When using hierarchy, mkdir/rmdir test should be done. + Use tests like the following:: + + echo 1 >/opt/cgroup/01/memory/use_hierarchy + mkdir /opt/cgroup/01/child_a + mkdir /opt/cgroup/01/child_b + + set limit to 01. + add limit to 01/child_b + run jobs under child_a and child_b + + create/delete following groups at random while jobs are running:: + + /opt/cgroup/01/child_a/child_aa + /opt/cgroup/01/child_b/child_bb + /opt/cgroup/01/child_c + + running new jobs in new group is also good. + +9.6 Mount with other subsystems +------------------------------- + + Mounting with other subsystems is a good test because there is a + race and lock dependency with other cgroup subsystems. + + example:: + + # mount -t cgroup none /cgroup -o cpuset,memory,cpu,devices + + and do task move, mkdir, rmdir etc...under this. + +9.7 swapoff +----------- + + Besides management of swap is one of complicated parts of memcg, + call path of swap-in at swapoff is not same as usual swap-in path.. + It's worth to be tested explicitly. + + For example, test like following is good: + + (Shell-A):: + + # mount -t cgroup none /cgroup -o memory + # mkdir /cgroup/test + # echo 40M > /cgroup/test/memory.limit_in_bytes + # echo 0 > /cgroup/test/tasks + + Run malloc(100M) program under this. You'll see 60M of swaps. + + (Shell-B):: + + # move all tasks in /cgroup/test to /cgroup + # /sbin/swapoff -a + # rmdir /cgroup/test + # kill malloc task. + + Of course, tmpfs v.s. swapoff test should be tested, too. + +9.8 OOM-Killer +-------------- + + Out-of-memory caused by memcg's limit will kill tasks under + the memcg. When hierarchy is used, a task under hierarchy + will be killed by the kernel. + + In this case, panic_on_oom shouldn't be invoked and tasks + in other groups shouldn't be killed. + + It's not difficult to cause OOM under memcg as following. + + Case A) when you can swapoff:: + + #swapoff -a + #echo 50M > /memory.limit_in_bytes + + run 51M of malloc + + Case B) when you use mem+swap limitation:: + + #echo 50M > memory.limit_in_bytes + #echo 50M > memory.memsw.limit_in_bytes + + run 51M of malloc + +9.9 Move charges at task migration +---------------------------------- + + Charges associated with a task can be moved along with task migration. + + (Shell-A):: + + #mkdir /cgroup/A + #echo $$ >/cgroup/A/tasks + + run some programs which uses some amount of memory in /cgroup/A. + + (Shell-B):: + + #mkdir /cgroup/B + #echo 1 >/cgroup/B/memory.move_charge_at_immigrate + #echo "pid of the program running in group A" >/cgroup/B/tasks + + You can see charges have been moved by reading ``*.usage_in_bytes`` or + memory.stat of both A and B. + + See 8.2 of Documentation/cgroup-v1/memory.rst to see what value should + be written to move_charge_at_immigrate. + +9.10 Memory thresholds +---------------------- + + Memory controller implements memory thresholds using cgroups notification + API. You can use tools/cgroup/cgroup_event_listener.c to test it. + + (Shell-A) Create cgroup and run event listener:: + + # mkdir /cgroup/A + # ./cgroup_event_listener /cgroup/A/memory.usage_in_bytes 5M + + (Shell-B) Add task to cgroup and try to allocate and free memory:: + + # echo $$ >/cgroup/A/tasks + # a="$(dd if=/dev/zero bs=1M count=10)" + # a= + + You will see message from cgroup_event_listener every time you cross + the thresholds. + + Use /cgroup/A/memory.memsw.usage_in_bytes to test memsw thresholds. + + It's good idea to test root cgroup as well. diff --git a/Documentation/cgroup-v1/memcg_test.txt b/Documentation/cgroup-v1/memcg_test.txt deleted file mode 100644 index 621e29ffb358..000000000000 --- a/Documentation/cgroup-v1/memcg_test.txt +++ /dev/null @@ -1,280 +0,0 @@ -Memory Resource Controller(Memcg) Implementation Memo. -Last Updated: 2010/2 -Base Kernel Version: based on 2.6.33-rc7-mm(candidate for 34). - -Because VM is getting complex (one of reasons is memcg...), memcg's behavior -is complex. This is a document for memcg's internal behavior. -Please note that implementation details can be changed. - -(*) Topics on API should be in Documentation/cgroup-v1/memory.txt) - -0. How to record usage ? - 2 objects are used. - - page_cgroup ....an object per page. - Allocated at boot or memory hotplug. Freed at memory hot removal. - - swap_cgroup ... an entry per swp_entry. - Allocated at swapon(). Freed at swapoff(). - - The page_cgroup has USED bit and double count against a page_cgroup never - occurs. swap_cgroup is used only when a charged page is swapped-out. - -1. Charge - - a page/swp_entry may be charged (usage += PAGE_SIZE) at - - mem_cgroup_try_charge() - -2. Uncharge - a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by - - mem_cgroup_uncharge() - Called when a page's refcount goes down to 0. - - mem_cgroup_uncharge_swap() - Called when swp_entry's refcnt goes down to 0. A charge against swap - disappears. - -3. charge-commit-cancel - Memcg pages are charged in two steps: - mem_cgroup_try_charge() - mem_cgroup_commit_charge() or mem_cgroup_cancel_charge() - - At try_charge(), there are no flags to say "this page is charged". - at this point, usage += PAGE_SIZE. - - At commit(), the page is associated with the memcg. - - At cancel(), simply usage -= PAGE_SIZE. - -Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. - -4. Anonymous - Anonymous page is newly allocated at - - page fault into MAP_ANONYMOUS mapping. - - Copy-On-Write. - - 4.1 Swap-in. - At swap-in, the page is taken from swap-cache. There are 2 cases. - - (a) If the SwapCache is newly allocated and read, it has no charges. - (b) If the SwapCache has been mapped by processes, it has been - charged already. - - 4.2 Swap-out. - At swap-out, typical state transition is below. - - (a) add to swap cache. (marked as SwapCache) - swp_entry's refcnt += 1. - (b) fully unmapped. - swp_entry's refcnt += # of ptes. - (c) write back to swap. - (d) delete from swap cache. (remove from SwapCache) - swp_entry's refcnt -= 1. - - - Finally, at task exit, - (e) zap_pte() is called and swp_entry's refcnt -=1 -> 0. - -5. Page Cache - Page Cache is charged at - - add_to_page_cache_locked(). - - The logic is very clear. (About migration, see below) - Note: __remove_from_page_cache() is called by remove_from_page_cache() - and __remove_mapping(). - -6. Shmem(tmpfs) Page Cache - The best way to understand shmem's page state transition is to read - mm/shmem.c. - But brief explanation of the behavior of memcg around shmem will be - helpful to understand the logic. - - Shmem's page (just leaf page, not direct/indirect block) can be on - - radix-tree of shmem's inode. - - SwapCache. - - Both on radix-tree and SwapCache. This happens at swap-in - and swap-out, - - It's charged when... - - A new page is added to shmem's radix-tree. - - A swp page is read. (move a charge from swap_cgroup to page_cgroup) - -7. Page Migration - - mem_cgroup_migrate() - -8. LRU - Each memcg has its own private LRU. Now, its handling is under global - VM's control (means that it's handled under global pgdat->lru_lock). - Almost all routines around memcg's LRU is called by global LRU's - list management functions under pgdat->lru_lock. - - A special function is mem_cgroup_isolate_pages(). This scans - memcg's private LRU and call __isolate_lru_page() to extract a page - from LRU. - (By __isolate_lru_page(), the page is removed from both of global and - private LRU.) - - -9. Typical Tests. - - Tests for racy cases. - - 9.1 Small limit to memcg. - When you do test to do racy case, it's good test to set memcg's limit - to be very small rather than GB. Many races found in the test under - xKB or xxMB limits. - (Memory behavior under GB and Memory behavior under MB shows very - different situation.) - - 9.2 Shmem - Historically, memcg's shmem handling was poor and we saw some amount - of troubles here. This is because shmem is page-cache but can be - SwapCache. Test with shmem/tmpfs is always good test. - - 9.3 Migration - For NUMA, migration is an another special case. To do easy test, cpuset - is useful. Following is a sample script to do migration. - - mount -t cgroup -o cpuset none /opt/cpuset - - mkdir /opt/cpuset/01 - echo 1 > /opt/cpuset/01/cpuset.cpus - echo 0 > /opt/cpuset/01/cpuset.mems - echo 1 > /opt/cpuset/01/cpuset.memory_migrate - mkdir /opt/cpuset/02 - echo 1 > /opt/cpuset/02/cpuset.cpus - echo 1 > /opt/cpuset/02/cpuset.mems - echo 1 > /opt/cpuset/02/cpuset.memory_migrate - - In above set, when you moves a task from 01 to 02, page migration to - node 0 to node 1 will occur. Following is a script to migrate all - under cpuset. - -- - move_task() - { - for pid in $1 - do - /bin/echo $pid >$2/tasks 2>/dev/null - echo -n $pid - echo -n " " - done - echo END - } - - G1_TASK=`cat ${G1}/tasks` - G2_TASK=`cat ${G2}/tasks` - move_task "${G1_TASK}" ${G2} & - -- - 9.4 Memory hotplug. - memory hotplug test is one of good test. - to offline memory, do following. - # echo offline > /sys/devices/system/memory/memoryXXX/state - (XXX is the place of memory) - This is an easy way to test page migration, too. - - 9.5 mkdir/rmdir - When using hierarchy, mkdir/rmdir test should be done. - Use tests like the following. - - echo 1 >/opt/cgroup/01/memory/use_hierarchy - mkdir /opt/cgroup/01/child_a - mkdir /opt/cgroup/01/child_b - - set limit to 01. - add limit to 01/child_b - run jobs under child_a and child_b - - create/delete following groups at random while jobs are running. - /opt/cgroup/01/child_a/child_aa - /opt/cgroup/01/child_b/child_bb - /opt/cgroup/01/child_c - - running new jobs in new group is also good. - - 9.6 Mount with other subsystems. - Mounting with other subsystems is a good test because there is a - race and lock dependency with other cgroup subsystems. - - example) - # mount -t cgroup none /cgroup -o cpuset,memory,cpu,devices - - and do task move, mkdir, rmdir etc...under this. - - 9.7 swapoff. - Besides management of swap is one of complicated parts of memcg, - call path of swap-in at swapoff is not same as usual swap-in path.. - It's worth to be tested explicitly. - - For example, test like following is good. - (Shell-A) - # mount -t cgroup none /cgroup -o memory - # mkdir /cgroup/test - # echo 40M > /cgroup/test/memory.limit_in_bytes - # echo 0 > /cgroup/test/tasks - Run malloc(100M) program under this. You'll see 60M of swaps. - (Shell-B) - # move all tasks in /cgroup/test to /cgroup - # /sbin/swapoff -a - # rmdir /cgroup/test - # kill malloc task. - - Of course, tmpfs v.s. swapoff test should be tested, too. - - 9.8 OOM-Killer - Out-of-memory caused by memcg's limit will kill tasks under - the memcg. When hierarchy is used, a task under hierarchy - will be killed by the kernel. - In this case, panic_on_oom shouldn't be invoked and tasks - in other groups shouldn't be killed. - - It's not difficult to cause OOM under memcg as following. - Case A) when you can swapoff - #swapoff -a - #echo 50M > /memory.limit_in_bytes - run 51M of malloc - - Case B) when you use mem+swap limitation. - #echo 50M > memory.limit_in_bytes - #echo 50M > memory.memsw.limit_in_bytes - run 51M of malloc - - 9.9 Move charges at task migration - Charges associated with a task can be moved along with task migration. - - (Shell-A) - #mkdir /cgroup/A - #echo $$ >/cgroup/A/tasks - run some programs which uses some amount of memory in /cgroup/A. - - (Shell-B) - #mkdir /cgroup/B - #echo 1 >/cgroup/B/memory.move_charge_at_immigrate - #echo "pid of the program running in group A" >/cgroup/B/tasks - - You can see charges have been moved by reading *.usage_in_bytes or - memory.stat of both A and B. - See 8.2 of Documentation/cgroup-v1/memory.txt to see what value should be - written to move_charge_at_immigrate. - - 9.10 Memory thresholds - Memory controller implements memory thresholds using cgroups notification - API. You can use tools/cgroup/cgroup_event_listener.c to test it. - - (Shell-A) Create cgroup and run event listener - # mkdir /cgroup/A - # ./cgroup_event_listener /cgroup/A/memory.usage_in_bytes 5M - - (Shell-B) Add task to cgroup and try to allocate and free memory - # echo $$ >/cgroup/A/tasks - # a="$(dd if=/dev/zero bs=1M count=10)" - # a= - - You will see message from cgroup_event_listener every time you cross - the thresholds. - - Use /cgroup/A/memory.memsw.usage_in_bytes to test memsw thresholds. - - It's good idea to test root cgroup as well. diff --git a/Documentation/cgroup-v1/memory.rst b/Documentation/cgroup-v1/memory.rst new file mode 100644 index 000000000000..41bdc038dad9 --- /dev/null +++ b/Documentation/cgroup-v1/memory.rst @@ -0,0 +1,1003 @@ +========================== +Memory Resource Controller +========================== + +NOTE: + This document is hopelessly outdated and it asks for a complete + rewrite. It still contains a useful information so we are keeping it + here but make sure to check the current code if you need a deeper + understanding. + +NOTE: + The Memory Resource Controller has generically been referred to as the + memory controller in this document. Do not confuse memory controller + used here with the memory controller that is used in hardware. + +(For editors) In this document: + When we mention a cgroup (cgroupfs's directory) with memory controller, + we call it "memory cgroup". When you see git-log and source code, you'll + see patch's title and function names tend to use "memcg". + In this document, we avoid using it. + +Benefits and Purpose of the memory controller +============================================= + +The memory controller isolates the memory behaviour of a group of tasks +from the rest of the system. The article on LWN [12] mentions some probable +uses of the memory controller. The memory controller can be used to + +a. Isolate an application or a group of applications + Memory-hungry applications can be isolated and limited to a smaller + amount of memory. +b. Create a cgroup with a limited amount of memory; this can be used + as a good alternative to booting with mem=XXXX. +c. Virtualization solutions can control the amount of memory they want + to assign to a virtual machine instance. +d. A CD/DVD burner could control the amount of memory used by the + rest of the system to ensure that burning does not fail due to lack + of available memory. +e. There are several other use cases; find one or use the controller just + for fun (to learn and hack on the VM subsystem). + +Current Status: linux-2.6.34-mmotm(development version of 2010/April) + +Features: + + - accounting anonymous pages, file caches, swap caches usage and limiting them. + - pages are linked to per-memcg LRU exclusively, and there is no global LRU. + - optionally, memory+swap usage can be accounted and limited. + - hierarchical accounting + - soft limit + - moving (recharging) account at moving a task is selectable. + - usage threshold notifier + - memory pressure notifier + - oom-killer disable knob and oom-notifier + - Root cgroup has no limit controls. + + Kernel memory support is a work in progress, and the current version provides + basically functionality. (See Section 2.7) + +Brief summary of control files. + +==================================== ========================================== + tasks attach a task(thread) and show list of + threads + cgroup.procs show list of processes + cgroup.event_control an interface for event_fd() + memory.usage_in_bytes show current usage for memory + (See 5.5 for details) + memory.memsw.usage_in_bytes show current usage for memory+Swap + (See 5.5 for details) + memory.limit_in_bytes set/show limit of memory usage + memory.memsw.limit_in_bytes set/show limit of memory+Swap usage + memory.failcnt show the number of memory usage hits limits + memory.memsw.failcnt show the number of memory+Swap hits limits + memory.max_usage_in_bytes show max memory usage recorded + memory.memsw.max_usage_in_bytes show max memory+Swap usage recorded + memory.soft_limit_in_bytes set/show soft limit of memory usage + memory.stat show various statistics + memory.use_hierarchy set/show hierarchical account enabled + memory.force_empty trigger forced page reclaim + memory.pressure_level set memory pressure notifications + memory.swappiness set/show swappiness parameter of vmscan + (See sysctl's vm.swappiness) + memory.move_charge_at_immigrate set/show controls of moving charges + memory.oom_control set/show oom controls. + memory.numa_stat show the number of memory usage per numa + node + + memory.kmem.limit_in_bytes set/show hard limit for kernel memory + memory.kmem.usage_in_bytes show current kernel memory allocation + memory.kmem.failcnt show the number of kernel memory usage + hits limits + memory.kmem.max_usage_in_bytes show max kernel memory usage recorded + + memory.kmem.tcp.limit_in_bytes set/show hard limit for tcp buf memory + memory.kmem.tcp.usage_in_bytes show current tcp buf memory allocation + memory.kmem.tcp.failcnt show the number of tcp buf memory usage + hits limits + memory.kmem.tcp.max_usage_in_bytes show max tcp buf memory usage recorded +==================================== ========================================== + +1. History +========== + +The memory controller has a long history. A request for comments for the memory +controller was posted by Balbir Singh [1]. At the time the RFC was posted +there were several implementations for memory control. The goal of the +RFC was to build consensus and agreement for the minimal features required +for memory control. The first RSS controller was posted by Balbir Singh[2] +in Feb 2007. Pavel Emelianov [3][4][5] has since posted three versions of the +RSS controller. At OLS, at the resource management BoF, everyone suggested +that we handle both page cache and RSS together. Another request was raised +to allow user space handling of OOM. The current memory controller is +at version 6; it combines both mapped (RSS) and unmapped Page +Cache Control [11]. + +2. Memory Control +================= + +Memory is a unique resource in the sense that it is present in a limited +amount. If a task requires a lot of CPU processing, the task can spread +its processing over a period of hours, days, months or years, but with +memory, the same physical memory needs to be reused to accomplish the task. + +The memory controller implementation has been divided into phases. These +are: + +1. Memory controller +2. mlock(2) controller +3. Kernel user memory accounting and slab control +4. user mappings length controller + +The memory controller is the first controller developed. + +2.1. Design +----------- + +The core of the design is a counter called the page_counter. The +page_counter tracks the current memory usage and limit of the group of +processes associated with the controller. Each cgroup has a memory controller +specific data structure (mem_cgroup) associated with it. + +2.2. Accounting +--------------- + +:: + + +--------------------+ + | mem_cgroup | + | (page_counter) | + +--------------------+ + / ^ \ + / | \ + +---------------+ | +---------------+ + | mm_struct | |.... | mm_struct | + | | | | | + +---------------+ | +---------------+ + | + + --------------+ + | + +---------------+ +------+--------+ + | page +----------> page_cgroup| + | | | | + +---------------+ +---------------+ + + (Figure 1: Hierarchy of Accounting) + + +Figure 1 shows the important aspects of the controller + +1. Accounting happens per cgroup +2. Each mm_struct knows about which cgroup it belongs to +3. Each page has a pointer to the page_cgroup, which in turn knows the + cgroup it belongs to + +The accounting is done as follows: mem_cgroup_charge_common() is invoked to +set up the necessary data structures and check if the cgroup that is being +charged is over its limit. If it is, then reclaim is invoked on the cgroup. +More details can be found in the reclaim section of this document. +If everything goes well, a page meta-data-structure called page_cgroup is +updated. page_cgroup has its own LRU on cgroup. +(*) page_cgroup structure is allocated at boot/memory-hotplug time. + +2.2.1 Accounting details +------------------------ + +All mapped anon pages (RSS) and cache pages (Page Cache) are accounted. +Some pages which are never reclaimable and will not be on the LRU +are not accounted. We just account pages under usual VM management. + +RSS pages are accounted at page_fault unless they've already been accounted +for earlier. A file page will be accounted for as Page Cache when it's +inserted into inode (radix-tree). While it's mapped into the page tables of +processes, duplicate accounting is carefully avoided. + +An RSS page is unaccounted when it's fully unmapped. A PageCache page is +unaccounted when it's removed from radix-tree. Even if RSS pages are fully +unmapped (by kswapd), they may exist as SwapCache in the system until they +are really freed. Such SwapCaches are also accounted. +A swapped-in page is not accounted until it's mapped. + +Note: The kernel does swapin-readahead and reads multiple swaps at once. +This means swapped-in pages may contain pages for other tasks than a task +causing page fault. So, we avoid accounting at swap-in I/O. + +At page migration, accounting information is kept. + +Note: we just account pages-on-LRU because our purpose is to control amount +of used pages; not-on-LRU pages tend to be out-of-control from VM view. + +2.3 Shared Page Accounting +-------------------------- + +Shared pages are accounted on the basis of the first touch approach. The +cgroup that first touches a page is accounted for the page. The principle +behind this approach is that a cgroup that aggressively uses a shared +page will eventually get charged for it (once it is uncharged from +the cgroup that brought it in -- this will happen on memory pressure). + +But see section 8.2: when moving a task to another cgroup, its pages may +be recharged to the new cgroup, if move_charge_at_immigrate has been chosen. + +Exception: If CONFIG_MEMCG_SWAP is not used. +When you do swapoff and make swapped-out pages of shmem(tmpfs) to +be backed into memory in force, charges for pages are accounted against the +caller of swapoff rather than the users of shmem. + +2.4 Swap Extension (CONFIG_MEMCG_SWAP) +-------------------------------------- + +Swap Extension allows you to record charge for swap. A swapped-in page is +charged back to original page allocator if possible. + +When swap is accounted, following files are added. + + - memory.memsw.usage_in_bytes. + - memory.memsw.limit_in_bytes. + +memsw means memory+swap. Usage of memory+swap is limited by +memsw.limit_in_bytes. + +Example: Assume a system with 4G of swap. A task which allocates 6G of memory +(by mistake) under 2G memory limitation will use all swap. +In this case, setting memsw.limit_in_bytes=3G will prevent bad use of swap. +By using the memsw limit, you can avoid system OOM which can be caused by swap +shortage. + +**why 'memory+swap' rather than swap** + +The global LRU(kswapd) can swap out arbitrary pages. Swap-out means +to move account from memory to swap...there is no change in usage of +memory+swap. In other words, when we want to limit the usage of swap without +affecting global LRU, memory+swap limit is better than just limiting swap from +an OS point of view. + +**What happens when a cgroup hits memory.memsw.limit_in_bytes** + +When a cgroup hits memory.memsw.limit_in_bytes, it's useless to do swap-out +in this cgroup. Then, swap-out will not be done by cgroup routine and file +caches are dropped. But as mentioned above, global LRU can do swapout memory +from it for sanity of the system's memory management state. You can't forbid +it by cgroup. + +2.5 Reclaim +----------- + +Each cgroup maintains a per cgroup LRU which has the same structure as +global VM. When a cgroup goes over its limit, we first try +to reclaim memory from the cgroup so as to make space for the new +pages that the cgroup has touched. If the reclaim is unsuccessful, +an OOM routine is invoked to select and kill the bulkiest task in the +cgroup. (See 10. OOM Control below.) + +The reclaim algorithm has not been modified for cgroups, except that +pages that are selected for reclaiming come from the per-cgroup LRU +list. + +NOTE: + Reclaim does not work for the root cgroup, since we cannot set any + limits on the root cgroup. + +Note2: + When panic_on_oom is set to "2", the whole system will panic. + +When oom event notifier is registered, event will be delivered. +(See oom_control section) + +2.6 Locking +----------- + + lock_page_cgroup()/unlock_page_cgroup() should not be called under + the i_pages lock. + + Other lock order is following: + + PG_locked. + mm->page_table_lock + pgdat->lru_lock + lock_page_cgroup. + + In many cases, just lock_page_cgroup() is called. + + per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by + pgdat->lru_lock, it has no lock of its own. + +2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM) +----------------------------------------------- + +With the Kernel memory extension, the Memory Controller is able to limit +the amount of kernel memory used by the system. Kernel memory is fundamentally +different than user memory, since it can't be swapped out, which makes it +possible to DoS the system by consuming too much of this precious resource. + +Kernel memory accounting is enabled for all memory cgroups by default. But +it can be disabled system-wide by passing cgroup.memory=nokmem to the kernel +at boot time. In this case, kernel memory will not be accounted at all. + +Kernel memory limits are not imposed for the root cgroup. Usage for the root +cgroup may or may not be accounted. The memory used is accumulated into +memory.kmem.usage_in_bytes, or in a separate counter when it makes sense. +(currently only for tcp). + +The main "kmem" counter is fed into the main counter, so kmem charges will +also be visible from the user counter. + +Currently no soft limit is implemented for kernel memory. It is future work +to trigger slab reclaim when those limits are reached. + +2.7.1 Current Kernel Memory resources accounted +----------------------------------------------- + +stack pages: + every process consumes some stack pages. By accounting into + kernel memory, we prevent new processes from being created when the kernel + memory usage is too high. + +slab pages: + pages allocated by the SLAB or SLUB allocator are tracked. A copy + of each kmem_cache is created every time the cache is touched by the first time + from inside the memcg. The creation is done lazily, so some objects can still be + skipped while the cache is being created. All objects in a slab page should + belong to the same memcg. This only fails to hold when a task is migrated to a + different memcg during the page allocation by the cache. + +sockets memory pressure: + some sockets protocols have memory pressure + thresholds. The Memory Controller allows them to be controlled individually + per cgroup, instead of globally. + +tcp memory pressure: + sockets memory pressure for the tcp protocol. + +2.7.2 Common use cases +---------------------- + +Because the "kmem" counter is fed to the main user counter, kernel memory can +never be limited completely independently of user memory. Say "U" is the user +limit, and "K" the kernel limit. There are three possible ways limits can be +set: + +U != 0, K = unlimited: + This is the standard memcg limitation mechanism already present before kmem + accounting. Kernel memory is completely ignored. + +U != 0, K < U: + Kernel memory is a subset of the user memory. This setup is useful in + deployments where the total amount of memory per-cgroup is overcommited. + Overcommiting kernel memory limits is definitely not recommended, since the + box can still run out of non-reclaimable memory. + In this case, the admin could set up K so that the sum of all groups is + never greater than the total memory, and freely set U at the cost of his + QoS. + +WARNING: + In the current implementation, memory reclaim will NOT be + triggered for a cgroup when it hits K while staying below U, which makes + this setup impractical. + +U != 0, K >= U: + Since kmem charges will also be fed to the user counter and reclaim will be + triggered for the cgroup for both kinds of memory. This setup gives the + admin a unified view of memory, and it is also useful for people who just + want to track kernel memory usage. + +3. User Interface +================= + +3.0. Configuration +------------------ + +a. Enable CONFIG_CGROUPS +b. Enable CONFIG_MEMCG +c. Enable CONFIG_MEMCG_SWAP (to use swap extension) +d. Enable CONFIG_MEMCG_KMEM (to use kmem extension) + +3.1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?) +------------------------------------------------------------------- + +:: + + # mount -t tmpfs none /sys/fs/cgroup + # mkdir /sys/fs/cgroup/memory + # mount -t cgroup none /sys/fs/cgroup/memory -o memory + +3.2. Make the new group and move bash into it:: + + # mkdir /sys/fs/cgroup/memory/0 + # echo $$ > /sys/fs/cgroup/memory/0/tasks + +Since now we're in the 0 cgroup, we can alter the memory limit:: + + # echo 4M > /sys/fs/cgroup/memory/0/memory.limit_in_bytes + +NOTE: + We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, + mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, + Gibibytes.) + +NOTE: + We can write "-1" to reset the ``*.limit_in_bytes(unlimited)``. + +NOTE: + We cannot set limits on the root cgroup any more. + +:: + + # cat /sys/fs/cgroup/memory/0/memory.limit_in_bytes + 4194304 + +We can check the usage:: + + # cat /sys/fs/cgroup/memory/0/memory.usage_in_bytes + 1216512 + +A successful write to this file does not guarantee a successful setting of +this limit to the value written into the file. This can be due to a +number of factors, such as rounding up to page boundaries or the total +availability of memory on the system. The user is required to re-read +this file after a write to guarantee the value committed by the kernel:: + + # echo 1 > memory.limit_in_bytes + # cat memory.limit_in_bytes + 4096 + +The memory.failcnt field gives the number of times that the cgroup limit was +exceeded. + +The memory.stat file gives accounting information. Now, the number of +caches, RSS and Active pages/Inactive pages are shown. + +4. Testing +========== + +For testing features and implementation, see memcg_test.txt. + +Performance test is also important. To see pure memory controller's overhead, +testing on tmpfs will give you good numbers of small overheads. +Example: do kernel make on tmpfs. + +Page-fault scalability is also important. At measuring parallel +page fault test, multi-process test may be better than multi-thread +test because it has noise of shared objects/status. + +But the above two are testing extreme situations. +Trying usual test under memory controller is always helpful. + +4.1 Troubleshooting +------------------- + +Sometimes a user might find that the application under a cgroup is +terminated by the OOM killer. There are several causes for this: + +1. The cgroup limit is too low (just too low to do anything useful) +2. The user is using anonymous memory and swap is turned off or too low + +A sync followed by echo 1 > /proc/sys/vm/drop_caches will help get rid of +some of the pages cached in the cgroup (page cache pages). + +To know what happens, disabling OOM_Kill as per "10. OOM Control" (below) and +seeing what happens will be helpful. + +4.2 Task migration +------------------ + +When a task migrates from one cgroup to another, its charge is not +carried forward by default. The pages allocated from the original cgroup still +remain charged to it, the charge is dropped when the page is freed or +reclaimed. + +You can move charges of a task along with task migration. +See 8. "Move charges at task migration" + +4.3 Removing a cgroup +--------------------- + +A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a +cgroup might have some charge associated with it, even though all +tasks have migrated away from it. (because we charge against pages, not +against tasks.) + +We move the stats to root (if use_hierarchy==0) or parent (if +use_hierarchy==1), and no change on the charge except uncharging +from the child. + +Charges recorded in swap information is not updated at removal of cgroup. +Recorded information is discarded and a cgroup which uses swap (swapcache) +will be charged as a new owner of it. + +About use_hierarchy, see Section 6. + +5. Misc. interfaces +=================== + +5.1 force_empty +--------------- + memory.force_empty interface is provided to make cgroup's memory usage empty. + When writing anything to this:: + + # echo 0 > memory.force_empty + + the cgroup will be reclaimed and as many pages reclaimed as possible. + + The typical use case for this interface is before calling rmdir(). + Though rmdir() offlines memcg, but the memcg may still stay there due to + charged file caches. Some out-of-use page caches may keep charged until + memory pressure happens. If you want to avoid that, force_empty will be useful. + + Also, note that when memory.kmem.limit_in_bytes is set the charges due to + kernel pages will still be seen. This is not considered a failure and the + write will still return success. In this case, it is expected that + memory.kmem.usage_in_bytes == memory.usage_in_bytes. + + About use_hierarchy, see Section 6. + +5.2 stat file +------------- + +memory.stat file includes following statistics + +per-memory cgroup local status +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +=============== =============================================================== +cache # of bytes of page cache memory. +rss # of bytes of anonymous and swap cache memory (includes + transparent hugepages). +rss_huge # of bytes of anonymous transparent hugepages. +mapped_file # of bytes of mapped file (includes tmpfs/shmem) +pgpgin # of charging events to the memory cgroup. The charging + event happens each time a page is accounted as either mapped + anon page(RSS) or cache page(Page Cache) to the cgroup. +pgpgout # of uncharging events to the memory cgroup. The uncharging + event happens each time a page is unaccounted from the cgroup. +swap # of bytes of swap usage +dirty # of bytes that are waiting to get written back to the disk. +writeback # of bytes of file/anon cache that are queued for syncing to + disk. +inactive_anon # of bytes of anonymous and swap cache memory on inactive + LRU list. +active_anon # of bytes of anonymous and swap cache memory on active + LRU list. +inactive_file # of bytes of file-backed memory on inactive LRU list. +active_file # of bytes of file-backed memory on active LRU list. +unevictable # of bytes of memory that cannot be reclaimed (mlocked etc). +=============== =============================================================== + +status considering hierarchy (see memory.use_hierarchy settings) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +========================= =================================================== +hierarchical_memory_limit # of bytes of memory limit with regard to hierarchy + under which the memory cgroup is +hierarchical_memsw_limit # of bytes of memory+swap limit with regard to + hierarchy under which memory cgroup is. + +total_ # hierarchical version of , which in + addition to the cgroup's own value includes the + sum of all hierarchical children's values of + , i.e. total_cache +========================= =================================================== + +The following additional stats are dependent on CONFIG_DEBUG_VM +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +========================= ======================================== +recent_rotated_anon VM internal parameter. (see mm/vmscan.c) +recent_rotated_file VM internal parameter. (see mm/vmscan.c) +recent_scanned_anon VM internal parameter. (see mm/vmscan.c) +recent_scanned_file VM internal parameter. (see mm/vmscan.c) +========================= ======================================== + +Memo: + recent_rotated means recent frequency of LRU rotation. + recent_scanned means recent # of scans to LRU. + showing for better debug please see the code for meanings. + +Note: + Only anonymous and swap cache memory is listed as part of 'rss' stat. + This should not be confused with the true 'resident set size' or the + amount of physical memory used by the cgroup. + + 'rss + mapped_file" will give you resident set size of cgroup. + + (Note: file and shmem may be shared among other cgroups. In that case, + mapped_file is accounted only when the memory cgroup is owner of page + cache.) + +5.3 swappiness +-------------- + +Overrides /proc/sys/vm/swappiness for the particular group. The tunable +in the root cgroup corresponds to the global swappiness setting. + +Please note that unlike during the global reclaim, limit reclaim +enforces that 0 swappiness really prevents from any swapping even if +there is a swap storage available. This might lead to memcg OOM killer +if there are no file pages to reclaim. + +5.4 failcnt +----------- + +A memory cgroup provides memory.failcnt and memory.memsw.failcnt files. +This failcnt(== failure count) shows the number of times that a usage counter +hit its limit. When a memory cgroup hits a limit, failcnt increases and +memory under it will be reclaimed. + +You can reset failcnt by writing 0 to failcnt file:: + + # echo 0 > .../memory.failcnt + +5.5 usage_in_bytes +------------------ + +For efficiency, as other kernel components, memory cgroup uses some optimization +to avoid unnecessary cacheline false sharing. usage_in_bytes is affected by the +method and doesn't show 'exact' value of memory (and swap) usage, it's a fuzz +value for efficient access. (Of course, when necessary, it's synchronized.) +If you want to know more exact memory usage, you should use RSS+CACHE(+SWAP) +value in memory.stat(see 5.2). + +5.6 numa_stat +------------- + +This is similar to numa_maps but operates on a per-memcg basis. This is +useful for providing visibility into the numa locality information within +an memcg since the pages are allowed to be allocated from any physical +node. One of the use cases is evaluating application performance by +combining this information with the application's CPU allocation. + +Each memcg's numa_stat file includes "total", "file", "anon" and "unevictable" +per-node page counts including "hierarchical_" which sums up all +hierarchical children's values in addition to the memcg's own value. + +The output format of memory.numa_stat is:: + + total= N0= N1= ... + file= N0= N1= ... + anon= N0= N1= ... + unevictable= N0= N1= ... + hierarchical_= N0= N1= ... + +The "total" count is sum of file + anon + unevictable. + +6. Hierarchy support +==================== + +The memory controller supports a deep hierarchy and hierarchical accounting. +The hierarchy is created by creating the appropriate cgroups in the +cgroup filesystem. Consider for example, the following cgroup filesystem +hierarchy:: + + root + / | \ + / | \ + a b c + | \ + | \ + d e + +In the diagram above, with hierarchical accounting enabled, all memory +usage of e, is accounted to its ancestors up until the root (i.e, c and root), +that has memory.use_hierarchy enabled. If one of the ancestors goes over its +limit, the reclaim algorithm reclaims from the tasks in the ancestor and the +children of the ancestor. + +6.1 Enabling hierarchical accounting and reclaim +------------------------------------------------ + +A memory cgroup by default disables the hierarchy feature. Support +can be enabled by writing 1 to memory.use_hierarchy file of the root cgroup:: + + # echo 1 > memory.use_hierarchy + +The feature can be disabled by:: + + # echo 0 > memory.use_hierarchy + +NOTE1: + Enabling/disabling will fail if either the cgroup already has other + cgroups created below it, or if the parent cgroup has use_hierarchy + enabled. + +NOTE2: + When panic_on_oom is set to "2", the whole system will panic in + case of an OOM event in any cgroup. + +7. Soft limits +============== + +Soft limits allow for greater sharing of memory. The idea behind soft limits +is to allow control groups to use as much of the memory as needed, provided + +a. There is no memory contention +b. They do not exceed their hard limit + +When the system detects memory contention or low memory, control groups +are pushed back to their soft limits. If the soft limit of each control +group is very high, they are pushed back as much as possible to make +sure that one control group does not starve the others of memory. + +Please note that soft limits is a best-effort feature; it comes with +no guarantees, but it does its best to make sure that when memory is +heavily contended for, memory is allocated based on the soft limit +hints/setup. Currently soft limit based reclaim is set up such that +it gets invoked from balance_pgdat (kswapd). + +7.1 Interface +------------- + +Soft limits can be setup by using the following commands (in this example we +assume a soft limit of 256 MiB):: + + # echo 256M > memory.soft_limit_in_bytes + +If we want to change this to 1G, we can at any time use:: + + # echo 1G > memory.soft_limit_in_bytes + +NOTE1: + Soft limits take effect over a long period of time, since they involve + reclaiming memory for balancing between memory cgroups +NOTE2: + It is recommended to set the soft limit always below the hard limit, + otherwise the hard limit will take precedence. + +8. Move charges at task migration +================================= + +Users can move charges associated with a task along with task migration, that +is, uncharge task's pages from the old cgroup and charge them to the new cgroup. +This feature is not supported in !CONFIG_MMU environments because of lack of +page tables. + +8.1 Interface +------------- + +This feature is disabled by default. It can be enabled (and disabled again) by +writing to memory.move_charge_at_immigrate of the destination cgroup. + +If you want to enable it:: + + # echo (some positive value) > memory.move_charge_at_immigrate + +Note: + Each bits of move_charge_at_immigrate has its own meaning about what type + of charges should be moved. See 8.2 for details. +Note: + Charges are moved only when you move mm->owner, in other words, + a leader of a thread group. +Note: + If we cannot find enough space for the task in the destination cgroup, we + try to make space by reclaiming memory. Task migration may fail if we + cannot make enough space. +Note: + It can take several seconds if you move charges much. + +And if you want disable it again:: + + # echo 0 > memory.move_charge_at_immigrate + +8.2 Type of charges which can be moved +-------------------------------------- + +Each bit in move_charge_at_immigrate has its own meaning about what type of +charges should be moved. But in any case, it must be noted that an account of +a page or a swap can be moved only when it is charged to the task's current +(old) memory cgroup. + ++---+--------------------------------------------------------------------------+ +|bit| what type of charges would be moved ? | ++===+==========================================================================+ +| 0 | A charge of an anonymous page (or swap of it) used by the target task. | +| | You must enable Swap Extension (see 2.4) to enable move of swap charges. | ++---+--------------------------------------------------------------------------+ +| 1 | A charge of file pages (normal file, tmpfs file (e.g. ipc shared memory) | +| | and swaps of tmpfs file) mmapped by the target task. Unlike the case of | +| | anonymous pages, file pages (and swaps) in the range mmapped by the task | +| | will be moved even if the task hasn't done page fault, i.e. they might | +| | not be the task's "RSS", but other task's "RSS" that maps the same file. | +| | And mapcount of the page is ignored (the page can be moved even if | +| | page_mapcount(page) > 1). You must enable Swap Extension (see 2.4) to | +| | enable move of swap charges. | ++---+--------------------------------------------------------------------------+ + +8.3 TODO +-------- + +- All of moving charge operations are done under cgroup_mutex. It's not good + behavior to hold the mutex too long, so we may need some trick. + +9. Memory thresholds +==================== + +Memory cgroup implements memory thresholds using the cgroups notification +API (see cgroups.txt). It allows to register multiple memory and memsw +thresholds and gets notifications when it crosses. + +To register a threshold, an application must: + +- create an eventfd using eventfd(2); +- open memory.usage_in_bytes or memory.memsw.usage_in_bytes; +- write string like " " to + cgroup.event_control. + +Application will be notified through eventfd when memory usage crosses +threshold in any direction. + +It's applicable for root and non-root cgroup. + +10. OOM Control +=============== + +memory.oom_control file is for OOM notification and other controls. + +Memory cgroup implements OOM notifier using the cgroup notification +API (See cgroups.txt). It allows to register multiple OOM notification +delivery and gets notification when OOM happens. + +To register a notifier, an application must: + + - create an eventfd using eventfd(2) + - open memory.oom_control file + - write string like " " to + cgroup.event_control + +The application will be notified through eventfd when OOM happens. +OOM notification doesn't work for the root cgroup. + +You can disable the OOM-killer by writing "1" to memory.oom_control file, as: + + #echo 1 > memory.oom_control + +If OOM-killer is disabled, tasks under cgroup will hang/sleep +in memory cgroup's OOM-waitqueue when they request accountable memory. + +For running them, you have to relax the memory cgroup's OOM status by + + * enlarge limit or reduce usage. + +To reduce usage, + + * kill some tasks. + * move some tasks to other group with account migration. + * remove some files (on tmpfs?) + +Then, stopped tasks will work again. + +At reading, current status of OOM is shown. + + - oom_kill_disable 0 or 1 + (if 1, oom-killer is disabled) + - under_oom 0 or 1 + (if 1, the memory cgroup is under OOM, tasks may be stopped.) + +11. Memory Pressure +=================== + +The pressure level notifications can be used to monitor the memory +allocation cost; based on the pressure, applications can implement +different strategies of managing their memory resources. The pressure +levels are defined as following: + +The "low" level means that the system is reclaiming memory for new +allocations. Monitoring this reclaiming activity might be useful for +maintaining cache level. Upon notification, the program (typically +"Activity Manager") might analyze vmstat and act in advance (i.e. +prematurely shutdown unimportant services). + +The "medium" level means that the system is experiencing medium memory +pressure, the system might be making swap, paging out active file caches, +etc. Upon this event applications may decide to further analyze +vmstat/zoneinfo/memcg or internal memory usage statistics and free any +resources that can be easily reconstructed or re-read from a disk. + +The "critical" level means that the system is actively thrashing, it is +about to out of memory (OOM) or even the in-kernel OOM killer is on its +way to trigger. Applications should do whatever they can to help the +system. It might be too late to consult with vmstat or any other +statistics, so it's advisable to take an immediate action. + +By default, events are propagated upward until the event is handled, i.e. the +events are not pass-through. For example, you have three cgroups: A->B->C. Now +you set up an event listener on cgroups A, B and C, and suppose group C +experiences some pressure. In this situation, only group C will receive the +notification, i.e. groups A and B will not receive it. This is done to avoid +excessive "broadcasting" of messages, which disturbs the system and which is +especially bad if we are low on memory or thrashing. Group B, will receive +notification only if there are no event listers for group C. + +There are three optional modes that specify different propagation behavior: + + - "default": this is the default behavior specified above. This mode is the + same as omitting the optional mode parameter, preserved by backwards + compatibility. + + - "hierarchy": events always propagate up to the root, similar to the default + behavior, except that propagation continues regardless of whether there are + event listeners at each level, with the "hierarchy" mode. In the above + example, groups A, B, and C will receive notification of memory pressure. + + - "local": events are pass-through, i.e. they only receive notifications when + memory pressure is experienced in the memcg for which the notification is + registered. In the above example, group C will receive notification if + registered for "local" notification and the group experiences memory + pressure. However, group B will never receive notification, regardless if + there is an event listener for group C or not, if group B is registered for + local notification. + +The level and event notification mode ("hierarchy" or "local", if necessary) are +specified by a comma-delimited string, i.e. "low,hierarchy" specifies +hierarchical, pass-through, notification for all ancestor memcgs. Notification +that is the default, non pass-through behavior, does not specify a mode. +"medium,local" specifies pass-through notification for the medium level. + +The file memory.pressure_level is only used to setup an eventfd. To +register a notification, an application must: + +- create an eventfd using eventfd(2); +- open memory.pressure_level; +- write string as " " + to cgroup.event_control. + +Application will be notified through eventfd when memory pressure is at +the specific level (or higher). Read/write operations to +memory.pressure_level are no implemented. + +Test: + + Here is a small script example that makes a new cgroup, sets up a + memory limit, sets up a notification in the cgroup and then makes child + cgroup experience a critical pressure:: + + # cd /sys/fs/cgroup/memory/ + # mkdir foo + # cd foo + # cgroup_event_listener memory.pressure_level low,hierarchy & + # echo 8000000 > memory.limit_in_bytes + # echo 8000000 > memory.memsw.limit_in_bytes + # echo $$ > tasks + # dd if=/dev/zero | read x + + (Expect a bunch of notifications, and eventually, the oom-killer will + trigger.) + +12. TODO +======== + +1. Make per-cgroup scanner reclaim not-shared pages first +2. Teach controller to account for shared-pages +3. Start reclamation in the background when the limit is + not yet hit but the usage is getting closer + +Summary +======= + +Overall, the memory controller has been a stable controller and has been +commented and discussed quite extensively in the community. + +References +========== + +1. Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/ +2. Singh, Balbir. Memory Controller (RSS Control), + http://lwn.net/Articles/222762/ +3. Emelianov, Pavel. Resource controllers based on process cgroups + http://lkml.org/lkml/2007/3/6/198 +4. Emelianov, Pavel. RSS controller based on process cgroups (v2) + http://lkml.org/lkml/2007/4/9/78 +5. Emelianov, Pavel. RSS controller based on process cgroups (v3) + http://lkml.org/lkml/2007/5/30/244 +6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/ +7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control + subsystem (v3), http://lwn.net/Articles/235534/ +8. Singh, Balbir. RSS controller v2 test results (lmbench), + http://lkml.org/lkml/2007/5/17/232 +9. Singh, Balbir. RSS controller v2 AIM9 results + http://lkml.org/lkml/2007/5/18/1 +10. Singh, Balbir. Memory controller v6 test results, + http://lkml.org/lkml/2007/8/19/36 +11. Singh, Balbir. Memory controller introduction (v6), + http://lkml.org/lkml/2007/8/17/69 +12. Corbet, Jonathan, Controlling memory use in cgroups, + http://lwn.net/Articles/243795/ diff --git a/Documentation/cgroup-v1/memory.txt b/Documentation/cgroup-v1/memory.txt deleted file mode 100644 index a33cedf85427..000000000000 --- a/Documentation/cgroup-v1/memory.txt +++ /dev/null @@ -1,892 +0,0 @@ -Memory Resource Controller - -NOTE: This document is hopelessly outdated and it asks for a complete - rewrite. It still contains a useful information so we are keeping it - here but make sure to check the current code if you need a deeper - understanding. - -NOTE: The Memory Resource Controller has generically been referred to as the - memory controller in this document. Do not confuse memory controller - used here with the memory controller that is used in hardware. - -(For editors) -In this document: - When we mention a cgroup (cgroupfs's directory) with memory controller, - we call it "memory cgroup". When you see git-log and source code, you'll - see patch's title and function names tend to use "memcg". - In this document, we avoid using it. - -Benefits and Purpose of the memory controller - -The memory controller isolates the memory behaviour of a group of tasks -from the rest of the system. The article on LWN [12] mentions some probable -uses of the memory controller. The memory controller can be used to - -a. Isolate an application or a group of applications - Memory-hungry applications can be isolated and limited to a smaller - amount of memory. -b. Create a cgroup with a limited amount of memory; this can be used - as a good alternative to booting with mem=XXXX. -c. Virtualization solutions can control the amount of memory they want - to assign to a virtual machine instance. -d. A CD/DVD burner could control the amount of memory used by the - rest of the system to ensure that burning does not fail due to lack - of available memory. -e. There are several other use cases; find one or use the controller just - for fun (to learn and hack on the VM subsystem). - -Current Status: linux-2.6.34-mmotm(development version of 2010/April) - -Features: - - accounting anonymous pages, file caches, swap caches usage and limiting them. - - pages are linked to per-memcg LRU exclusively, and there is no global LRU. - - optionally, memory+swap usage can be accounted and limited. - - hierarchical accounting - - soft limit - - moving (recharging) account at moving a task is selectable. - - usage threshold notifier - - memory pressure notifier - - oom-killer disable knob and oom-notifier - - Root cgroup has no limit controls. - - Kernel memory support is a work in progress, and the current version provides - basically functionality. (See Section 2.7) - -Brief summary of control files. - - tasks # attach a task(thread) and show list of threads - cgroup.procs # show list of processes - cgroup.event_control # an interface for event_fd() - memory.usage_in_bytes # show current usage for memory - (See 5.5 for details) - memory.memsw.usage_in_bytes # show current usage for memory+Swap - (See 5.5 for details) - memory.limit_in_bytes # set/show limit of memory usage - memory.memsw.limit_in_bytes # set/show limit of memory+Swap usage - memory.failcnt # show the number of memory usage hits limits - memory.memsw.failcnt # show the number of memory+Swap hits limits - memory.max_usage_in_bytes # show max memory usage recorded - memory.memsw.max_usage_in_bytes # show max memory+Swap usage recorded - memory.soft_limit_in_bytes # set/show soft limit of memory usage - memory.stat # show various statistics - memory.use_hierarchy # set/show hierarchical account enabled - memory.force_empty # trigger forced page reclaim - memory.pressure_level # set memory pressure notifications - memory.swappiness # set/show swappiness parameter of vmscan - (See sysctl's vm.swappiness) - memory.move_charge_at_immigrate # set/show controls of moving charges - memory.oom_control # set/show oom controls. - memory.numa_stat # show the number of memory usage per numa node - - memory.kmem.limit_in_bytes # set/show hard limit for kernel memory - memory.kmem.usage_in_bytes # show current kernel memory allocation - memory.kmem.failcnt # show the number of kernel memory usage hits limits - memory.kmem.max_usage_in_bytes # show max kernel memory usage recorded - - memory.kmem.tcp.limit_in_bytes # set/show hard limit for tcp buf memory - memory.kmem.tcp.usage_in_bytes # show current tcp buf memory allocation - memory.kmem.tcp.failcnt # show the number of tcp buf memory usage hits limits - memory.kmem.tcp.max_usage_in_bytes # show max tcp buf memory usage recorded - -1. History - -The memory controller has a long history. A request for comments for the memory -controller was posted by Balbir Singh [1]. At the time the RFC was posted -there were several implementations for memory control. The goal of the -RFC was to build consensus and agreement for the minimal features required -for memory control. The first RSS controller was posted by Balbir Singh[2] -in Feb 2007. Pavel Emelianov [3][4][5] has since posted three versions of the -RSS controller. At OLS, at the resource management BoF, everyone suggested -that we handle both page cache and RSS together. Another request was raised -to allow user space handling of OOM. The current memory controller is -at version 6; it combines both mapped (RSS) and unmapped Page -Cache Control [11]. - -2. Memory Control - -Memory is a unique resource in the sense that it is present in a limited -amount. If a task requires a lot of CPU processing, the task can spread -its processing over a period of hours, days, months or years, but with -memory, the same physical memory needs to be reused to accomplish the task. - -The memory controller implementation has been divided into phases. These -are: - -1. Memory controller -2. mlock(2) controller -3. Kernel user memory accounting and slab control -4. user mappings length controller - -The memory controller is the first controller developed. - -2.1. Design - -The core of the design is a counter called the page_counter. The -page_counter tracks the current memory usage and limit of the group of -processes associated with the controller. Each cgroup has a memory controller -specific data structure (mem_cgroup) associated with it. - -2.2. Accounting - - +--------------------+ - | mem_cgroup | - | (page_counter) | - +--------------------+ - / ^ \ - / | \ - +---------------+ | +---------------+ - | mm_struct | |.... | mm_struct | - | | | | | - +---------------+ | +---------------+ - | - + --------------+ - | - +---------------+ +------+--------+ - | page +----------> page_cgroup| - | | | | - +---------------+ +---------------+ - - (Figure 1: Hierarchy of Accounting) - - -Figure 1 shows the important aspects of the controller - -1. Accounting happens per cgroup -2. Each mm_struct knows about which cgroup it belongs to -3. Each page has a pointer to the page_cgroup, which in turn knows the - cgroup it belongs to - -The accounting is done as follows: mem_cgroup_charge_common() is invoked to -set up the necessary data structures and check if the cgroup that is being -charged is over its limit. If it is, then reclaim is invoked on the cgroup. -More details can be found in the reclaim section of this document. -If everything goes well, a page meta-data-structure called page_cgroup is -updated. page_cgroup has its own LRU on cgroup. -(*) page_cgroup structure is allocated at boot/memory-hotplug time. - -2.2.1 Accounting details - -All mapped anon pages (RSS) and cache pages (Page Cache) are accounted. -Some pages which are never reclaimable and will not be on the LRU -are not accounted. We just account pages under usual VM management. - -RSS pages are accounted at page_fault unless they've already been accounted -for earlier. A file page will be accounted for as Page Cache when it's -inserted into inode (radix-tree). While it's mapped into the page tables of -processes, duplicate accounting is carefully avoided. - -An RSS page is unaccounted when it's fully unmapped. A PageCache page is -unaccounted when it's removed from radix-tree. Even if RSS pages are fully -unmapped (by kswapd), they may exist as SwapCache in the system until they -are really freed. Such SwapCaches are also accounted. -A swapped-in page is not accounted until it's mapped. - -Note: The kernel does swapin-readahead and reads multiple swaps at once. -This means swapped-in pages may contain pages for other tasks than a task -causing page fault. So, we avoid accounting at swap-in I/O. - -At page migration, accounting information is kept. - -Note: we just account pages-on-LRU because our purpose is to control amount -of used pages; not-on-LRU pages tend to be out-of-control from VM view. - -2.3 Shared Page Accounting - -Shared pages are accounted on the basis of the first touch approach. The -cgroup that first touches a page is accounted for the page. The principle -behind this approach is that a cgroup that aggressively uses a shared -page will eventually get charged for it (once it is uncharged from -the cgroup that brought it in -- this will happen on memory pressure). - -But see section 8.2: when moving a task to another cgroup, its pages may -be recharged to the new cgroup, if move_charge_at_immigrate has been chosen. - -Exception: If CONFIG_MEMCG_SWAP is not used. -When you do swapoff and make swapped-out pages of shmem(tmpfs) to -be backed into memory in force, charges for pages are accounted against the -caller of swapoff rather than the users of shmem. - -2.4 Swap Extension (CONFIG_MEMCG_SWAP) - -Swap Extension allows you to record charge for swap. A swapped-in page is -charged back to original page allocator if possible. - -When swap is accounted, following files are added. - - memory.memsw.usage_in_bytes. - - memory.memsw.limit_in_bytes. - -memsw means memory+swap. Usage of memory+swap is limited by -memsw.limit_in_bytes. - -Example: Assume a system with 4G of swap. A task which allocates 6G of memory -(by mistake) under 2G memory limitation will use all swap. -In this case, setting memsw.limit_in_bytes=3G will prevent bad use of swap. -By using the memsw limit, you can avoid system OOM which can be caused by swap -shortage. - -* why 'memory+swap' rather than swap. -The global LRU(kswapd) can swap out arbitrary pages. Swap-out means -to move account from memory to swap...there is no change in usage of -memory+swap. In other words, when we want to limit the usage of swap without -affecting global LRU, memory+swap limit is better than just limiting swap from -an OS point of view. - -* What happens when a cgroup hits memory.memsw.limit_in_bytes -When a cgroup hits memory.memsw.limit_in_bytes, it's useless to do swap-out -in this cgroup. Then, swap-out will not be done by cgroup routine and file -caches are dropped. But as mentioned above, global LRU can do swapout memory -from it for sanity of the system's memory management state. You can't forbid -it by cgroup. - -2.5 Reclaim - -Each cgroup maintains a per cgroup LRU which has the same structure as -global VM. When a cgroup goes over its limit, we first try -to reclaim memory from the cgroup so as to make space for the new -pages that the cgroup has touched. If the reclaim is unsuccessful, -an OOM routine is invoked to select and kill the bulkiest task in the -cgroup. (See 10. OOM Control below.) - -The reclaim algorithm has not been modified for cgroups, except that -pages that are selected for reclaiming come from the per-cgroup LRU -list. - -NOTE: Reclaim does not work for the root cgroup, since we cannot set any -limits on the root cgroup. - -Note2: When panic_on_oom is set to "2", the whole system will panic. - -When oom event notifier is registered, event will be delivered. -(See oom_control section) - -2.6 Locking - - lock_page_cgroup()/unlock_page_cgroup() should not be called under - the i_pages lock. - - Other lock order is following: - PG_locked. - mm->page_table_lock - pgdat->lru_lock - lock_page_cgroup. - In many cases, just lock_page_cgroup() is called. - per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by - pgdat->lru_lock, it has no lock of its own. - -2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM) - -With the Kernel memory extension, the Memory Controller is able to limit -the amount of kernel memory used by the system. Kernel memory is fundamentally -different than user memory, since it can't be swapped out, which makes it -possible to DoS the system by consuming too much of this precious resource. - -Kernel memory accounting is enabled for all memory cgroups by default. But -it can be disabled system-wide by passing cgroup.memory=nokmem to the kernel -at boot time. In this case, kernel memory will not be accounted at all. - -Kernel memory limits are not imposed for the root cgroup. Usage for the root -cgroup may or may not be accounted. The memory used is accumulated into -memory.kmem.usage_in_bytes, or in a separate counter when it makes sense. -(currently only for tcp). -The main "kmem" counter is fed into the main counter, so kmem charges will -also be visible from the user counter. - -Currently no soft limit is implemented for kernel memory. It is future work -to trigger slab reclaim when those limits are reached. - -2.7.1 Current Kernel Memory resources accounted - -* stack pages: every process consumes some stack pages. By accounting into -kernel memory, we prevent new processes from being created when the kernel -memory usage is too high. - -* slab pages: pages allocated by the SLAB or SLUB allocator are tracked. A copy -of each kmem_cache is created every time the cache is touched by the first time -from inside the memcg. The creation is done lazily, so some objects can still be -skipped while the cache is being created. All objects in a slab page should -belong to the same memcg. This only fails to hold when a task is migrated to a -different memcg during the page allocation by the cache. - -* sockets memory pressure: some sockets protocols have memory pressure -thresholds. The Memory Controller allows them to be controlled individually -per cgroup, instead of globally. - -* tcp memory pressure: sockets memory pressure for the tcp protocol. - -2.7.2 Common use cases - -Because the "kmem" counter is fed to the main user counter, kernel memory can -never be limited completely independently of user memory. Say "U" is the user -limit, and "K" the kernel limit. There are three possible ways limits can be -set: - - U != 0, K = unlimited: - This is the standard memcg limitation mechanism already present before kmem - accounting. Kernel memory is completely ignored. - - U != 0, K < U: - Kernel memory is a subset of the user memory. This setup is useful in - deployments where the total amount of memory per-cgroup is overcommited. - Overcommiting kernel memory limits is definitely not recommended, since the - box can still run out of non-reclaimable memory. - In this case, the admin could set up K so that the sum of all groups is - never greater than the total memory, and freely set U at the cost of his - QoS. - WARNING: In the current implementation, memory reclaim will NOT be - triggered for a cgroup when it hits K while staying below U, which makes - this setup impractical. - - U != 0, K >= U: - Since kmem charges will also be fed to the user counter and reclaim will be - triggered for the cgroup for both kinds of memory. This setup gives the - admin a unified view of memory, and it is also useful for people who just - want to track kernel memory usage. - -3. User Interface - -3.0. Configuration - -a. Enable CONFIG_CGROUPS -b. Enable CONFIG_MEMCG -c. Enable CONFIG_MEMCG_SWAP (to use swap extension) -d. Enable CONFIG_MEMCG_KMEM (to use kmem extension) - -3.1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?) -# mount -t tmpfs none /sys/fs/cgroup -# mkdir /sys/fs/cgroup/memory -# mount -t cgroup none /sys/fs/cgroup/memory -o memory - -3.2. Make the new group and move bash into it -# mkdir /sys/fs/cgroup/memory/0 -# echo $$ > /sys/fs/cgroup/memory/0/tasks - -Since now we're in the 0 cgroup, we can alter the memory limit: -# echo 4M > /sys/fs/cgroup/memory/0/memory.limit_in_bytes - -NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, -mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, Gibibytes.) - -NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited). -NOTE: We cannot set limits on the root cgroup any more. - -# cat /sys/fs/cgroup/memory/0/memory.limit_in_bytes -4194304 - -We can check the usage: -# cat /sys/fs/cgroup/memory/0/memory.usage_in_bytes -1216512 - -A successful write to this file does not guarantee a successful setting of -this limit to the value written into the file. This can be due to a -number of factors, such as rounding up to page boundaries or the total -availability of memory on the system. The user is required to re-read -this file after a write to guarantee the value committed by the kernel. - -# echo 1 > memory.limit_in_bytes -# cat memory.limit_in_bytes -4096 - -The memory.failcnt field gives the number of times that the cgroup limit was -exceeded. - -The memory.stat file gives accounting information. Now, the number of -caches, RSS and Active pages/Inactive pages are shown. - -4. Testing - -For testing features and implementation, see memcg_test.txt. - -Performance test is also important. To see pure memory controller's overhead, -testing on tmpfs will give you good numbers of small overheads. -Example: do kernel make on tmpfs. - -Page-fault scalability is also important. At measuring parallel -page fault test, multi-process test may be better than multi-thread -test because it has noise of shared objects/status. - -But the above two are testing extreme situations. -Trying usual test under memory controller is always helpful. - -4.1 Troubleshooting - -Sometimes a user might find that the application under a cgroup is -terminated by the OOM killer. There are several causes for this: - -1. The cgroup limit is too low (just too low to do anything useful) -2. The user is using anonymous memory and swap is turned off or too low - -A sync followed by echo 1 > /proc/sys/vm/drop_caches will help get rid of -some of the pages cached in the cgroup (page cache pages). - -To know what happens, disabling OOM_Kill as per "10. OOM Control" (below) and -seeing what happens will be helpful. - -4.2 Task migration - -When a task migrates from one cgroup to another, its charge is not -carried forward by default. The pages allocated from the original cgroup still -remain charged to it, the charge is dropped when the page is freed or -reclaimed. - -You can move charges of a task along with task migration. -See 8. "Move charges at task migration" - -4.3 Removing a cgroup - -A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a -cgroup might have some charge associated with it, even though all -tasks have migrated away from it. (because we charge against pages, not -against tasks.) - -We move the stats to root (if use_hierarchy==0) or parent (if -use_hierarchy==1), and no change on the charge except uncharging -from the child. - -Charges recorded in swap information is not updated at removal of cgroup. -Recorded information is discarded and a cgroup which uses swap (swapcache) -will be charged as a new owner of it. - -About use_hierarchy, see Section 6. - -5. Misc. interfaces. - -5.1 force_empty - memory.force_empty interface is provided to make cgroup's memory usage empty. - When writing anything to this - - # echo 0 > memory.force_empty - - the cgroup will be reclaimed and as many pages reclaimed as possible. - - The typical use case for this interface is before calling rmdir(). - Though rmdir() offlines memcg, but the memcg may still stay there due to - charged file caches. Some out-of-use page caches may keep charged until - memory pressure happens. If you want to avoid that, force_empty will be useful. - - Also, note that when memory.kmem.limit_in_bytes is set the charges due to - kernel pages will still be seen. This is not considered a failure and the - write will still return success. In this case, it is expected that - memory.kmem.usage_in_bytes == memory.usage_in_bytes. - - About use_hierarchy, see Section 6. - -5.2 stat file - -memory.stat file includes following statistics - -# per-memory cgroup local status -cache - # of bytes of page cache memory. -rss - # of bytes of anonymous and swap cache memory (includes - transparent hugepages). -rss_huge - # of bytes of anonymous transparent hugepages. -mapped_file - # of bytes of mapped file (includes tmpfs/shmem) -pgpgin - # of charging events to the memory cgroup. The charging - event happens each time a page is accounted as either mapped - anon page(RSS) or cache page(Page Cache) to the cgroup. -pgpgout - # of uncharging events to the memory cgroup. The uncharging - event happens each time a page is unaccounted from the cgroup. -swap - # of bytes of swap usage -dirty - # of bytes that are waiting to get written back to the disk. -writeback - # of bytes of file/anon cache that are queued for syncing to - disk. -inactive_anon - # of bytes of anonymous and swap cache memory on inactive - LRU list. -active_anon - # of bytes of anonymous and swap cache memory on active - LRU list. -inactive_file - # of bytes of file-backed memory on inactive LRU list. -active_file - # of bytes of file-backed memory on active LRU list. -unevictable - # of bytes of memory that cannot be reclaimed (mlocked etc). - -# status considering hierarchy (see memory.use_hierarchy settings) - -hierarchical_memory_limit - # of bytes of memory limit with regard to hierarchy - under which the memory cgroup is -hierarchical_memsw_limit - # of bytes of memory+swap limit with regard to - hierarchy under which memory cgroup is. - -total_ - # hierarchical version of , which in - addition to the cgroup's own value includes the - sum of all hierarchical children's values of - , i.e. total_cache - -# The following additional stats are dependent on CONFIG_DEBUG_VM. - -recent_rotated_anon - VM internal parameter. (see mm/vmscan.c) -recent_rotated_file - VM internal parameter. (see mm/vmscan.c) -recent_scanned_anon - VM internal parameter. (see mm/vmscan.c) -recent_scanned_file - VM internal parameter. (see mm/vmscan.c) - -Memo: - recent_rotated means recent frequency of LRU rotation. - recent_scanned means recent # of scans to LRU. - showing for better debug please see the code for meanings. - -Note: - Only anonymous and swap cache memory is listed as part of 'rss' stat. - This should not be confused with the true 'resident set size' or the - amount of physical memory used by the cgroup. - 'rss + mapped_file" will give you resident set size of cgroup. - (Note: file and shmem may be shared among other cgroups. In that case, - mapped_file is accounted only when the memory cgroup is owner of page - cache.) - -5.3 swappiness - -Overrides /proc/sys/vm/swappiness for the particular group. The tunable -in the root cgroup corresponds to the global swappiness setting. - -Please note that unlike during the global reclaim, limit reclaim -enforces that 0 swappiness really prevents from any swapping even if -there is a swap storage available. This might lead to memcg OOM killer -if there are no file pages to reclaim. - -5.4 failcnt - -A memory cgroup provides memory.failcnt and memory.memsw.failcnt files. -This failcnt(== failure count) shows the number of times that a usage counter -hit its limit. When a memory cgroup hits a limit, failcnt increases and -memory under it will be reclaimed. - -You can reset failcnt by writing 0 to failcnt file. -# echo 0 > .../memory.failcnt - -5.5 usage_in_bytes - -For efficiency, as other kernel components, memory cgroup uses some optimization -to avoid unnecessary cacheline false sharing. usage_in_bytes is affected by the -method and doesn't show 'exact' value of memory (and swap) usage, it's a fuzz -value for efficient access. (Of course, when necessary, it's synchronized.) -If you want to know more exact memory usage, you should use RSS+CACHE(+SWAP) -value in memory.stat(see 5.2). - -5.6 numa_stat - -This is similar to numa_maps but operates on a per-memcg basis. This is -useful for providing visibility into the numa locality information within -an memcg since the pages are allowed to be allocated from any physical -node. One of the use cases is evaluating application performance by -combining this information with the application's CPU allocation. - -Each memcg's numa_stat file includes "total", "file", "anon" and "unevictable" -per-node page counts including "hierarchical_" which sums up all -hierarchical children's values in addition to the memcg's own value. - -The output format of memory.numa_stat is: - -total= N0= N1= ... -file= N0= N1= ... -anon= N0= N1= ... -unevictable= N0= N1= ... -hierarchical_= N0= N1= ... - -The "total" count is sum of file + anon + unevictable. - -6. Hierarchy support - -The memory controller supports a deep hierarchy and hierarchical accounting. -The hierarchy is created by creating the appropriate cgroups in the -cgroup filesystem. Consider for example, the following cgroup filesystem -hierarchy - - root - / | \ - / | \ - a b c - | \ - | \ - d e - -In the diagram above, with hierarchical accounting enabled, all memory -usage of e, is accounted to its ancestors up until the root (i.e, c and root), -that has memory.use_hierarchy enabled. If one of the ancestors goes over its -limit, the reclaim algorithm reclaims from the tasks in the ancestor and the -children of the ancestor. - -6.1 Enabling hierarchical accounting and reclaim - -A memory cgroup by default disables the hierarchy feature. Support -can be enabled by writing 1 to memory.use_hierarchy file of the root cgroup - -# echo 1 > memory.use_hierarchy - -The feature can be disabled by - -# echo 0 > memory.use_hierarchy - -NOTE1: Enabling/disabling will fail if either the cgroup already has other - cgroups created below it, or if the parent cgroup has use_hierarchy - enabled. - -NOTE2: When panic_on_oom is set to "2", the whole system will panic in - case of an OOM event in any cgroup. - -7. Soft limits - -Soft limits allow for greater sharing of memory. The idea behind soft limits -is to allow control groups to use as much of the memory as needed, provided - -a. There is no memory contention -b. They do not exceed their hard limit - -When the system detects memory contention or low memory, control groups -are pushed back to their soft limits. If the soft limit of each control -group is very high, they are pushed back as much as possible to make -sure that one control group does not starve the others of memory. - -Please note that soft limits is a best-effort feature; it comes with -no guarantees, but it does its best to make sure that when memory is -heavily contended for, memory is allocated based on the soft limit -hints/setup. Currently soft limit based reclaim is set up such that -it gets invoked from balance_pgdat (kswapd). - -7.1 Interface - -Soft limits can be setup by using the following commands (in this example we -assume a soft limit of 256 MiB) - -# echo 256M > memory.soft_limit_in_bytes - -If we want to change this to 1G, we can at any time use - -# echo 1G > memory.soft_limit_in_bytes - -NOTE1: Soft limits take effect over a long period of time, since they involve - reclaiming memory for balancing between memory cgroups -NOTE2: It is recommended to set the soft limit always below the hard limit, - otherwise the hard limit will take precedence. - -8. Move charges at task migration - -Users can move charges associated with a task along with task migration, that -is, uncharge task's pages from the old cgroup and charge them to the new cgroup. -This feature is not supported in !CONFIG_MMU environments because of lack of -page tables. - -8.1 Interface - -This feature is disabled by default. It can be enabled (and disabled again) by -writing to memory.move_charge_at_immigrate of the destination cgroup. - -If you want to enable it: - -# echo (some positive value) > memory.move_charge_at_immigrate - -Note: Each bits of move_charge_at_immigrate has its own meaning about what type - of charges should be moved. See 8.2 for details. -Note: Charges are moved only when you move mm->owner, in other words, - a leader of a thread group. -Note: If we cannot find enough space for the task in the destination cgroup, we - try to make space by reclaiming memory. Task migration may fail if we - cannot make enough space. -Note: It can take several seconds if you move charges much. - -And if you want disable it again: - -# echo 0 > memory.move_charge_at_immigrate - -8.2 Type of charges which can be moved - -Each bit in move_charge_at_immigrate has its own meaning about what type of -charges should be moved. But in any case, it must be noted that an account of -a page or a swap can be moved only when it is charged to the task's current -(old) memory cgroup. - - bit | what type of charges would be moved ? - -----+------------------------------------------------------------------------ - 0 | A charge of an anonymous page (or swap of it) used by the target task. - | You must enable Swap Extension (see 2.4) to enable move of swap charges. - -----+------------------------------------------------------------------------ - 1 | A charge of file pages (normal file, tmpfs file (e.g. ipc shared memory) - | and swaps of tmpfs file) mmapped by the target task. Unlike the case of - | anonymous pages, file pages (and swaps) in the range mmapped by the task - | will be moved even if the task hasn't done page fault, i.e. they might - | not be the task's "RSS", but other task's "RSS" that maps the same file. - | And mapcount of the page is ignored (the page can be moved even if - | page_mapcount(page) > 1). You must enable Swap Extension (see 2.4) to - | enable move of swap charges. - -8.3 TODO - -- All of moving charge operations are done under cgroup_mutex. It's not good - behavior to hold the mutex too long, so we may need some trick. - -9. Memory thresholds - -Memory cgroup implements memory thresholds using the cgroups notification -API (see cgroups.txt). It allows to register multiple memory and memsw -thresholds and gets notifications when it crosses. - -To register a threshold, an application must: -- create an eventfd using eventfd(2); -- open memory.usage_in_bytes or memory.memsw.usage_in_bytes; -- write string like " " to - cgroup.event_control. - -Application will be notified through eventfd when memory usage crosses -threshold in any direction. - -It's applicable for root and non-root cgroup. - -10. OOM Control - -memory.oom_control file is for OOM notification and other controls. - -Memory cgroup implements OOM notifier using the cgroup notification -API (See cgroups.txt). It allows to register multiple OOM notification -delivery and gets notification when OOM happens. - -To register a notifier, an application must: - - create an eventfd using eventfd(2) - - open memory.oom_control file - - write string like " " to - cgroup.event_control - -The application will be notified through eventfd when OOM happens. -OOM notification doesn't work for the root cgroup. - -You can disable the OOM-killer by writing "1" to memory.oom_control file, as: - - #echo 1 > memory.oom_control - -If OOM-killer is disabled, tasks under cgroup will hang/sleep -in memory cgroup's OOM-waitqueue when they request accountable memory. - -For running them, you have to relax the memory cgroup's OOM status by - * enlarge limit or reduce usage. -To reduce usage, - * kill some tasks. - * move some tasks to other group with account migration. - * remove some files (on tmpfs?) - -Then, stopped tasks will work again. - -At reading, current status of OOM is shown. - oom_kill_disable 0 or 1 (if 1, oom-killer is disabled) - under_oom 0 or 1 (if 1, the memory cgroup is under OOM, tasks may - be stopped.) - -11. Memory Pressure - -The pressure level notifications can be used to monitor the memory -allocation cost; based on the pressure, applications can implement -different strategies of managing their memory resources. The pressure -levels are defined as following: - -The "low" level means that the system is reclaiming memory for new -allocations. Monitoring this reclaiming activity might be useful for -maintaining cache level. Upon notification, the program (typically -"Activity Manager") might analyze vmstat and act in advance (i.e. -prematurely shutdown unimportant services). - -The "medium" level means that the system is experiencing medium memory -pressure, the system might be making swap, paging out active file caches, -etc. Upon this event applications may decide to further analyze -vmstat/zoneinfo/memcg or internal memory usage statistics and free any -resources that can be easily reconstructed or re-read from a disk. - -The "critical" level means that the system is actively thrashing, it is -about to out of memory (OOM) or even the in-kernel OOM killer is on its -way to trigger. Applications should do whatever they can to help the -system. It might be too late to consult with vmstat or any other -statistics, so it's advisable to take an immediate action. - -By default, events are propagated upward until the event is handled, i.e. the -events are not pass-through. For example, you have three cgroups: A->B->C. Now -you set up an event listener on cgroups A, B and C, and suppose group C -experiences some pressure. In this situation, only group C will receive the -notification, i.e. groups A and B will not receive it. This is done to avoid -excessive "broadcasting" of messages, which disturbs the system and which is -especially bad if we are low on memory or thrashing. Group B, will receive -notification only if there are no event listers for group C. - -There are three optional modes that specify different propagation behavior: - - - "default": this is the default behavior specified above. This mode is the - same as omitting the optional mode parameter, preserved by backwards - compatibility. - - - "hierarchy": events always propagate up to the root, similar to the default - behavior, except that propagation continues regardless of whether there are - event listeners at each level, with the "hierarchy" mode. In the above - example, groups A, B, and C will receive notification of memory pressure. - - - "local": events are pass-through, i.e. they only receive notifications when - memory pressure is experienced in the memcg for which the notification is - registered. In the above example, group C will receive notification if - registered for "local" notification and the group experiences memory - pressure. However, group B will never receive notification, regardless if - there is an event listener for group C or not, if group B is registered for - local notification. - -The level and event notification mode ("hierarchy" or "local", if necessary) are -specified by a comma-delimited string, i.e. "low,hierarchy" specifies -hierarchical, pass-through, notification for all ancestor memcgs. Notification -that is the default, non pass-through behavior, does not specify a mode. -"medium,local" specifies pass-through notification for the medium level. - -The file memory.pressure_level is only used to setup an eventfd. To -register a notification, an application must: - -- create an eventfd using eventfd(2); -- open memory.pressure_level; -- write string as " " - to cgroup.event_control. - -Application will be notified through eventfd when memory pressure is at -the specific level (or higher). Read/write operations to -memory.pressure_level are no implemented. - -Test: - - Here is a small script example that makes a new cgroup, sets up a - memory limit, sets up a notification in the cgroup and then makes child - cgroup experience a critical pressure: - - # cd /sys/fs/cgroup/memory/ - # mkdir foo - # cd foo - # cgroup_event_listener memory.pressure_level low,hierarchy & - # echo 8000000 > memory.limit_in_bytes - # echo 8000000 > memory.memsw.limit_in_bytes - # echo $$ > tasks - # dd if=/dev/zero | read x - - (Expect a bunch of notifications, and eventually, the oom-killer will - trigger.) - -12. TODO - -1. Make per-cgroup scanner reclaim not-shared pages first -2. Teach controller to account for shared-pages -3. Start reclamation in the background when the limit is - not yet hit but the usage is getting closer - -Summary - -Overall, the memory controller has been a stable controller and has been -commented and discussed quite extensively in the community. - -References - -1. Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/ -2. Singh, Balbir. Memory Controller (RSS Control), - http://lwn.net/Articles/222762/ -3. Emelianov, Pavel. Resource controllers based on process cgroups - http://lkml.org/lkml/2007/3/6/198 -4. Emelianov, Pavel. RSS controller based on process cgroups (v2) - http://lkml.org/lkml/2007/4/9/78 -5. Emelianov, Pavel. RSS controller based on process cgroups (v3) - http://lkml.org/lkml/2007/5/30/244 -6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/ -7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control - subsystem (v3), http://lwn.net/Articles/235534/ -8. Singh, Balbir. RSS controller v2 test results (lmbench), - http://lkml.org/lkml/2007/5/17/232 -9. Singh, Balbir. RSS controller v2 AIM9 results - http://lkml.org/lkml/2007/5/18/1 -10. Singh, Balbir. Memory controller v6 test results, - http://lkml.org/lkml/2007/8/19/36 -11. Singh, Balbir. Memory controller introduction (v6), - http://lkml.org/lkml/2007/8/17/69 -12. Corbet, Jonathan, Controlling memory use in cgroups, - http://lwn.net/Articles/243795/ diff --git a/Documentation/cgroup-v1/net_cls.rst b/Documentation/cgroup-v1/net_cls.rst new file mode 100644 index 000000000000..a2cf272af7a0 --- /dev/null +++ b/Documentation/cgroup-v1/net_cls.rst @@ -0,0 +1,44 @@ +========================= +Network classifier cgroup +========================= + +The Network classifier cgroup provides an interface to +tag network packets with a class identifier (classid). + +The Traffic Controller (tc) can be used to assign +different priorities to packets from different cgroups. +Also, Netfilter (iptables) can use this tag to perform +actions on such packets. + +Creating a net_cls cgroups instance creates a net_cls.classid file. +This net_cls.classid value is initialized to 0. + +You can write hexadecimal values to net_cls.classid; the format for these +values is 0xAAAABBBB; AAAA is the major handle number and BBBB +is the minor handle number. +Reading net_cls.classid yields a decimal result. + +Example:: + + mkdir /sys/fs/cgroup/net_cls + mount -t cgroup -onet_cls net_cls /sys/fs/cgroup/net_cls + mkdir /sys/fs/cgroup/net_cls/0 + echo 0x100001 > /sys/fs/cgroup/net_cls/0/net_cls.classid + +- setting a 10:1 handle:: + + cat /sys/fs/cgroup/net_cls/0/net_cls.classid + 1048577 + +- configuring tc:: + + tc qdisc add dev eth0 root handle 10: htb + tc class add dev eth0 parent 10: classid 10:1 htb rate 40mbit + +- creating traffic class 10:1:: + + tc filter add dev eth0 parent 10: protocol ip prio 10 handle 1: cgroup + +configuring iptables, basic example:: + + iptables -A OUTPUT -m cgroup ! --cgroup 0x100001 -j DROP diff --git a/Documentation/cgroup-v1/net_cls.txt b/Documentation/cgroup-v1/net_cls.txt deleted file mode 100644 index ec182346dea2..000000000000 --- a/Documentation/cgroup-v1/net_cls.txt +++ /dev/null @@ -1,39 +0,0 @@ -Network classifier cgroup -------------------------- - -The Network classifier cgroup provides an interface to -tag network packets with a class identifier (classid). - -The Traffic Controller (tc) can be used to assign -different priorities to packets from different cgroups. -Also, Netfilter (iptables) can use this tag to perform -actions on such packets. - -Creating a net_cls cgroups instance creates a net_cls.classid file. -This net_cls.classid value is initialized to 0. - -You can write hexadecimal values to net_cls.classid; the format for these -values is 0xAAAABBBB; AAAA is the major handle number and BBBB -is the minor handle number. -Reading net_cls.classid yields a decimal result. - -Example: -mkdir /sys/fs/cgroup/net_cls -mount -t cgroup -onet_cls net_cls /sys/fs/cgroup/net_cls -mkdir /sys/fs/cgroup/net_cls/0 -echo 0x100001 > /sys/fs/cgroup/net_cls/0/net_cls.classid - - setting a 10:1 handle. - -cat /sys/fs/cgroup/net_cls/0/net_cls.classid -1048577 - -configuring tc: -tc qdisc add dev eth0 root handle 10: htb - -tc class add dev eth0 parent 10: classid 10:1 htb rate 40mbit - - creating traffic class 10:1 - -tc filter add dev eth0 parent 10: protocol ip prio 10 handle 1: cgroup - -configuring iptables, basic example: -iptables -A OUTPUT -m cgroup ! --cgroup 0x100001 -j DROP diff --git a/Documentation/cgroup-v1/net_prio.rst b/Documentation/cgroup-v1/net_prio.rst new file mode 100644 index 000000000000..b40905871c64 --- /dev/null +++ b/Documentation/cgroup-v1/net_prio.rst @@ -0,0 +1,57 @@ +======================= +Network priority cgroup +======================= + +The Network priority cgroup provides an interface to allow an administrator to +dynamically set the priority of network traffic generated by various +applications + +Nominally, an application would set the priority of its traffic via the +SO_PRIORITY socket option. This however, is not always possible because: + +1) The application may not have been coded to set this value +2) The priority of application traffic is often a site-specific administrative + decision rather than an application defined one. + +This cgroup allows an administrator to assign a process to a group which defines +the priority of egress traffic on a given interface. Network priority groups can +be created by first mounting the cgroup filesystem:: + + # mount -t cgroup -onet_prio none /sys/fs/cgroup/net_prio + +With the above step, the initial group acting as the parent accounting group +becomes visible at '/sys/fs/cgroup/net_prio'. This group includes all tasks in +the system. '/sys/fs/cgroup/net_prio/tasks' lists the tasks in this cgroup. + +Each net_prio cgroup contains two files that are subsystem specific + +net_prio.prioidx + This file is read-only, and is simply informative. It contains a unique + integer value that the kernel uses as an internal representation of this + cgroup. + +net_prio.ifpriomap + This file contains a map of the priorities assigned to traffic originating + from processes in this group and egressing the system on various interfaces. + It contains a list of tuples in the form . Contents of this + file can be modified by echoing a string into the file using the same tuple + format. For example:: + + echo "eth0 5" > /sys/fs/cgroups/net_prio/iscsi/net_prio.ifpriomap + +This command would force any traffic originating from processes belonging to the +iscsi net_prio cgroup and egressing on interface eth0 to have the priority of +said traffic set to the value 5. The parent accounting group also has a +writeable 'net_prio.ifpriomap' file that can be used to set a system default +priority. + +Priorities are set immediately prior to queueing a frame to the device +queueing discipline (qdisc) so priorities will be assigned prior to the hardware +queue selection being made. + +One usage for the net_prio cgroup is with mqprio qdisc allowing application +traffic to be steered to hardware/driver based traffic classes. These mappings +can then be managed by administrators or other networking protocols such as +DCBX. + +A new net_prio cgroup inherits the parent's configuration. diff --git a/Documentation/cgroup-v1/net_prio.txt b/Documentation/cgroup-v1/net_prio.txt deleted file mode 100644 index a82cbd28ea8a..000000000000 --- a/Documentation/cgroup-v1/net_prio.txt +++ /dev/null @@ -1,55 +0,0 @@ -Network priority cgroup -------------------------- - -The Network priority cgroup provides an interface to allow an administrator to -dynamically set the priority of network traffic generated by various -applications - -Nominally, an application would set the priority of its traffic via the -SO_PRIORITY socket option. This however, is not always possible because: - -1) The application may not have been coded to set this value -2) The priority of application traffic is often a site-specific administrative - decision rather than an application defined one. - -This cgroup allows an administrator to assign a process to a group which defines -the priority of egress traffic on a given interface. Network priority groups can -be created by first mounting the cgroup filesystem. - -# mount -t cgroup -onet_prio none /sys/fs/cgroup/net_prio - -With the above step, the initial group acting as the parent accounting group -becomes visible at '/sys/fs/cgroup/net_prio'. This group includes all tasks in -the system. '/sys/fs/cgroup/net_prio/tasks' lists the tasks in this cgroup. - -Each net_prio cgroup contains two files that are subsystem specific - -net_prio.prioidx -This file is read-only, and is simply informative. It contains a unique integer -value that the kernel uses as an internal representation of this cgroup. - -net_prio.ifpriomap -This file contains a map of the priorities assigned to traffic originating from -processes in this group and egressing the system on various interfaces. It -contains a list of tuples in the form . Contents of this file -can be modified by echoing a string into the file using the same tuple format. -for example: - -echo "eth0 5" > /sys/fs/cgroups/net_prio/iscsi/net_prio.ifpriomap - -This command would force any traffic originating from processes belonging to the -iscsi net_prio cgroup and egressing on interface eth0 to have the priority of -said traffic set to the value 5. The parent accounting group also has a -writeable 'net_prio.ifpriomap' file that can be used to set a system default -priority. - -Priorities are set immediately prior to queueing a frame to the device -queueing discipline (qdisc) so priorities will be assigned prior to the hardware -queue selection being made. - -One usage for the net_prio cgroup is with mqprio qdisc allowing application -traffic to be steered to hardware/driver based traffic classes. These mappings -can then be managed by administrators or other networking protocols such as -DCBX. - -A new net_prio cgroup inherits the parent's configuration. diff --git a/Documentation/cgroup-v1/pids.rst b/Documentation/cgroup-v1/pids.rst new file mode 100644 index 000000000000..6acebd9e72c8 --- /dev/null +++ b/Documentation/cgroup-v1/pids.rst @@ -0,0 +1,92 @@ +========================= +Process Number Controller +========================= + +Abstract +-------- + +The process number controller is used to allow a cgroup hierarchy to stop any +new tasks from being fork()'d or clone()'d after a certain limit is reached. + +Since it is trivial to hit the task limit without hitting any kmemcg limits in +place, PIDs are a fundamental resource. As such, PID exhaustion must be +preventable in the scope of a cgroup hierarchy by allowing resource limiting of +the number of tasks in a cgroup. + +Usage +----- + +In order to use the `pids` controller, set the maximum number of tasks in +pids.max (this is not available in the root cgroup for obvious reasons). The +number of processes currently in the cgroup is given by pids.current. + +Organisational operations are not blocked by cgroup policies, so it is possible +to have pids.current > pids.max. This can be done by either setting the limit to +be smaller than pids.current, or attaching enough processes to the cgroup such +that pids.current > pids.max. However, it is not possible to violate a cgroup +policy through fork() or clone(). fork() and clone() will return -EAGAIN if the +creation of a new process would cause a cgroup policy to be violated. + +To set a cgroup to have no limit, set pids.max to "max". This is the default for +all new cgroups (N.B. that PID limits are hierarchical, so the most stringent +limit in the hierarchy is followed). + +pids.current tracks all child cgroup hierarchies, so parent/pids.current is a +superset of parent/child/pids.current. + +The pids.events file contains event counters: + + - max: Number of times fork failed because limit was hit. + +Example +------- + +First, we mount the pids controller:: + + # mkdir -p /sys/fs/cgroup/pids + # mount -t cgroup -o pids none /sys/fs/cgroup/pids + +Then we create a hierarchy, set limits and attach processes to it:: + + # mkdir -p /sys/fs/cgroup/pids/parent/child + # echo 2 > /sys/fs/cgroup/pids/parent/pids.max + # echo $$ > /sys/fs/cgroup/pids/parent/cgroup.procs + # cat /sys/fs/cgroup/pids/parent/pids.current + 2 + # + +It should be noted that attempts to overcome the set limit (2 in this case) will +fail:: + + # cat /sys/fs/cgroup/pids/parent/pids.current + 2 + # ( /bin/echo "Here's some processes for you." | cat ) + sh: fork: Resource temporary unavailable + # + +Even if we migrate to a child cgroup (which doesn't have a set limit), we will +not be able to overcome the most stringent limit in the hierarchy (in this case, +parent's):: + + # echo $$ > /sys/fs/cgroup/pids/parent/child/cgroup.procs + # cat /sys/fs/cgroup/pids/parent/pids.current + 2 + # cat /sys/fs/cgroup/pids/parent/child/pids.current + 2 + # cat /sys/fs/cgroup/pids/parent/child/pids.max + max + # ( /bin/echo "Here's some processes for you." | cat ) + sh: fork: Resource temporary unavailable + # + +We can set a limit that is smaller than pids.current, which will stop any new +processes from being forked at all (note that the shell itself counts towards +pids.current):: + + # echo 1 > /sys/fs/cgroup/pids/parent/pids.max + # /bin/echo "We can't even spawn a single process now." + sh: fork: Resource temporary unavailable + # echo 0 > /sys/fs/cgroup/pids/parent/pids.max + # /bin/echo "We can't even spawn a single process now." + sh: fork: Resource temporary unavailable + # diff --git a/Documentation/cgroup-v1/pids.txt b/Documentation/cgroup-v1/pids.txt deleted file mode 100644 index e105d708ccde..000000000000 --- a/Documentation/cgroup-v1/pids.txt +++ /dev/null @@ -1,88 +0,0 @@ - Process Number Controller - ========================= - -Abstract --------- - -The process number controller is used to allow a cgroup hierarchy to stop any -new tasks from being fork()'d or clone()'d after a certain limit is reached. - -Since it is trivial to hit the task limit without hitting any kmemcg limits in -place, PIDs are a fundamental resource. As such, PID exhaustion must be -preventable in the scope of a cgroup hierarchy by allowing resource limiting of -the number of tasks in a cgroup. - -Usage ------ - -In order to use the `pids` controller, set the maximum number of tasks in -pids.max (this is not available in the root cgroup for obvious reasons). The -number of processes currently in the cgroup is given by pids.current. - -Organisational operations are not blocked by cgroup policies, so it is possible -to have pids.current > pids.max. This can be done by either setting the limit to -be smaller than pids.current, or attaching enough processes to the cgroup such -that pids.current > pids.max. However, it is not possible to violate a cgroup -policy through fork() or clone(). fork() and clone() will return -EAGAIN if the -creation of a new process would cause a cgroup policy to be violated. - -To set a cgroup to have no limit, set pids.max to "max". This is the default for -all new cgroups (N.B. that PID limits are hierarchical, so the most stringent -limit in the hierarchy is followed). - -pids.current tracks all child cgroup hierarchies, so parent/pids.current is a -superset of parent/child/pids.current. - -The pids.events file contains event counters: - - max: Number of times fork failed because limit was hit. - -Example -------- - -First, we mount the pids controller: -# mkdir -p /sys/fs/cgroup/pids -# mount -t cgroup -o pids none /sys/fs/cgroup/pids - -Then we create a hierarchy, set limits and attach processes to it: -# mkdir -p /sys/fs/cgroup/pids/parent/child -# echo 2 > /sys/fs/cgroup/pids/parent/pids.max -# echo $$ > /sys/fs/cgroup/pids/parent/cgroup.procs -# cat /sys/fs/cgroup/pids/parent/pids.current -2 -# - -It should be noted that attempts to overcome the set limit (2 in this case) will -fail: - -# cat /sys/fs/cgroup/pids/parent/pids.current -2 -# ( /bin/echo "Here's some processes for you." | cat ) -sh: fork: Resource temporary unavailable -# - -Even if we migrate to a child cgroup (which doesn't have a set limit), we will -not be able to overcome the most stringent limit in the hierarchy (in this case, -parent's): - -# echo $$ > /sys/fs/cgroup/pids/parent/child/cgroup.procs -# cat /sys/fs/cgroup/pids/parent/pids.current -2 -# cat /sys/fs/cgroup/pids/parent/child/pids.current -2 -# cat /sys/fs/cgroup/pids/parent/child/pids.max -max -# ( /bin/echo "Here's some processes for you." | cat ) -sh: fork: Resource temporary unavailable -# - -We can set a limit that is smaller than pids.current, which will stop any new -processes from being forked at all (note that the shell itself counts towards -pids.current): - -# echo 1 > /sys/fs/cgroup/pids/parent/pids.max -# /bin/echo "We can't even spawn a single process now." -sh: fork: Resource temporary unavailable -# echo 0 > /sys/fs/cgroup/pids/parent/pids.max -# /bin/echo "We can't even spawn a single process now." -sh: fork: Resource temporary unavailable -# diff --git a/Documentation/cgroup-v1/rdma.rst b/Documentation/cgroup-v1/rdma.rst new file mode 100644 index 000000000000..2fcb0a9bf790 --- /dev/null +++ b/Documentation/cgroup-v1/rdma.rst @@ -0,0 +1,117 @@ +=============== +RDMA Controller +=============== + +.. Contents + + 1. Overview + 1-1. What is RDMA controller? + 1-2. Why RDMA controller needed? + 1-3. How is RDMA controller implemented? + 2. Usage Examples + +1. Overview +=========== + +1-1. What is RDMA controller? +----------------------------- + +RDMA controller allows user to limit RDMA/IB specific resources that a given +set of processes can use. These processes are grouped using RDMA controller. + +RDMA controller defines two resources which can be limited for processes of a +cgroup. + +1-2. Why RDMA controller needed? +-------------------------------- + +Currently user space applications can easily take away all the rdma verb +specific resources such as AH, CQ, QP, MR etc. Due to which other applications +in other cgroup or kernel space ULPs may not even get chance to allocate any +rdma resources. This can lead to service unavailability. + +Therefore RDMA controller is needed through which resource consumption +of processes can be limited. Through this controller different rdma +resources can be accounted. + +1-3. How is RDMA controller implemented? +---------------------------------------- + +RDMA cgroup allows limit configuration of resources. Rdma cgroup maintains +resource accounting per cgroup, per device using resource pool structure. +Each such resource pool is limited up to 64 resources in given resource pool +by rdma cgroup, which can be extended later if required. + +This resource pool object is linked to the cgroup css. Typically there +are 0 to 4 resource pool instances per cgroup, per device in most use cases. +But nothing limits to have it more. At present hundreds of RDMA devices per +single cgroup may not be handled optimally, however there is no +known use case or requirement for such configuration either. + +Since RDMA resources can be allocated from any process and can be freed by any +of the child processes which shares the address space, rdma resources are +always owned by the creator cgroup css. This allows process migration from one +to other cgroup without major complexity of transferring resource ownership; +because such ownership is not really present due to shared nature of +rdma resources. Linking resources around css also ensures that cgroups can be +deleted after processes migrated. This allow progress migration as well with +active resources, even though that is not a primary use case. + +Whenever RDMA resource charging occurs, owner rdma cgroup is returned to +the caller. Same rdma cgroup should be passed while uncharging the resource. +This also allows process migrated with active RDMA resource to charge +to new owner cgroup for new resource. It also allows to uncharge resource of +a process from previously charged cgroup which is migrated to new cgroup, +even though that is not a primary use case. + +Resource pool object is created in following situations. +(a) User sets the limit and no previous resource pool exist for the device +of interest for the cgroup. +(b) No resource limits were configured, but IB/RDMA stack tries to +charge the resource. So that it correctly uncharge them when applications are +running without limits and later on when limits are enforced during uncharging, +otherwise usage count will drop to negative. + +Resource pool is destroyed if all the resource limits are set to max and +it is the last resource getting deallocated. + +User should set all the limit to max value if it intents to remove/unconfigure +the resource pool for a particular device. + +IB stack honors limits enforced by the rdma controller. When application +query about maximum resource limits of IB device, it returns minimum of +what is configured by user for a given cgroup and what is supported by +IB device. + +Following resources can be accounted by rdma controller. + + ========== ============================= + hca_handle Maximum number of HCA Handles + hca_object Maximum number of HCA Objects + ========== ============================= + +2. Usage Examples +================= + +(a) Configure resource limit:: + + echo mlx4_0 hca_handle=2 hca_object=2000 > /sys/fs/cgroup/rdma/1/rdma.max + echo ocrdma1 hca_handle=3 > /sys/fs/cgroup/rdma/2/rdma.max + +(b) Query resource limit:: + + cat /sys/fs/cgroup/rdma/2/rdma.max + #Output: + mlx4_0 hca_handle=2 hca_object=2000 + ocrdma1 hca_handle=3 hca_object=max + +(c) Query current usage:: + + cat /sys/fs/cgroup/rdma/2/rdma.current + #Output: + mlx4_0 hca_handle=1 hca_object=20 + ocrdma1 hca_handle=1 hca_object=23 + +(d) Delete resource limit:: + + echo echo mlx4_0 hca_handle=max hca_object=max > /sys/fs/cgroup/rdma/1/rdma.max diff --git a/Documentation/cgroup-v1/rdma.txt b/Documentation/cgroup-v1/rdma.txt deleted file mode 100644 index 9bdb7fd03f83..000000000000 --- a/Documentation/cgroup-v1/rdma.txt +++ /dev/null @@ -1,109 +0,0 @@ - RDMA Controller - ---------------- - -Contents --------- - -1. Overview - 1-1. What is RDMA controller? - 1-2. Why RDMA controller needed? - 1-3. How is RDMA controller implemented? -2. Usage Examples - -1. Overview - -1-1. What is RDMA controller? ------------------------------ - -RDMA controller allows user to limit RDMA/IB specific resources that a given -set of processes can use. These processes are grouped using RDMA controller. - -RDMA controller defines two resources which can be limited for processes of a -cgroup. - -1-2. Why RDMA controller needed? --------------------------------- - -Currently user space applications can easily take away all the rdma verb -specific resources such as AH, CQ, QP, MR etc. Due to which other applications -in other cgroup or kernel space ULPs may not even get chance to allocate any -rdma resources. This can lead to service unavailability. - -Therefore RDMA controller is needed through which resource consumption -of processes can be limited. Through this controller different rdma -resources can be accounted. - -1-3. How is RDMA controller implemented? ----------------------------------------- - -RDMA cgroup allows limit configuration of resources. Rdma cgroup maintains -resource accounting per cgroup, per device using resource pool structure. -Each such resource pool is limited up to 64 resources in given resource pool -by rdma cgroup, which can be extended later if required. - -This resource pool object is linked to the cgroup css. Typically there -are 0 to 4 resource pool instances per cgroup, per device in most use cases. -But nothing limits to have it more. At present hundreds of RDMA devices per -single cgroup may not be handled optimally, however there is no -known use case or requirement for such configuration either. - -Since RDMA resources can be allocated from any process and can be freed by any -of the child processes which shares the address space, rdma resources are -always owned by the creator cgroup css. This allows process migration from one -to other cgroup without major complexity of transferring resource ownership; -because such ownership is not really present due to shared nature of -rdma resources. Linking resources around css also ensures that cgroups can be -deleted after processes migrated. This allow progress migration as well with -active resources, even though that is not a primary use case. - -Whenever RDMA resource charging occurs, owner rdma cgroup is returned to -the caller. Same rdma cgroup should be passed while uncharging the resource. -This also allows process migrated with active RDMA resource to charge -to new owner cgroup for new resource. It also allows to uncharge resource of -a process from previously charged cgroup which is migrated to new cgroup, -even though that is not a primary use case. - -Resource pool object is created in following situations. -(a) User sets the limit and no previous resource pool exist for the device -of interest for the cgroup. -(b) No resource limits were configured, but IB/RDMA stack tries to -charge the resource. So that it correctly uncharge them when applications are -running without limits and later on when limits are enforced during uncharging, -otherwise usage count will drop to negative. - -Resource pool is destroyed if all the resource limits are set to max and -it is the last resource getting deallocated. - -User should set all the limit to max value if it intents to remove/unconfigure -the resource pool for a particular device. - -IB stack honors limits enforced by the rdma controller. When application -query about maximum resource limits of IB device, it returns minimum of -what is configured by user for a given cgroup and what is supported by -IB device. - -Following resources can be accounted by rdma controller. - hca_handle Maximum number of HCA Handles - hca_object Maximum number of HCA Objects - -2. Usage Examples ------------------ - -(a) Configure resource limit: -echo mlx4_0 hca_handle=2 hca_object=2000 > /sys/fs/cgroup/rdma/1/rdma.max -echo ocrdma1 hca_handle=3 > /sys/fs/cgroup/rdma/2/rdma.max - -(b) Query resource limit: -cat /sys/fs/cgroup/rdma/2/rdma.max -#Output: -mlx4_0 hca_handle=2 hca_object=2000 -ocrdma1 hca_handle=3 hca_object=max - -(c) Query current usage: -cat /sys/fs/cgroup/rdma/2/rdma.current -#Output: -mlx4_0 hca_handle=1 hca_object=20 -ocrdma1 hca_handle=1 hca_object=23 - -(d) Delete resource limit: -echo echo mlx4_0 hca_handle=max hca_object=max > /sys/fs/cgroup/rdma/1/rdma.max diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt index d06e9a59a9f4..cad797a8a39e 100644 --- a/Documentation/filesystems/tmpfs.txt +++ b/Documentation/filesystems/tmpfs.txt @@ -98,7 +98,7 @@ A memory policy with a valid NodeList will be saved, as specified, for use at file creation time. When a task allocates a file in the file system, the mount option memory policy will be applied with a NodeList, if any, modified by the calling task's cpuset constraints -[See Documentation/cgroup-v1/cpusets.txt] and any optional flags, listed +[See Documentation/cgroup-v1/cpusets.rst] and any optional flags, listed below. If the resulting NodeLists is the empty set, the effective memory policy for the file will revert to "default" policy. diff --git a/Documentation/scheduler/sched-deadline.txt b/Documentation/scheduler/sched-deadline.txt index b14e03ff3528..a7514343b660 100644 --- a/Documentation/scheduler/sched-deadline.txt +++ b/Documentation/scheduler/sched-deadline.txt @@ -652,7 +652,7 @@ CONTENTS -deadline tasks cannot have an affinity mask smaller that the entire root_domain they are created on. However, affinities can be specified - through the cpuset facility (Documentation/cgroup-v1/cpusets.txt). + through the cpuset facility (Documentation/cgroup-v1/cpusets.rst). 5.1 SCHED_DEADLINE and cpusets HOWTO ------------------------------------ diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt index edd861c94c1b..d1328890ef28 100644 --- a/Documentation/scheduler/sched-design-CFS.txt +++ b/Documentation/scheduler/sched-design-CFS.txt @@ -215,7 +215,7 @@ SCHED_BATCH) tasks. These options need CONFIG_CGROUPS to be defined, and let the administrator create arbitrary groups of tasks, using the "cgroup" pseudo filesystem. See - Documentation/cgroup-v1/cgroups.txt for more information about this filesystem. + Documentation/cgroup-v1/cgroups.rst for more information about this filesystem. When CONFIG_FAIR_GROUP_SCHED is defined, a "cpu.shares" file is created for each group created using the pseudo filesystem. See example steps below to create diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt index d8fce3e78457..c09f7a3fee66 100644 --- a/Documentation/scheduler/sched-rt-group.txt +++ b/Documentation/scheduler/sched-rt-group.txt @@ -133,7 +133,7 @@ This uses the cgroup virtual file system and "/cpu.rt_runtime_us" to control the CPU time reserved for each control group. For more information on working with control groups, you should read -Documentation/cgroup-v1/cgroups.txt as well. +Documentation/cgroup-v1/cgroups.rst as well. Group settings are checked against the following limits in order to keep the configuration schedulable: diff --git a/Documentation/vm/numa.rst b/Documentation/vm/numa.rst index 5cae13e9a08b..0d830edae8fe 100644 --- a/Documentation/vm/numa.rst +++ b/Documentation/vm/numa.rst @@ -67,7 +67,7 @@ nodes. Each emulated node will manage a fraction of the underlying cells' physical memory. NUMA emluation is useful for testing NUMA kernel and application features on non-NUMA platforms, and as a sort of memory resource management mechanism when used together with cpusets. -[see Documentation/cgroup-v1/cpusets.txt] +[see Documentation/cgroup-v1/cpusets.rst] For each node with memory, Linux constructs an independent memory management subsystem, complete with its own free page lists, in-use page lists, usage @@ -114,7 +114,7 @@ allocation behavior using Linux NUMA memory policy. [see System administrators can restrict the CPUs and nodes' memories that a non- privileged user can specify in the scheduling or NUMA commands and functions -using control groups and CPUsets. [see Documentation/cgroup-v1/cpusets.txt] +using control groups and CPUsets. [see Documentation/cgroup-v1/cpusets.rst] On architectures that do not hide memoryless nodes, Linux will include only zones [nodes] with memory in the zonelists. This means that for a memoryless diff --git a/Documentation/vm/page_migration.rst b/Documentation/vm/page_migration.rst index f68d61335abb..35bba27d5fff 100644 --- a/Documentation/vm/page_migration.rst +++ b/Documentation/vm/page_migration.rst @@ -41,7 +41,7 @@ locations. Larger installations usually partition the system using cpusets into sections of nodes. Paul Jackson has equipped cpusets with the ability to move pages when a task is moved to another cpuset (See -Documentation/cgroup-v1/cpusets.txt). +Documentation/cgroup-v1/cpusets.rst). Cpusets allows the automation of process locality. If a task is moved to a new cpuset then also all its pages are moved with it so that the performance of the process does not sink dramatically. Also the pages diff --git a/Documentation/vm/unevictable-lru.rst b/Documentation/vm/unevictable-lru.rst index b8e29f977f2d..c6d94118fbcc 100644 --- a/Documentation/vm/unevictable-lru.rst +++ b/Documentation/vm/unevictable-lru.rst @@ -98,7 +98,7 @@ Memory Control Group Interaction -------------------------------- The unevictable LRU facility interacts with the memory control group [aka -memory controller; see Documentation/cgroup-v1/memory.txt] by extending the +memory controller; see Documentation/cgroup-v1/memory.rst] by extending the lru_list enum. The memory controller data structure automatically gets a per-zone unevictable diff --git a/Documentation/x86/x86_64/fake-numa-for-cpusets.rst b/Documentation/x86/x86_64/fake-numa-for-cpusets.rst index 74fbb78b3c67..a6926cd40f70 100644 --- a/Documentation/x86/x86_64/fake-numa-for-cpusets.rst +++ b/Documentation/x86/x86_64/fake-numa-for-cpusets.rst @@ -15,7 +15,7 @@ assign them to cpusets and their attached tasks. This is a way of limiting the amount of system memory that are available to a certain class of tasks. For more information on the features of cpusets, see -Documentation/cgroup-v1/cpusets.txt. +Documentation/cgroup-v1/cpusets.rst. There are a number of different configurations you can use for your needs. For more information on the numa=fake command line option and its various ways of configuring fake nodes, see Documentation/x86/x86_64/boot-options.txt. @@ -40,7 +40,7 @@ A machine may be split as follows with "numa=fake=4*512," as reported by dmesg:: On node 3 totalpages: 131072 Now following the instructions for mounting the cpusets filesystem from -Documentation/cgroup-v1/cpusets.txt, you can assign fake nodes (i.e. contiguous memory +Documentation/cgroup-v1/cpusets.rst, you can assign fake nodes (i.e. contiguous memory address spaces) to individual cpusets:: [root@xroads /]# mkdir exampleset diff --git a/MAINTAINERS b/MAINTAINERS index 429c6c624861..b8663911779a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4094,7 +4094,7 @@ W: http://www.bullopensource.org/cpuset/ W: http://oss.sgi.com/projects/cpusets/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git S: Maintained -F: Documentation/cgroup-v1/cpusets.txt +F: Documentation/cgroup-v1/cpusets.rst F: include/linux/cpuset.h F: kernel/cgroup/cpuset.c diff --git a/block/Kconfig b/block/Kconfig index 1b220101a9cb..78374cb03114 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -88,7 +88,7 @@ config BLK_DEV_THROTTLING one needs to mount and use blkio cgroup controller for creating cgroups and specifying per device IO rate policies. - See Documentation/cgroup-v1/blkio-controller.txt for more information. + See Documentation/cgroup-v1/blkio-controller.rst for more information. config BLK_DEV_THROTTLING_LOW bool "Block throttling .low limit interface support (EXPERIMENTAL)" diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1615b9c17e02..a3699d4d27e0 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -619,7 +619,7 @@ struct cftype { /* * Control Group subsystem type. - * See Documentation/cgroup-v1/cgroups.txt for details + * See Documentation/cgroup-v1/cgroups.rst for details */ struct cgroup_subsys { struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 63e0cf66f01a..79e080ea71d8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -783,7 +783,7 @@ union bpf_attr { * based on a user-provided identifier for all traffic coming from * the tasks belonging to the related cgroup. See also the related * kernel documentation, available from the Linux sources in file - * *Documentation/cgroup-v1/net_cls.txt*. + * *Documentation/cgroup-v1/net_cls.rst*. * * The Linux kernel has two versions for cgroups: there are * cgroups v1 and cgroups v2. Both are available to users, who can diff --git a/init/Kconfig b/init/Kconfig index 36894c9fb420..5d4bf0f676e9 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -798,7 +798,7 @@ config BLK_CGROUP CONFIG_CFQ_GROUP_IOSCHED=y; for enabling throttling policy, set CONFIG_BLK_DEV_THROTTLING=y. - See Documentation/cgroup-v1/blkio-controller.txt for more information. + See Documentation/cgroup-v1/blkio-controller.rst for more information. config DEBUG_BLK_CGROUP bool "IO controller debugging" diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 6a1942ed781c..fc6668f9db15 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -729,7 +729,7 @@ static inline int nr_cpusets(void) * load balancing domains (sched domains) as specified by that partial * partition. * - * See "What is sched_load_balance" in Documentation/cgroup-v1/cpusets.txt + * See "What is sched_load_balance" in Documentation/cgroup-v1/cpusets.rst * for a background explanation of this. * * Does not return errors, on the theory that the callers of this diff --git a/security/device_cgroup.c b/security/device_cgroup.c index dc28914fa72e..c07196502577 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -509,7 +509,7 @@ static inline int may_allow_all(struct dev_cgroup *parent) * This is one of the three key functions for hierarchy implementation. * This function is responsible for re-evaluating all the cgroup's active * exceptions due to a parent's exception change. - * Refer to Documentation/cgroup-v1/devices.txt for more details. + * Refer to Documentation/cgroup-v1/devices.rst for more details. */ static void revalidate_active_exceptions(struct dev_cgroup *devcg) { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 63e0cf66f01a..79e080ea71d8 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -783,7 +783,7 @@ union bpf_attr { * based on a user-provided identifier for all traffic coming from * the tasks belonging to the related cgroup. See also the related * kernel documentation, available from the Linux sources in file - * *Documentation/cgroup-v1/net_cls.txt*. + * *Documentation/cgroup-v1/net_cls.rst*. * * The Linux kernel has two versions for cgroups: there are * cgroups v1 and cgroups v2. Both are available to users, who can -- cgit v1.2.3 From 458f69ef36656dc74679667380422dd8063eabfb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 12 Jun 2019 14:53:00 -0300 Subject: docs: timers: convert docs to ReST and rename to *.rst The conversion here is really trivial: just a bunch of title markups and very few puntual changes is enough to make it to be parsed by Sphinx and generate a nice html. The conversion is actually: - add blank lines and identation in order to identify paragraphs; - fix tables markups; - add some lists markups; - mark literal blocks; - adjust title markups. At its new index.rst, let's add a :orphan: while this is not linked to the main index.rst file, in order to avoid build warnings. Signed-off-by: Mauro Carvalho Chehab Acked-by: Mark Brown Signed-off-by: Jonathan Corbet --- Documentation/timers/NO_HZ.txt | 318 --------------------------------- Documentation/timers/highres.rst | 250 ++++++++++++++++++++++++++ Documentation/timers/highres.txt | 249 -------------------------- Documentation/timers/hpet.rst | 30 ++++ Documentation/timers/hpet.txt | 28 --- Documentation/timers/hrtimers.rst | 178 +++++++++++++++++++ Documentation/timers/hrtimers.txt | 178 ------------------- Documentation/timers/index.rst | 22 +++ Documentation/timers/no_hz.rst | 326 ++++++++++++++++++++++++++++++++++ Documentation/timers/timekeeping.rst | 180 +++++++++++++++++++ Documentation/timers/timekeeping.txt | 179 ------------------- Documentation/timers/timers-howto.rst | 112 ++++++++++++ Documentation/timers/timers-howto.txt | 105 ----------- MAINTAINERS | 2 +- drivers/media/usb/dvb-usb-v2/anysee.c | 2 +- drivers/regulator/core.c | 2 +- include/linux/iopoll.h | 4 +- include/linux/regmap.h | 4 +- scripts/checkpatch.pl | 8 +- sound/soc/sof/ops.h | 2 +- 20 files changed, 1110 insertions(+), 1069 deletions(-) delete mode 100644 Documentation/timers/NO_HZ.txt create mode 100644 Documentation/timers/highres.rst delete mode 100644 Documentation/timers/highres.txt create mode 100644 Documentation/timers/hpet.rst delete mode 100644 Documentation/timers/hpet.txt create mode 100644 Documentation/timers/hrtimers.rst delete mode 100644 Documentation/timers/hrtimers.txt create mode 100644 Documentation/timers/index.rst create mode 100644 Documentation/timers/no_hz.rst create mode 100644 Documentation/timers/timekeeping.rst delete mode 100644 Documentation/timers/timekeeping.txt create mode 100644 Documentation/timers/timers-howto.rst delete mode 100644 Documentation/timers/timers-howto.txt (limited to 'include/linux') diff --git a/Documentation/timers/NO_HZ.txt b/Documentation/timers/NO_HZ.txt deleted file mode 100644 index 9591092da5e0..000000000000 --- a/Documentation/timers/NO_HZ.txt +++ /dev/null @@ -1,318 +0,0 @@ - NO_HZ: Reducing Scheduling-Clock Ticks - - -This document describes Kconfig options and boot parameters that can -reduce the number of scheduling-clock interrupts, thereby improving energy -efficiency and reducing OS jitter. Reducing OS jitter is important for -some types of computationally intensive high-performance computing (HPC) -applications and for real-time applications. - -There are three main ways of managing scheduling-clock interrupts -(also known as "scheduling-clock ticks" or simply "ticks"): - -1. Never omit scheduling-clock ticks (CONFIG_HZ_PERIODIC=y or - CONFIG_NO_HZ=n for older kernels). You normally will -not- - want to choose this option. - -2. Omit scheduling-clock ticks on idle CPUs (CONFIG_NO_HZ_IDLE=y or - CONFIG_NO_HZ=y for older kernels). This is the most common - approach, and should be the default. - -3. Omit scheduling-clock ticks on CPUs that are either idle or that - have only one runnable task (CONFIG_NO_HZ_FULL=y). Unless you - are running realtime applications or certain types of HPC - workloads, you will normally -not- want this option. - -These three cases are described in the following three sections, followed -by a third section on RCU-specific considerations, a fourth section -discussing testing, and a fifth and final section listing known issues. - - -NEVER OMIT SCHEDULING-CLOCK TICKS - -Very old versions of Linux from the 1990s and the very early 2000s -are incapable of omitting scheduling-clock ticks. It turns out that -there are some situations where this old-school approach is still the -right approach, for example, in heavy workloads with lots of tasks -that use short bursts of CPU, where there are very frequent idle -periods, but where these idle periods are also quite short (tens or -hundreds of microseconds). For these types of workloads, scheduling -clock interrupts will normally be delivered any way because there -will frequently be multiple runnable tasks per CPU. In these cases, -attempting to turn off the scheduling clock interrupt will have no effect -other than increasing the overhead of switching to and from idle and -transitioning between user and kernel execution. - -This mode of operation can be selected using CONFIG_HZ_PERIODIC=y (or -CONFIG_NO_HZ=n for older kernels). - -However, if you are instead running a light workload with long idle -periods, failing to omit scheduling-clock interrupts will result in -excessive power consumption. This is especially bad on battery-powered -devices, where it results in extremely short battery lifetimes. If you -are running light workloads, you should therefore read the following -section. - -In addition, if you are running either a real-time workload or an HPC -workload with short iterations, the scheduling-clock interrupts can -degrade your applications performance. If this describes your workload, -you should read the following two sections. - - -OMIT SCHEDULING-CLOCK TICKS FOR IDLE CPUs - -If a CPU is idle, there is little point in sending it a scheduling-clock -interrupt. After all, the primary purpose of a scheduling-clock interrupt -is to force a busy CPU to shift its attention among multiple duties, -and an idle CPU has no duties to shift its attention among. - -The CONFIG_NO_HZ_IDLE=y Kconfig option causes the kernel to avoid sending -scheduling-clock interrupts to idle CPUs, which is critically important -both to battery-powered devices and to highly virtualized mainframes. -A battery-powered device running a CONFIG_HZ_PERIODIC=y kernel would -drain its battery very quickly, easily 2-3 times as fast as would the -same device running a CONFIG_NO_HZ_IDLE=y kernel. A mainframe running -1,500 OS instances might find that half of its CPU time was consumed by -unnecessary scheduling-clock interrupts. In these situations, there -is strong motivation to avoid sending scheduling-clock interrupts to -idle CPUs. That said, dyntick-idle mode is not free: - -1. It increases the number of instructions executed on the path - to and from the idle loop. - -2. On many architectures, dyntick-idle mode also increases the - number of expensive clock-reprogramming operations. - -Therefore, systems with aggressive real-time response constraints often -run CONFIG_HZ_PERIODIC=y kernels (or CONFIG_NO_HZ=n for older kernels) -in order to avoid degrading from-idle transition latencies. - -An idle CPU that is not receiving scheduling-clock interrupts is said to -be "dyntick-idle", "in dyntick-idle mode", "in nohz mode", or "running -tickless". The remainder of this document will use "dyntick-idle mode". - -There is also a boot parameter "nohz=" that can be used to disable -dyntick-idle mode in CONFIG_NO_HZ_IDLE=y kernels by specifying "nohz=off". -By default, CONFIG_NO_HZ_IDLE=y kernels boot with "nohz=on", enabling -dyntick-idle mode. - - -OMIT SCHEDULING-CLOCK TICKS FOR CPUs WITH ONLY ONE RUNNABLE TASK - -If a CPU has only one runnable task, there is little point in sending it -a scheduling-clock interrupt because there is no other task to switch to. -Note that omitting scheduling-clock ticks for CPUs with only one runnable -task implies also omitting them for idle CPUs. - -The CONFIG_NO_HZ_FULL=y Kconfig option causes the kernel to avoid -sending scheduling-clock interrupts to CPUs with a single runnable task, -and such CPUs are said to be "adaptive-ticks CPUs". This is important -for applications with aggressive real-time response constraints because -it allows them to improve their worst-case response times by the maximum -duration of a scheduling-clock interrupt. It is also important for -computationally intensive short-iteration workloads: If any CPU is -delayed during a given iteration, all the other CPUs will be forced to -wait idle while the delayed CPU finishes. Thus, the delay is multiplied -by one less than the number of CPUs. In these situations, there is -again strong motivation to avoid sending scheduling-clock interrupts. - -By default, no CPU will be an adaptive-ticks CPU. The "nohz_full=" -boot parameter specifies the adaptive-ticks CPUs. For example, -"nohz_full=1,6-8" says that CPUs 1, 6, 7, and 8 are to be adaptive-ticks -CPUs. Note that you are prohibited from marking all of the CPUs as -adaptive-tick CPUs: At least one non-adaptive-tick CPU must remain -online to handle timekeeping tasks in order to ensure that system -calls like gettimeofday() returns accurate values on adaptive-tick CPUs. -(This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no running -user processes to observe slight drifts in clock rate.) Therefore, the -boot CPU is prohibited from entering adaptive-ticks mode. Specifying a -"nohz_full=" mask that includes the boot CPU will result in a boot-time -error message, and the boot CPU will be removed from the mask. Note that -this means that your system must have at least two CPUs in order for -CONFIG_NO_HZ_FULL=y to do anything for you. - -Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded. -This is covered in the "RCU IMPLICATIONS" section below. - -Normally, a CPU remains in adaptive-ticks mode as long as possible. -In particular, transitioning to kernel mode does not automatically change -the mode. Instead, the CPU will exit adaptive-ticks mode only if needed, -for example, if that CPU enqueues an RCU callback. - -Just as with dyntick-idle mode, the benefits of adaptive-tick mode do -not come for free: - -1. CONFIG_NO_HZ_FULL selects CONFIG_NO_HZ_COMMON, so you cannot run - adaptive ticks without also running dyntick idle. This dependency - extends down into the implementation, so that all of the costs - of CONFIG_NO_HZ_IDLE are also incurred by CONFIG_NO_HZ_FULL. - -2. The user/kernel transitions are slightly more expensive due - to the need to inform kernel subsystems (such as RCU) about - the change in mode. - -3. POSIX CPU timers prevent CPUs from entering adaptive-tick mode. - Real-time applications needing to take actions based on CPU time - consumption need to use other means of doing so. - -4. If there are more perf events pending than the hardware can - accommodate, they are normally round-robined so as to collect - all of them over time. Adaptive-tick mode may prevent this - round-robining from happening. This will likely be fixed by - preventing CPUs with large numbers of perf events pending from - entering adaptive-tick mode. - -5. Scheduler statistics for adaptive-tick CPUs may be computed - slightly differently than those for non-adaptive-tick CPUs. - This might in turn perturb load-balancing of real-time tasks. - -6. The LB_BIAS scheduler feature is disabled by adaptive ticks. - -Although improvements are expected over time, adaptive ticks is quite -useful for many types of real-time and compute-intensive applications. -However, the drawbacks listed above mean that adaptive ticks should not -(yet) be enabled by default. - - -RCU IMPLICATIONS - -There are situations in which idle CPUs cannot be permitted to -enter either dyntick-idle mode or adaptive-tick mode, the most -common being when that CPU has RCU callbacks pending. - -The CONFIG_RCU_FAST_NO_HZ=y Kconfig option may be used to cause such CPUs -to enter dyntick-idle mode or adaptive-tick mode anyway. In this case, -a timer will awaken these CPUs every four jiffies in order to ensure -that the RCU callbacks are processed in a timely fashion. - -Another approach is to offload RCU callback processing to "rcuo" kthreads -using the CONFIG_RCU_NOCB_CPU=y Kconfig option. The specific CPUs to -offload may be selected using The "rcu_nocbs=" kernel boot parameter, -which takes a comma-separated list of CPUs and CPU ranges, for example, -"1,3-5" selects CPUs 1, 3, 4, and 5. - -The offloaded CPUs will never queue RCU callbacks, and therefore RCU -never prevents offloaded CPUs from entering either dyntick-idle mode -or adaptive-tick mode. That said, note that it is up to userspace to -pin the "rcuo" kthreads to specific CPUs if desired. Otherwise, the -scheduler will decide where to run them, which might or might not be -where you want them to run. - - -TESTING - -So you enable all the OS-jitter features described in this document, -but do not see any change in your workload's behavior. Is this because -your workload isn't affected that much by OS jitter, or is it because -something else is in the way? This section helps answer this question -by providing a simple OS-jitter test suite, which is available on branch -master of the following git archive: - -git://git.kernel.org/pub/scm/linux/kernel/git/frederic/dynticks-testing.git - -Clone this archive and follow the instructions in the README file. -This test procedure will produce a trace that will allow you to evaluate -whether or not you have succeeded in removing OS jitter from your system. -If this trace shows that you have removed OS jitter as much as is -possible, then you can conclude that your workload is not all that -sensitive to OS jitter. - -Note: this test requires that your system have at least two CPUs. -We do not currently have a good way to remove OS jitter from single-CPU -systems. - - -KNOWN ISSUES - -o Dyntick-idle slows transitions to and from idle slightly. - In practice, this has not been a problem except for the most - aggressive real-time workloads, which have the option of disabling - dyntick-idle mode, an option that most of them take. However, - some workloads will no doubt want to use adaptive ticks to - eliminate scheduling-clock interrupt latencies. Here are some - options for these workloads: - - a. Use PMQOS from userspace to inform the kernel of your - latency requirements (preferred). - - b. On x86 systems, use the "idle=mwait" boot parameter. - - c. On x86 systems, use the "intel_idle.max_cstate=" to limit - ` the maximum C-state depth. - - d. On x86 systems, use the "idle=poll" boot parameter. - However, please note that use of this parameter can cause - your CPU to overheat, which may cause thermal throttling - to degrade your latencies -- and that this degradation can - be even worse than that of dyntick-idle. Furthermore, - this parameter effectively disables Turbo Mode on Intel - CPUs, which can significantly reduce maximum performance. - -o Adaptive-ticks slows user/kernel transitions slightly. - This is not expected to be a problem for computationally intensive - workloads, which have few such transitions. Careful benchmarking - will be required to determine whether or not other workloads - are significantly affected by this effect. - -o Adaptive-ticks does not do anything unless there is only one - runnable task for a given CPU, even though there are a number - of other situations where the scheduling-clock tick is not - needed. To give but one example, consider a CPU that has one - runnable high-priority SCHED_FIFO task and an arbitrary number - of low-priority SCHED_OTHER tasks. In this case, the CPU is - required to run the SCHED_FIFO task until it either blocks or - some other higher-priority task awakens on (or is assigned to) - this CPU, so there is no point in sending a scheduling-clock - interrupt to this CPU. However, the current implementation - nevertheless sends scheduling-clock interrupts to CPUs having a - single runnable SCHED_FIFO task and multiple runnable SCHED_OTHER - tasks, even though these interrupts are unnecessary. - - And even when there are multiple runnable tasks on a given CPU, - there is little point in interrupting that CPU until the current - running task's timeslice expires, which is almost always way - longer than the time of the next scheduling-clock interrupt. - - Better handling of these sorts of situations is future work. - -o A reboot is required to reconfigure both adaptive idle and RCU - callback offloading. Runtime reconfiguration could be provided - if needed, however, due to the complexity of reconfiguring RCU at - runtime, there would need to be an earthshakingly good reason. - Especially given that you have the straightforward option of - simply offloading RCU callbacks from all CPUs and pinning them - where you want them whenever you want them pinned. - -o Additional configuration is required to deal with other sources - of OS jitter, including interrupts and system-utility tasks - and processes. This configuration normally involves binding - interrupts and tasks to particular CPUs. - -o Some sources of OS jitter can currently be eliminated only by - constraining the workload. For example, the only way to eliminate - OS jitter due to global TLB shootdowns is to avoid the unmapping - operations (such as kernel module unload operations) that - result in these shootdowns. For another example, page faults - and TLB misses can be reduced (and in some cases eliminated) by - using huge pages and by constraining the amount of memory used - by the application. Pre-faulting the working set can also be - helpful, especially when combined with the mlock() and mlockall() - system calls. - -o Unless all CPUs are idle, at least one CPU must keep the - scheduling-clock interrupt going in order to support accurate - timekeeping. - -o If there might potentially be some adaptive-ticks CPUs, there - will be at least one CPU keeping the scheduling-clock interrupt - going, even if all CPUs are otherwise idle. - - Better handling of this situation is ongoing work. - -o Some process-handling operations still require the occasional - scheduling-clock tick. These operations include calculating CPU - load, maintaining sched average, computing CFS entity vruntime, - computing avenrun, and carrying out load balancing. They are - currently accommodated by scheduling-clock tick every second - or so. On-going work will eliminate the need even for these - infrequent scheduling-clock ticks. diff --git a/Documentation/timers/highres.rst b/Documentation/timers/highres.rst new file mode 100644 index 000000000000..bde5eb7e5c9e --- /dev/null +++ b/Documentation/timers/highres.rst @@ -0,0 +1,250 @@ +===================================================== +High resolution timers and dynamic ticks design notes +===================================================== + +Further information can be found in the paper of the OLS 2006 talk "hrtimers +and beyond". The paper is part of the OLS 2006 Proceedings Volume 1, which can +be found on the OLS website: +https://www.kernel.org/doc/ols/2006/ols2006v1-pages-333-346.pdf + +The slides to this talk are available from: +http://www.cs.columbia.edu/~nahum/w6998/papers/ols2006-hrtimers-slides.pdf + +The slides contain five figures (pages 2, 15, 18, 20, 22), which illustrate the +changes in the time(r) related Linux subsystems. Figure #1 (p. 2) shows the +design of the Linux time(r) system before hrtimers and other building blocks +got merged into mainline. + +Note: the paper and the slides are talking about "clock event source", while we +switched to the name "clock event devices" in meantime. + +The design contains the following basic building blocks: + +- hrtimer base infrastructure +- timeofday and clock source management +- clock event management +- high resolution timer functionality +- dynamic ticks + + +hrtimer base infrastructure +--------------------------- + +The hrtimer base infrastructure was merged into the 2.6.16 kernel. Details of +the base implementation are covered in Documentation/timers/hrtimers.rst. See +also figure #2 (OLS slides p. 15) + +The main differences to the timer wheel, which holds the armed timer_list type +timers are: + + - time ordered enqueueing into a rb-tree + - independent of ticks (the processing is based on nanoseconds) + + +timeofday and clock source management +------------------------------------- + +John Stultz's Generic Time Of Day (GTOD) framework moves a large portion of +code out of the architecture-specific areas into a generic management +framework, as illustrated in figure #3 (OLS slides p. 18). The architecture +specific portion is reduced to the low level hardware details of the clock +sources, which are registered in the framework and selected on a quality based +decision. The low level code provides hardware setup and readout routines and +initializes data structures, which are used by the generic time keeping code to +convert the clock ticks to nanosecond based time values. All other time keeping +related functionality is moved into the generic code. The GTOD base patch got +merged into the 2.6.18 kernel. + +Further information about the Generic Time Of Day framework is available in the +OLS 2005 Proceedings Volume 1: + + http://www.linuxsymposium.org/2005/linuxsymposium_procv1.pdf + +The paper "We Are Not Getting Any Younger: A New Approach to Time and +Timers" was written by J. Stultz, D.V. Hart, & N. Aravamudan. + +Figure #3 (OLS slides p.18) illustrates the transformation. + + +clock event management +---------------------- + +While clock sources provide read access to the monotonically increasing time +value, clock event devices are used to schedule the next event +interrupt(s). The next event is currently defined to be periodic, with its +period defined at compile time. The setup and selection of the event device +for various event driven functionalities is hardwired into the architecture +dependent code. This results in duplicated code across all architectures and +makes it extremely difficult to change the configuration of the system to use +event interrupt devices other than those already built into the +architecture. Another implication of the current design is that it is necessary +to touch all the architecture-specific implementations in order to provide new +functionality like high resolution timers or dynamic ticks. + +The clock events subsystem tries to address this problem by providing a generic +solution to manage clock event devices and their usage for the various clock +event driven kernel functionalities. The goal of the clock event subsystem is +to minimize the clock event related architecture dependent code to the pure +hardware related handling and to allow easy addition and utilization of new +clock event devices. It also minimizes the duplicated code across the +architectures as it provides generic functionality down to the interrupt +service handler, which is almost inherently hardware dependent. + +Clock event devices are registered either by the architecture dependent boot +code or at module insertion time. Each clock event device fills a data +structure with clock-specific property parameters and callback functions. The +clock event management decides, by using the specified property parameters, the +set of system functions a clock event device will be used to support. This +includes the distinction of per-CPU and per-system global event devices. + +System-level global event devices are used for the Linux periodic tick. Per-CPU +event devices are used to provide local CPU functionality such as process +accounting, profiling, and high resolution timers. + +The management layer assigns one or more of the following functions to a clock +event device: + + - system global periodic tick (jiffies update) + - cpu local update_process_times + - cpu local profiling + - cpu local next event interrupt (non periodic mode) + +The clock event device delegates the selection of those timer interrupt related +functions completely to the management layer. The clock management layer stores +a function pointer in the device description structure, which has to be called +from the hardware level handler. This removes a lot of duplicated code from the +architecture specific timer interrupt handlers and hands the control over the +clock event devices and the assignment of timer interrupt related functionality +to the core code. + +The clock event layer API is rather small. Aside from the clock event device +registration interface it provides functions to schedule the next event +interrupt, clock event device notification service and support for suspend and +resume. + +The framework adds about 700 lines of code which results in a 2KB increase of +the kernel binary size. The conversion of i386 removes about 100 lines of +code. The binary size decrease is in the range of 400 byte. We believe that the +increase of flexibility and the avoidance of duplicated code across +architectures justifies the slight increase of the binary size. + +The conversion of an architecture has no functional impact, but allows to +utilize the high resolution and dynamic tick functionalities without any change +to the clock event device and timer interrupt code. After the conversion the +enabling of high resolution timers and dynamic ticks is simply provided by +adding the kernel/time/Kconfig file to the architecture specific Kconfig and +adding the dynamic tick specific calls to the idle routine (a total of 3 lines +added to the idle function and the Kconfig file) + +Figure #4 (OLS slides p.20) illustrates the transformation. + + +high resolution timer functionality +----------------------------------- + +During system boot it is not possible to use the high resolution timer +functionality, while making it possible would be difficult and would serve no +useful function. The initialization of the clock event device framework, the +clock source framework (GTOD) and hrtimers itself has to be done and +appropriate clock sources and clock event devices have to be registered before +the high resolution functionality can work. Up to the point where hrtimers are +initialized, the system works in the usual low resolution periodic mode. The +clock source and the clock event device layers provide notification functions +which inform hrtimers about availability of new hardware. hrtimers validates +the usability of the registered clock sources and clock event devices before +switching to high resolution mode. This ensures also that a kernel which is +configured for high resolution timers can run on a system which lacks the +necessary hardware support. + +The high resolution timer code does not support SMP machines which have only +global clock event devices. The support of such hardware would involve IPI +calls when an interrupt happens. The overhead would be much larger than the +benefit. This is the reason why we currently disable high resolution and +dynamic ticks on i386 SMP systems which stop the local APIC in C3 power +state. A workaround is available as an idea, but the problem has not been +tackled yet. + +The time ordered insertion of timers provides all the infrastructure to decide +whether the event device has to be reprogrammed when a timer is added. The +decision is made per timer base and synchronized across per-cpu timer bases in +a support function. The design allows the system to utilize separate per-CPU +clock event devices for the per-CPU timer bases, but currently only one +reprogrammable clock event device per-CPU is utilized. + +When the timer interrupt happens, the next event interrupt handler is called +from the clock event distribution code and moves expired timers from the +red-black tree to a separate double linked list and invokes the softirq +handler. An additional mode field in the hrtimer structure allows the system to +execute callback functions directly from the next event interrupt handler. This +is restricted to code which can safely be executed in the hard interrupt +context. This applies, for example, to the common case of a wakeup function as +used by nanosleep. The advantage of executing the handler in the interrupt +context is the avoidance of up to two context switches - from the interrupted +context to the softirq and to the task which is woken up by the expired +timer. + +Once a system has switched to high resolution mode, the periodic tick is +switched off. This disables the per system global periodic clock event device - +e.g. the PIT on i386 SMP systems. + +The periodic tick functionality is provided by an per-cpu hrtimer. The callback +function is executed in the next event interrupt context and updates jiffies +and calls update_process_times and profiling. The implementation of the hrtimer +based periodic tick is designed to be extended with dynamic tick functionality. +This allows to use a single clock event device to schedule high resolution +timer and periodic events (jiffies tick, profiling, process accounting) on UP +systems. This has been proved to work with the PIT on i386 and the Incrementer +on PPC. + +The softirq for running the hrtimer queues and executing the callbacks has been +separated from the tick bound timer softirq to allow accurate delivery of high +resolution timer signals which are used by itimer and POSIX interval +timers. The execution of this softirq can still be delayed by other softirqs, +but the overall latencies have been significantly improved by this separation. + +Figure #5 (OLS slides p.22) illustrates the transformation. + + +dynamic ticks +------------- + +Dynamic ticks are the logical consequence of the hrtimer based periodic tick +replacement (sched_tick). The functionality of the sched_tick hrtimer is +extended by three functions: + +- hrtimer_stop_sched_tick +- hrtimer_restart_sched_tick +- hrtimer_update_jiffies + +hrtimer_stop_sched_tick() is called when a CPU goes into idle state. The code +evaluates the next scheduled timer event (from both hrtimers and the timer +wheel) and in case that the next event is further away than the next tick it +reprograms the sched_tick to this future event, to allow longer idle sleeps +without worthless interruption by the periodic tick. The function is also +called when an interrupt happens during the idle period, which does not cause a +reschedule. The call is necessary as the interrupt handler might have armed a +new timer whose expiry time is before the time which was identified as the +nearest event in the previous call to hrtimer_stop_sched_tick. + +hrtimer_restart_sched_tick() is called when the CPU leaves the idle state before +it calls schedule(). hrtimer_restart_sched_tick() resumes the periodic tick, +which is kept active until the next call to hrtimer_stop_sched_tick(). + +hrtimer_update_jiffies() is called from irq_enter() when an interrupt happens +in the idle period to make sure that jiffies are up to date and the interrupt +handler has not to deal with an eventually stale jiffy value. + +The dynamic tick feature provides statistical values which are exported to +userspace via /proc/stat and can be made available for enhanced power +management control. + +The implementation leaves room for further development like full tickless +systems, where the time slice is controlled by the scheduler, variable +frequency profiling, and a complete removal of jiffies in the future. + + +Aside the current initial submission of i386 support, the patchset has been +extended to x86_64 and ARM already. Initial (work in progress) support is also +available for MIPS and PowerPC. + + Thomas, Ingo diff --git a/Documentation/timers/highres.txt b/Documentation/timers/highres.txt deleted file mode 100644 index 8f9741592123..000000000000 --- a/Documentation/timers/highres.txt +++ /dev/null @@ -1,249 +0,0 @@ -High resolution timers and dynamic ticks design notes ------------------------------------------------------ - -Further information can be found in the paper of the OLS 2006 talk "hrtimers -and beyond". The paper is part of the OLS 2006 Proceedings Volume 1, which can -be found on the OLS website: -https://www.kernel.org/doc/ols/2006/ols2006v1-pages-333-346.pdf - -The slides to this talk are available from: -http://www.cs.columbia.edu/~nahum/w6998/papers/ols2006-hrtimers-slides.pdf - -The slides contain five figures (pages 2, 15, 18, 20, 22), which illustrate the -changes in the time(r) related Linux subsystems. Figure #1 (p. 2) shows the -design of the Linux time(r) system before hrtimers and other building blocks -got merged into mainline. - -Note: the paper and the slides are talking about "clock event source", while we -switched to the name "clock event devices" in meantime. - -The design contains the following basic building blocks: - -- hrtimer base infrastructure -- timeofday and clock source management -- clock event management -- high resolution timer functionality -- dynamic ticks - - -hrtimer base infrastructure ---------------------------- - -The hrtimer base infrastructure was merged into the 2.6.16 kernel. Details of -the base implementation are covered in Documentation/timers/hrtimers.txt. See -also figure #2 (OLS slides p. 15) - -The main differences to the timer wheel, which holds the armed timer_list type -timers are: - - time ordered enqueueing into a rb-tree - - independent of ticks (the processing is based on nanoseconds) - - -timeofday and clock source management -------------------------------------- - -John Stultz's Generic Time Of Day (GTOD) framework moves a large portion of -code out of the architecture-specific areas into a generic management -framework, as illustrated in figure #3 (OLS slides p. 18). The architecture -specific portion is reduced to the low level hardware details of the clock -sources, which are registered in the framework and selected on a quality based -decision. The low level code provides hardware setup and readout routines and -initializes data structures, which are used by the generic time keeping code to -convert the clock ticks to nanosecond based time values. All other time keeping -related functionality is moved into the generic code. The GTOD base patch got -merged into the 2.6.18 kernel. - -Further information about the Generic Time Of Day framework is available in the -OLS 2005 Proceedings Volume 1: -http://www.linuxsymposium.org/2005/linuxsymposium_procv1.pdf - -The paper "We Are Not Getting Any Younger: A New Approach to Time and -Timers" was written by J. Stultz, D.V. Hart, & N. Aravamudan. - -Figure #3 (OLS slides p.18) illustrates the transformation. - - -clock event management ----------------------- - -While clock sources provide read access to the monotonically increasing time -value, clock event devices are used to schedule the next event -interrupt(s). The next event is currently defined to be periodic, with its -period defined at compile time. The setup and selection of the event device -for various event driven functionalities is hardwired into the architecture -dependent code. This results in duplicated code across all architectures and -makes it extremely difficult to change the configuration of the system to use -event interrupt devices other than those already built into the -architecture. Another implication of the current design is that it is necessary -to touch all the architecture-specific implementations in order to provide new -functionality like high resolution timers or dynamic ticks. - -The clock events subsystem tries to address this problem by providing a generic -solution to manage clock event devices and their usage for the various clock -event driven kernel functionalities. The goal of the clock event subsystem is -to minimize the clock event related architecture dependent code to the pure -hardware related handling and to allow easy addition and utilization of new -clock event devices. It also minimizes the duplicated code across the -architectures as it provides generic functionality down to the interrupt -service handler, which is almost inherently hardware dependent. - -Clock event devices are registered either by the architecture dependent boot -code or at module insertion time. Each clock event device fills a data -structure with clock-specific property parameters and callback functions. The -clock event management decides, by using the specified property parameters, the -set of system functions a clock event device will be used to support. This -includes the distinction of per-CPU and per-system global event devices. - -System-level global event devices are used for the Linux periodic tick. Per-CPU -event devices are used to provide local CPU functionality such as process -accounting, profiling, and high resolution timers. - -The management layer assigns one or more of the following functions to a clock -event device: - - system global periodic tick (jiffies update) - - cpu local update_process_times - - cpu local profiling - - cpu local next event interrupt (non periodic mode) - -The clock event device delegates the selection of those timer interrupt related -functions completely to the management layer. The clock management layer stores -a function pointer in the device description structure, which has to be called -from the hardware level handler. This removes a lot of duplicated code from the -architecture specific timer interrupt handlers and hands the control over the -clock event devices and the assignment of timer interrupt related functionality -to the core code. - -The clock event layer API is rather small. Aside from the clock event device -registration interface it provides functions to schedule the next event -interrupt, clock event device notification service and support for suspend and -resume. - -The framework adds about 700 lines of code which results in a 2KB increase of -the kernel binary size. The conversion of i386 removes about 100 lines of -code. The binary size decrease is in the range of 400 byte. We believe that the -increase of flexibility and the avoidance of duplicated code across -architectures justifies the slight increase of the binary size. - -The conversion of an architecture has no functional impact, but allows to -utilize the high resolution and dynamic tick functionalities without any change -to the clock event device and timer interrupt code. After the conversion the -enabling of high resolution timers and dynamic ticks is simply provided by -adding the kernel/time/Kconfig file to the architecture specific Kconfig and -adding the dynamic tick specific calls to the idle routine (a total of 3 lines -added to the idle function and the Kconfig file) - -Figure #4 (OLS slides p.20) illustrates the transformation. - - -high resolution timer functionality ------------------------------------ - -During system boot it is not possible to use the high resolution timer -functionality, while making it possible would be difficult and would serve no -useful function. The initialization of the clock event device framework, the -clock source framework (GTOD) and hrtimers itself has to be done and -appropriate clock sources and clock event devices have to be registered before -the high resolution functionality can work. Up to the point where hrtimers are -initialized, the system works in the usual low resolution periodic mode. The -clock source and the clock event device layers provide notification functions -which inform hrtimers about availability of new hardware. hrtimers validates -the usability of the registered clock sources and clock event devices before -switching to high resolution mode. This ensures also that a kernel which is -configured for high resolution timers can run on a system which lacks the -necessary hardware support. - -The high resolution timer code does not support SMP machines which have only -global clock event devices. The support of such hardware would involve IPI -calls when an interrupt happens. The overhead would be much larger than the -benefit. This is the reason why we currently disable high resolution and -dynamic ticks on i386 SMP systems which stop the local APIC in C3 power -state. A workaround is available as an idea, but the problem has not been -tackled yet. - -The time ordered insertion of timers provides all the infrastructure to decide -whether the event device has to be reprogrammed when a timer is added. The -decision is made per timer base and synchronized across per-cpu timer bases in -a support function. The design allows the system to utilize separate per-CPU -clock event devices for the per-CPU timer bases, but currently only one -reprogrammable clock event device per-CPU is utilized. - -When the timer interrupt happens, the next event interrupt handler is called -from the clock event distribution code and moves expired timers from the -red-black tree to a separate double linked list and invokes the softirq -handler. An additional mode field in the hrtimer structure allows the system to -execute callback functions directly from the next event interrupt handler. This -is restricted to code which can safely be executed in the hard interrupt -context. This applies, for example, to the common case of a wakeup function as -used by nanosleep. The advantage of executing the handler in the interrupt -context is the avoidance of up to two context switches - from the interrupted -context to the softirq and to the task which is woken up by the expired -timer. - -Once a system has switched to high resolution mode, the periodic tick is -switched off. This disables the per system global periodic clock event device - -e.g. the PIT on i386 SMP systems. - -The periodic tick functionality is provided by an per-cpu hrtimer. The callback -function is executed in the next event interrupt context and updates jiffies -and calls update_process_times and profiling. The implementation of the hrtimer -based periodic tick is designed to be extended with dynamic tick functionality. -This allows to use a single clock event device to schedule high resolution -timer and periodic events (jiffies tick, profiling, process accounting) on UP -systems. This has been proved to work with the PIT on i386 and the Incrementer -on PPC. - -The softirq for running the hrtimer queues and executing the callbacks has been -separated from the tick bound timer softirq to allow accurate delivery of high -resolution timer signals which are used by itimer and POSIX interval -timers. The execution of this softirq can still be delayed by other softirqs, -but the overall latencies have been significantly improved by this separation. - -Figure #5 (OLS slides p.22) illustrates the transformation. - - -dynamic ticks -------------- - -Dynamic ticks are the logical consequence of the hrtimer based periodic tick -replacement (sched_tick). The functionality of the sched_tick hrtimer is -extended by three functions: - -- hrtimer_stop_sched_tick -- hrtimer_restart_sched_tick -- hrtimer_update_jiffies - -hrtimer_stop_sched_tick() is called when a CPU goes into idle state. The code -evaluates the next scheduled timer event (from both hrtimers and the timer -wheel) and in case that the next event is further away than the next tick it -reprograms the sched_tick to this future event, to allow longer idle sleeps -without worthless interruption by the periodic tick. The function is also -called when an interrupt happens during the idle period, which does not cause a -reschedule. The call is necessary as the interrupt handler might have armed a -new timer whose expiry time is before the time which was identified as the -nearest event in the previous call to hrtimer_stop_sched_tick. - -hrtimer_restart_sched_tick() is called when the CPU leaves the idle state before -it calls schedule(). hrtimer_restart_sched_tick() resumes the periodic tick, -which is kept active until the next call to hrtimer_stop_sched_tick(). - -hrtimer_update_jiffies() is called from irq_enter() when an interrupt happens -in the idle period to make sure that jiffies are up to date and the interrupt -handler has not to deal with an eventually stale jiffy value. - -The dynamic tick feature provides statistical values which are exported to -userspace via /proc/stat and can be made available for enhanced power -management control. - -The implementation leaves room for further development like full tickless -systems, where the time slice is controlled by the scheduler, variable -frequency profiling, and a complete removal of jiffies in the future. - - -Aside the current initial submission of i386 support, the patchset has been -extended to x86_64 and ARM already. Initial (work in progress) support is also -available for MIPS and PowerPC. - - Thomas, Ingo - - - diff --git a/Documentation/timers/hpet.rst b/Documentation/timers/hpet.rst new file mode 100644 index 000000000000..c9d05d3caaca --- /dev/null +++ b/Documentation/timers/hpet.rst @@ -0,0 +1,30 @@ +=========================================== +High Precision Event Timer Driver for Linux +=========================================== + +The High Precision Event Timer (HPET) hardware follows a specification +by Intel and Microsoft, revision 1. + +Each HPET has one fixed-rate counter (at 10+ MHz, hence "High Precision") +and up to 32 comparators. Normally three or more comparators are provided, +each of which can generate oneshot interrupts and at least one of which has +additional hardware to support periodic interrupts. The comparators are +also called "timers", which can be misleading since usually timers are +independent of each other ... these share a counter, complicating resets. + +HPET devices can support two interrupt routing modes. In one mode, the +comparators are additional interrupt sources with no particular system +role. Many x86 BIOS writers don't route HPET interrupts at all, which +prevents use of that mode. They support the other "legacy replacement" +mode where the first two comparators block interrupts from 8254 timers +and from the RTC. + +The driver supports detection of HPET driver allocation and initialization +of the HPET before the driver module_init routine is called. This enables +platform code which uses timer 0 or 1 as the main timer to intercept HPET +initialization. An example of this initialization can be found in +arch/x86/kernel/hpet.c. + +The driver provides a userspace API which resembles the API found in the +RTC driver framework. An example user space program is provided in +file:samples/timers/hpet_example.c diff --git a/Documentation/timers/hpet.txt b/Documentation/timers/hpet.txt deleted file mode 100644 index 895345ec513b..000000000000 --- a/Documentation/timers/hpet.txt +++ /dev/null @@ -1,28 +0,0 @@ - High Precision Event Timer Driver for Linux - -The High Precision Event Timer (HPET) hardware follows a specification -by Intel and Microsoft, revision 1. - -Each HPET has one fixed-rate counter (at 10+ MHz, hence "High Precision") -and up to 32 comparators. Normally three or more comparators are provided, -each of which can generate oneshot interrupts and at least one of which has -additional hardware to support periodic interrupts. The comparators are -also called "timers", which can be misleading since usually timers are -independent of each other ... these share a counter, complicating resets. - -HPET devices can support two interrupt routing modes. In one mode, the -comparators are additional interrupt sources with no particular system -role. Many x86 BIOS writers don't route HPET interrupts at all, which -prevents use of that mode. They support the other "legacy replacement" -mode where the first two comparators block interrupts from 8254 timers -and from the RTC. - -The driver supports detection of HPET driver allocation and initialization -of the HPET before the driver module_init routine is called. This enables -platform code which uses timer 0 or 1 as the main timer to intercept HPET -initialization. An example of this initialization can be found in -arch/x86/kernel/hpet.c. - -The driver provides a userspace API which resembles the API found in the -RTC driver framework. An example user space program is provided in -file:samples/timers/hpet_example.c diff --git a/Documentation/timers/hrtimers.rst b/Documentation/timers/hrtimers.rst new file mode 100644 index 000000000000..c1c20a693e8f --- /dev/null +++ b/Documentation/timers/hrtimers.rst @@ -0,0 +1,178 @@ +====================================================== +hrtimers - subsystem for high-resolution kernel timers +====================================================== + +This patch introduces a new subsystem for high-resolution kernel timers. + +One might ask the question: we already have a timer subsystem +(kernel/timers.c), why do we need two timer subsystems? After a lot of +back and forth trying to integrate high-resolution and high-precision +features into the existing timer framework, and after testing various +such high-resolution timer implementations in practice, we came to the +conclusion that the timer wheel code is fundamentally not suitable for +such an approach. We initially didn't believe this ('there must be a way +to solve this'), and spent a considerable effort trying to integrate +things into the timer wheel, but we failed. In hindsight, there are +several reasons why such integration is hard/impossible: + +- the forced handling of low-resolution and high-resolution timers in + the same way leads to a lot of compromises, macro magic and #ifdef + mess. The timers.c code is very "tightly coded" around jiffies and + 32-bitness assumptions, and has been honed and micro-optimized for a + relatively narrow use case (jiffies in a relatively narrow HZ range) + for many years - and thus even small extensions to it easily break + the wheel concept, leading to even worse compromises. The timer wheel + code is very good and tight code, there's zero problems with it in its + current usage - but it is simply not suitable to be extended for + high-res timers. + +- the unpredictable [O(N)] overhead of cascading leads to delays which + necessitate a more complex handling of high resolution timers, which + in turn decreases robustness. Such a design still leads to rather large + timing inaccuracies. Cascading is a fundamental property of the timer + wheel concept, it cannot be 'designed out' without inevitably + degrading other portions of the timers.c code in an unacceptable way. + +- the implementation of the current posix-timer subsystem on top of + the timer wheel has already introduced a quite complex handling of + the required readjusting of absolute CLOCK_REALTIME timers at + settimeofday or NTP time - further underlying our experience by + example: that the timer wheel data structure is too rigid for high-res + timers. + +- the timer wheel code is most optimal for use cases which can be + identified as "timeouts". Such timeouts are usually set up to cover + error conditions in various I/O paths, such as networking and block + I/O. The vast majority of those timers never expire and are rarely + recascaded because the expected correct event arrives in time so they + can be removed from the timer wheel before any further processing of + them becomes necessary. Thus the users of these timeouts can accept + the granularity and precision tradeoffs of the timer wheel, and + largely expect the timer subsystem to have near-zero overhead. + Accurate timing for them is not a core purpose - in fact most of the + timeout values used are ad-hoc. For them it is at most a necessary + evil to guarantee the processing of actual timeout completions + (because most of the timeouts are deleted before completion), which + should thus be as cheap and unintrusive as possible. + +The primary users of precision timers are user-space applications that +utilize nanosleep, posix-timers and itimer interfaces. Also, in-kernel +users like drivers and subsystems which require precise timed events +(e.g. multimedia) can benefit from the availability of a separate +high-resolution timer subsystem as well. + +While this subsystem does not offer high-resolution clock sources just +yet, the hrtimer subsystem can be easily extended with high-resolution +clock capabilities, and patches for that exist and are maturing quickly. +The increasing demand for realtime and multimedia applications along +with other potential users for precise timers gives another reason to +separate the "timeout" and "precise timer" subsystems. + +Another potential benefit is that such a separation allows even more +special-purpose optimization of the existing timer wheel for the low +resolution and low precision use cases - once the precision-sensitive +APIs are separated from the timer wheel and are migrated over to +hrtimers. E.g. we could decrease the frequency of the timeout subsystem +from 250 Hz to 100 HZ (or even smaller). + +hrtimer subsystem implementation details +---------------------------------------- + +the basic design considerations were: + +- simplicity + +- data structure not bound to jiffies or any other granularity. All the + kernel logic works at 64-bit nanoseconds resolution - no compromises. + +- simplification of existing, timing related kernel code + +another basic requirement was the immediate enqueueing and ordering of +timers at activation time. After looking at several possible solutions +such as radix trees and hashes, we chose the red black tree as the basic +data structure. Rbtrees are available as a library in the kernel and are +used in various performance-critical areas of e.g. memory management and +file systems. The rbtree is solely used for time sorted ordering, while +a separate list is used to give the expiry code fast access to the +queued timers, without having to walk the rbtree. + +(This separate list is also useful for later when we'll introduce +high-resolution clocks, where we need separate pending and expired +queues while keeping the time-order intact.) + +Time-ordered enqueueing is not purely for the purposes of +high-resolution clocks though, it also simplifies the handling of +absolute timers based on a low-resolution CLOCK_REALTIME. The existing +implementation needed to keep an extra list of all armed absolute +CLOCK_REALTIME timers along with complex locking. In case of +settimeofday and NTP, all the timers (!) had to be dequeued, the +time-changing code had to fix them up one by one, and all of them had to +be enqueued again. The time-ordered enqueueing and the storage of the +expiry time in absolute time units removes all this complex and poorly +scaling code from the posix-timer implementation - the clock can simply +be set without having to touch the rbtree. This also makes the handling +of posix-timers simpler in general. + +The locking and per-CPU behavior of hrtimers was mostly taken from the +existing timer wheel code, as it is mature and well suited. Sharing code +was not really a win, due to the different data structures. Also, the +hrtimer functions now have clearer behavior and clearer names - such as +hrtimer_try_to_cancel() and hrtimer_cancel() [which are roughly +equivalent to del_timer() and del_timer_sync()] - so there's no direct +1:1 mapping between them on the algorithmic level, and thus no real +potential for code sharing either. + +Basic data types: every time value, absolute or relative, is in a +special nanosecond-resolution type: ktime_t. The kernel-internal +representation of ktime_t values and operations is implemented via +macros and inline functions, and can be switched between a "hybrid +union" type and a plain "scalar" 64bit nanoseconds representation (at +compile time). The hybrid union type optimizes time conversions on 32bit +CPUs. This build-time-selectable ktime_t storage format was implemented +to avoid the performance impact of 64-bit multiplications and divisions +on 32bit CPUs. Such operations are frequently necessary to convert +between the storage formats provided by kernel and userspace interfaces +and the internal time format. (See include/linux/ktime.h for further +details.) + +hrtimers - rounding of timer values +----------------------------------- + +the hrtimer code will round timer events to lower-resolution clocks +because it has to. Otherwise it will do no artificial rounding at all. + +one question is, what resolution value should be returned to the user by +the clock_getres() interface. This will return whatever real resolution +a given clock has - be it low-res, high-res, or artificially-low-res. + +hrtimers - testing and verification +----------------------------------- + +We used the high-resolution clock subsystem ontop of hrtimers to verify +the hrtimer implementation details in praxis, and we also ran the posix +timer tests in order to ensure specification compliance. We also ran +tests on low-resolution clocks. + +The hrtimer patch converts the following kernel functionality to use +hrtimers: + + - nanosleep + - itimers + - posix-timers + +The conversion of nanosleep and posix-timers enabled the unification of +nanosleep and clock_nanosleep. + +The code was successfully compiled for the following platforms: + + i386, x86_64, ARM, PPC, PPC64, IA64 + +The code was run-tested on the following platforms: + + i386(UP/SMP), x86_64(UP/SMP), ARM, PPC + +hrtimers were also integrated into the -rt tree, along with a +hrtimers-based high-resolution clock implementation, so the hrtimers +code got a healthy amount of testing and use in practice. + + Thomas Gleixner, Ingo Molnar diff --git a/Documentation/timers/hrtimers.txt b/Documentation/timers/hrtimers.txt deleted file mode 100644 index 588d85724f10..000000000000 --- a/Documentation/timers/hrtimers.txt +++ /dev/null @@ -1,178 +0,0 @@ - -hrtimers - subsystem for high-resolution kernel timers ----------------------------------------------------- - -This patch introduces a new subsystem for high-resolution kernel timers. - -One might ask the question: we already have a timer subsystem -(kernel/timers.c), why do we need two timer subsystems? After a lot of -back and forth trying to integrate high-resolution and high-precision -features into the existing timer framework, and after testing various -such high-resolution timer implementations in practice, we came to the -conclusion that the timer wheel code is fundamentally not suitable for -such an approach. We initially didn't believe this ('there must be a way -to solve this'), and spent a considerable effort trying to integrate -things into the timer wheel, but we failed. In hindsight, there are -several reasons why such integration is hard/impossible: - -- the forced handling of low-resolution and high-resolution timers in - the same way leads to a lot of compromises, macro magic and #ifdef - mess. The timers.c code is very "tightly coded" around jiffies and - 32-bitness assumptions, and has been honed and micro-optimized for a - relatively narrow use case (jiffies in a relatively narrow HZ range) - for many years - and thus even small extensions to it easily break - the wheel concept, leading to even worse compromises. The timer wheel - code is very good and tight code, there's zero problems with it in its - current usage - but it is simply not suitable to be extended for - high-res timers. - -- the unpredictable [O(N)] overhead of cascading leads to delays which - necessitate a more complex handling of high resolution timers, which - in turn decreases robustness. Such a design still leads to rather large - timing inaccuracies. Cascading is a fundamental property of the timer - wheel concept, it cannot be 'designed out' without inevitably - degrading other portions of the timers.c code in an unacceptable way. - -- the implementation of the current posix-timer subsystem on top of - the timer wheel has already introduced a quite complex handling of - the required readjusting of absolute CLOCK_REALTIME timers at - settimeofday or NTP time - further underlying our experience by - example: that the timer wheel data structure is too rigid for high-res - timers. - -- the timer wheel code is most optimal for use cases which can be - identified as "timeouts". Such timeouts are usually set up to cover - error conditions in various I/O paths, such as networking and block - I/O. The vast majority of those timers never expire and are rarely - recascaded because the expected correct event arrives in time so they - can be removed from the timer wheel before any further processing of - them becomes necessary. Thus the users of these timeouts can accept - the granularity and precision tradeoffs of the timer wheel, and - largely expect the timer subsystem to have near-zero overhead. - Accurate timing for them is not a core purpose - in fact most of the - timeout values used are ad-hoc. For them it is at most a necessary - evil to guarantee the processing of actual timeout completions - (because most of the timeouts are deleted before completion), which - should thus be as cheap and unintrusive as possible. - -The primary users of precision timers are user-space applications that -utilize nanosleep, posix-timers and itimer interfaces. Also, in-kernel -users like drivers and subsystems which require precise timed events -(e.g. multimedia) can benefit from the availability of a separate -high-resolution timer subsystem as well. - -While this subsystem does not offer high-resolution clock sources just -yet, the hrtimer subsystem can be easily extended with high-resolution -clock capabilities, and patches for that exist and are maturing quickly. -The increasing demand for realtime and multimedia applications along -with other potential users for precise timers gives another reason to -separate the "timeout" and "precise timer" subsystems. - -Another potential benefit is that such a separation allows even more -special-purpose optimization of the existing timer wheel for the low -resolution and low precision use cases - once the precision-sensitive -APIs are separated from the timer wheel and are migrated over to -hrtimers. E.g. we could decrease the frequency of the timeout subsystem -from 250 Hz to 100 HZ (or even smaller). - -hrtimer subsystem implementation details ----------------------------------------- - -the basic design considerations were: - -- simplicity - -- data structure not bound to jiffies or any other granularity. All the - kernel logic works at 64-bit nanoseconds resolution - no compromises. - -- simplification of existing, timing related kernel code - -another basic requirement was the immediate enqueueing and ordering of -timers at activation time. After looking at several possible solutions -such as radix trees and hashes, we chose the red black tree as the basic -data structure. Rbtrees are available as a library in the kernel and are -used in various performance-critical areas of e.g. memory management and -file systems. The rbtree is solely used for time sorted ordering, while -a separate list is used to give the expiry code fast access to the -queued timers, without having to walk the rbtree. - -(This separate list is also useful for later when we'll introduce -high-resolution clocks, where we need separate pending and expired -queues while keeping the time-order intact.) - -Time-ordered enqueueing is not purely for the purposes of -high-resolution clocks though, it also simplifies the handling of -absolute timers based on a low-resolution CLOCK_REALTIME. The existing -implementation needed to keep an extra list of all armed absolute -CLOCK_REALTIME timers along with complex locking. In case of -settimeofday and NTP, all the timers (!) had to be dequeued, the -time-changing code had to fix them up one by one, and all of them had to -be enqueued again. The time-ordered enqueueing and the storage of the -expiry time in absolute time units removes all this complex and poorly -scaling code from the posix-timer implementation - the clock can simply -be set without having to touch the rbtree. This also makes the handling -of posix-timers simpler in general. - -The locking and per-CPU behavior of hrtimers was mostly taken from the -existing timer wheel code, as it is mature and well suited. Sharing code -was not really a win, due to the different data structures. Also, the -hrtimer functions now have clearer behavior and clearer names - such as -hrtimer_try_to_cancel() and hrtimer_cancel() [which are roughly -equivalent to del_timer() and del_timer_sync()] - so there's no direct -1:1 mapping between them on the algorithmic level, and thus no real -potential for code sharing either. - -Basic data types: every time value, absolute or relative, is in a -special nanosecond-resolution type: ktime_t. The kernel-internal -representation of ktime_t values and operations is implemented via -macros and inline functions, and can be switched between a "hybrid -union" type and a plain "scalar" 64bit nanoseconds representation (at -compile time). The hybrid union type optimizes time conversions on 32bit -CPUs. This build-time-selectable ktime_t storage format was implemented -to avoid the performance impact of 64-bit multiplications and divisions -on 32bit CPUs. Such operations are frequently necessary to convert -between the storage formats provided by kernel and userspace interfaces -and the internal time format. (See include/linux/ktime.h for further -details.) - -hrtimers - rounding of timer values ------------------------------------ - -the hrtimer code will round timer events to lower-resolution clocks -because it has to. Otherwise it will do no artificial rounding at all. - -one question is, what resolution value should be returned to the user by -the clock_getres() interface. This will return whatever real resolution -a given clock has - be it low-res, high-res, or artificially-low-res. - -hrtimers - testing and verification ----------------------------------- - -We used the high-resolution clock subsystem ontop of hrtimers to verify -the hrtimer implementation details in praxis, and we also ran the posix -timer tests in order to ensure specification compliance. We also ran -tests on low-resolution clocks. - -The hrtimer patch converts the following kernel functionality to use -hrtimers: - - - nanosleep - - itimers - - posix-timers - -The conversion of nanosleep and posix-timers enabled the unification of -nanosleep and clock_nanosleep. - -The code was successfully compiled for the following platforms: - - i386, x86_64, ARM, PPC, PPC64, IA64 - -The code was run-tested on the following platforms: - - i386(UP/SMP), x86_64(UP/SMP), ARM, PPC - -hrtimers were also integrated into the -rt tree, along with a -hrtimers-based high-resolution clock implementation, so the hrtimers -code got a healthy amount of testing and use in practice. - - Thomas Gleixner, Ingo Molnar diff --git a/Documentation/timers/index.rst b/Documentation/timers/index.rst new file mode 100644 index 000000000000..91f6f8263c48 --- /dev/null +++ b/Documentation/timers/index.rst @@ -0,0 +1,22 @@ +:orphan: + +====== +timers +====== + +.. toctree:: + :maxdepth: 1 + + highres + hpet + hrtimers + no_hz + timekeeping + timers-howto + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/timers/no_hz.rst b/Documentation/timers/no_hz.rst new file mode 100644 index 000000000000..065db217cb04 --- /dev/null +++ b/Documentation/timers/no_hz.rst @@ -0,0 +1,326 @@ +====================================== +NO_HZ: Reducing Scheduling-Clock Ticks +====================================== + + +This document describes Kconfig options and boot parameters that can +reduce the number of scheduling-clock interrupts, thereby improving energy +efficiency and reducing OS jitter. Reducing OS jitter is important for +some types of computationally intensive high-performance computing (HPC) +applications and for real-time applications. + +There are three main ways of managing scheduling-clock interrupts +(also known as "scheduling-clock ticks" or simply "ticks"): + +1. Never omit scheduling-clock ticks (CONFIG_HZ_PERIODIC=y or + CONFIG_NO_HZ=n for older kernels). You normally will -not- + want to choose this option. + +2. Omit scheduling-clock ticks on idle CPUs (CONFIG_NO_HZ_IDLE=y or + CONFIG_NO_HZ=y for older kernels). This is the most common + approach, and should be the default. + +3. Omit scheduling-clock ticks on CPUs that are either idle or that + have only one runnable task (CONFIG_NO_HZ_FULL=y). Unless you + are running realtime applications or certain types of HPC + workloads, you will normally -not- want this option. + +These three cases are described in the following three sections, followed +by a third section on RCU-specific considerations, a fourth section +discussing testing, and a fifth and final section listing known issues. + + +Never Omit Scheduling-Clock Ticks +================================= + +Very old versions of Linux from the 1990s and the very early 2000s +are incapable of omitting scheduling-clock ticks. It turns out that +there are some situations where this old-school approach is still the +right approach, for example, in heavy workloads with lots of tasks +that use short bursts of CPU, where there are very frequent idle +periods, but where these idle periods are also quite short (tens or +hundreds of microseconds). For these types of workloads, scheduling +clock interrupts will normally be delivered any way because there +will frequently be multiple runnable tasks per CPU. In these cases, +attempting to turn off the scheduling clock interrupt will have no effect +other than increasing the overhead of switching to and from idle and +transitioning between user and kernel execution. + +This mode of operation can be selected using CONFIG_HZ_PERIODIC=y (or +CONFIG_NO_HZ=n for older kernels). + +However, if you are instead running a light workload with long idle +periods, failing to omit scheduling-clock interrupts will result in +excessive power consumption. This is especially bad on battery-powered +devices, where it results in extremely short battery lifetimes. If you +are running light workloads, you should therefore read the following +section. + +In addition, if you are running either a real-time workload or an HPC +workload with short iterations, the scheduling-clock interrupts can +degrade your applications performance. If this describes your workload, +you should read the following two sections. + + +Omit Scheduling-Clock Ticks For Idle CPUs +========================================= + +If a CPU is idle, there is little point in sending it a scheduling-clock +interrupt. After all, the primary purpose of a scheduling-clock interrupt +is to force a busy CPU to shift its attention among multiple duties, +and an idle CPU has no duties to shift its attention among. + +The CONFIG_NO_HZ_IDLE=y Kconfig option causes the kernel to avoid sending +scheduling-clock interrupts to idle CPUs, which is critically important +both to battery-powered devices and to highly virtualized mainframes. +A battery-powered device running a CONFIG_HZ_PERIODIC=y kernel would +drain its battery very quickly, easily 2-3 times as fast as would the +same device running a CONFIG_NO_HZ_IDLE=y kernel. A mainframe running +1,500 OS instances might find that half of its CPU time was consumed by +unnecessary scheduling-clock interrupts. In these situations, there +is strong motivation to avoid sending scheduling-clock interrupts to +idle CPUs. That said, dyntick-idle mode is not free: + +1. It increases the number of instructions executed on the path + to and from the idle loop. + +2. On many architectures, dyntick-idle mode also increases the + number of expensive clock-reprogramming operations. + +Therefore, systems with aggressive real-time response constraints often +run CONFIG_HZ_PERIODIC=y kernels (or CONFIG_NO_HZ=n for older kernels) +in order to avoid degrading from-idle transition latencies. + +An idle CPU that is not receiving scheduling-clock interrupts is said to +be "dyntick-idle", "in dyntick-idle mode", "in nohz mode", or "running +tickless". The remainder of this document will use "dyntick-idle mode". + +There is also a boot parameter "nohz=" that can be used to disable +dyntick-idle mode in CONFIG_NO_HZ_IDLE=y kernels by specifying "nohz=off". +By default, CONFIG_NO_HZ_IDLE=y kernels boot with "nohz=on", enabling +dyntick-idle mode. + + +Omit Scheduling-Clock Ticks For CPUs With Only One Runnable Task +================================================================ + +If a CPU has only one runnable task, there is little point in sending it +a scheduling-clock interrupt because there is no other task to switch to. +Note that omitting scheduling-clock ticks for CPUs with only one runnable +task implies also omitting them for idle CPUs. + +The CONFIG_NO_HZ_FULL=y Kconfig option causes the kernel to avoid +sending scheduling-clock interrupts to CPUs with a single runnable task, +and such CPUs are said to be "adaptive-ticks CPUs". This is important +for applications with aggressive real-time response constraints because +it allows them to improve their worst-case response times by the maximum +duration of a scheduling-clock interrupt. It is also important for +computationally intensive short-iteration workloads: If any CPU is +delayed during a given iteration, all the other CPUs will be forced to +wait idle while the delayed CPU finishes. Thus, the delay is multiplied +by one less than the number of CPUs. In these situations, there is +again strong motivation to avoid sending scheduling-clock interrupts. + +By default, no CPU will be an adaptive-ticks CPU. The "nohz_full=" +boot parameter specifies the adaptive-ticks CPUs. For example, +"nohz_full=1,6-8" says that CPUs 1, 6, 7, and 8 are to be adaptive-ticks +CPUs. Note that you are prohibited from marking all of the CPUs as +adaptive-tick CPUs: At least one non-adaptive-tick CPU must remain +online to handle timekeeping tasks in order to ensure that system +calls like gettimeofday() returns accurate values on adaptive-tick CPUs. +(This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no running +user processes to observe slight drifts in clock rate.) Therefore, the +boot CPU is prohibited from entering adaptive-ticks mode. Specifying a +"nohz_full=" mask that includes the boot CPU will result in a boot-time +error message, and the boot CPU will be removed from the mask. Note that +this means that your system must have at least two CPUs in order for +CONFIG_NO_HZ_FULL=y to do anything for you. + +Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded. +This is covered in the "RCU IMPLICATIONS" section below. + +Normally, a CPU remains in adaptive-ticks mode as long as possible. +In particular, transitioning to kernel mode does not automatically change +the mode. Instead, the CPU will exit adaptive-ticks mode only if needed, +for example, if that CPU enqueues an RCU callback. + +Just as with dyntick-idle mode, the benefits of adaptive-tick mode do +not come for free: + +1. CONFIG_NO_HZ_FULL selects CONFIG_NO_HZ_COMMON, so you cannot run + adaptive ticks without also running dyntick idle. This dependency + extends down into the implementation, so that all of the costs + of CONFIG_NO_HZ_IDLE are also incurred by CONFIG_NO_HZ_FULL. + +2. The user/kernel transitions are slightly more expensive due + to the need to inform kernel subsystems (such as RCU) about + the change in mode. + +3. POSIX CPU timers prevent CPUs from entering adaptive-tick mode. + Real-time applications needing to take actions based on CPU time + consumption need to use other means of doing so. + +4. If there are more perf events pending than the hardware can + accommodate, they are normally round-robined so as to collect + all of them over time. Adaptive-tick mode may prevent this + round-robining from happening. This will likely be fixed by + preventing CPUs with large numbers of perf events pending from + entering adaptive-tick mode. + +5. Scheduler statistics for adaptive-tick CPUs may be computed + slightly differently than those for non-adaptive-tick CPUs. + This might in turn perturb load-balancing of real-time tasks. + +6. The LB_BIAS scheduler feature is disabled by adaptive ticks. + +Although improvements are expected over time, adaptive ticks is quite +useful for many types of real-time and compute-intensive applications. +However, the drawbacks listed above mean that adaptive ticks should not +(yet) be enabled by default. + + +RCU Implications +================ + +There are situations in which idle CPUs cannot be permitted to +enter either dyntick-idle mode or adaptive-tick mode, the most +common being when that CPU has RCU callbacks pending. + +The CONFIG_RCU_FAST_NO_HZ=y Kconfig option may be used to cause such CPUs +to enter dyntick-idle mode or adaptive-tick mode anyway. In this case, +a timer will awaken these CPUs every four jiffies in order to ensure +that the RCU callbacks are processed in a timely fashion. + +Another approach is to offload RCU callback processing to "rcuo" kthreads +using the CONFIG_RCU_NOCB_CPU=y Kconfig option. The specific CPUs to +offload may be selected using The "rcu_nocbs=" kernel boot parameter, +which takes a comma-separated list of CPUs and CPU ranges, for example, +"1,3-5" selects CPUs 1, 3, 4, and 5. + +The offloaded CPUs will never queue RCU callbacks, and therefore RCU +never prevents offloaded CPUs from entering either dyntick-idle mode +or adaptive-tick mode. That said, note that it is up to userspace to +pin the "rcuo" kthreads to specific CPUs if desired. Otherwise, the +scheduler will decide where to run them, which might or might not be +where you want them to run. + + +Testing +======= + +So you enable all the OS-jitter features described in this document, +but do not see any change in your workload's behavior. Is this because +your workload isn't affected that much by OS jitter, or is it because +something else is in the way? This section helps answer this question +by providing a simple OS-jitter test suite, which is available on branch +master of the following git archive: + +git://git.kernel.org/pub/scm/linux/kernel/git/frederic/dynticks-testing.git + +Clone this archive and follow the instructions in the README file. +This test procedure will produce a trace that will allow you to evaluate +whether or not you have succeeded in removing OS jitter from your system. +If this trace shows that you have removed OS jitter as much as is +possible, then you can conclude that your workload is not all that +sensitive to OS jitter. + +Note: this test requires that your system have at least two CPUs. +We do not currently have a good way to remove OS jitter from single-CPU +systems. + + +Known Issues +============ + +* Dyntick-idle slows transitions to and from idle slightly. + In practice, this has not been a problem except for the most + aggressive real-time workloads, which have the option of disabling + dyntick-idle mode, an option that most of them take. However, + some workloads will no doubt want to use adaptive ticks to + eliminate scheduling-clock interrupt latencies. Here are some + options for these workloads: + + a. Use PMQOS from userspace to inform the kernel of your + latency requirements (preferred). + + b. On x86 systems, use the "idle=mwait" boot parameter. + + c. On x86 systems, use the "intel_idle.max_cstate=" to limit + ` the maximum C-state depth. + + d. On x86 systems, use the "idle=poll" boot parameter. + However, please note that use of this parameter can cause + your CPU to overheat, which may cause thermal throttling + to degrade your latencies -- and that this degradation can + be even worse than that of dyntick-idle. Furthermore, + this parameter effectively disables Turbo Mode on Intel + CPUs, which can significantly reduce maximum performance. + +* Adaptive-ticks slows user/kernel transitions slightly. + This is not expected to be a problem for computationally intensive + workloads, which have few such transitions. Careful benchmarking + will be required to determine whether or not other workloads + are significantly affected by this effect. + +* Adaptive-ticks does not do anything unless there is only one + runnable task for a given CPU, even though there are a number + of other situations where the scheduling-clock tick is not + needed. To give but one example, consider a CPU that has one + runnable high-priority SCHED_FIFO task and an arbitrary number + of low-priority SCHED_OTHER tasks. In this case, the CPU is + required to run the SCHED_FIFO task until it either blocks or + some other higher-priority task awakens on (or is assigned to) + this CPU, so there is no point in sending a scheduling-clock + interrupt to this CPU. However, the current implementation + nevertheless sends scheduling-clock interrupts to CPUs having a + single runnable SCHED_FIFO task and multiple runnable SCHED_OTHER + tasks, even though these interrupts are unnecessary. + + And even when there are multiple runnable tasks on a given CPU, + there is little point in interrupting that CPU until the current + running task's timeslice expires, which is almost always way + longer than the time of the next scheduling-clock interrupt. + + Better handling of these sorts of situations is future work. + +* A reboot is required to reconfigure both adaptive idle and RCU + callback offloading. Runtime reconfiguration could be provided + if needed, however, due to the complexity of reconfiguring RCU at + runtime, there would need to be an earthshakingly good reason. + Especially given that you have the straightforward option of + simply offloading RCU callbacks from all CPUs and pinning them + where you want them whenever you want them pinned. + +* Additional configuration is required to deal with other sources + of OS jitter, including interrupts and system-utility tasks + and processes. This configuration normally involves binding + interrupts and tasks to particular CPUs. + +* Some sources of OS jitter can currently be eliminated only by + constraining the workload. For example, the only way to eliminate + OS jitter due to global TLB shootdowns is to avoid the unmapping + operations (such as kernel module unload operations) that + result in these shootdowns. For another example, page faults + and TLB misses can be reduced (and in some cases eliminated) by + using huge pages and by constraining the amount of memory used + by the application. Pre-faulting the working set can also be + helpful, especially when combined with the mlock() and mlockall() + system calls. + +* Unless all CPUs are idle, at least one CPU must keep the + scheduling-clock interrupt going in order to support accurate + timekeeping. + +* If there might potentially be some adaptive-ticks CPUs, there + will be at least one CPU keeping the scheduling-clock interrupt + going, even if all CPUs are otherwise idle. + + Better handling of this situation is ongoing work. + +* Some process-handling operations still require the occasional + scheduling-clock tick. These operations include calculating CPU + load, maintaining sched average, computing CFS entity vruntime, + computing avenrun, and carrying out load balancing. They are + currently accommodated by scheduling-clock tick every second + or so. On-going work will eliminate the need even for these + infrequent scheduling-clock ticks. diff --git a/Documentation/timers/timekeeping.rst b/Documentation/timers/timekeeping.rst new file mode 100644 index 000000000000..f83e98852e2c --- /dev/null +++ b/Documentation/timers/timekeeping.rst @@ -0,0 +1,180 @@ +=========================================================== +Clock sources, Clock events, sched_clock() and delay timers +=========================================================== + +This document tries to briefly explain some basic kernel timekeeping +abstractions. It partly pertains to the drivers usually found in +drivers/clocksource in the kernel tree, but the code may be spread out +across the kernel. + +If you grep through the kernel source you will find a number of architecture- +specific implementations of clock sources, clockevents and several likewise +architecture-specific overrides of the sched_clock() function and some +delay timers. + +To provide timekeeping for your platform, the clock source provides +the basic timeline, whereas clock events shoot interrupts on certain points +on this timeline, providing facilities such as high-resolution timers. +sched_clock() is used for scheduling and timestamping, and delay timers +provide an accurate delay source using hardware counters. + + +Clock sources +------------- + +The purpose of the clock source is to provide a timeline for the system that +tells you where you are in time. For example issuing the command 'date' on +a Linux system will eventually read the clock source to determine exactly +what time it is. + +Typically the clock source is a monotonic, atomic counter which will provide +n bits which count from 0 to (2^n)-1 and then wraps around to 0 and start over. +It will ideally NEVER stop ticking as long as the system is running. It +may stop during system suspend. + +The clock source shall have as high resolution as possible, and the frequency +shall be as stable and correct as possible as compared to a real-world wall +clock. It should not move unpredictably back and forth in time or miss a few +cycles here and there. + +It must be immune to the kind of effects that occur in hardware where e.g. +the counter register is read in two phases on the bus lowest 16 bits first +and the higher 16 bits in a second bus cycle with the counter bits +potentially being updated in between leading to the risk of very strange +values from the counter. + +When the wall-clock accuracy of the clock source isn't satisfactory, there +are various quirks and layers in the timekeeping code for e.g. synchronizing +the user-visible time to RTC clocks in the system or against networked time +servers using NTP, but all they do basically is update an offset against +the clock source, which provides the fundamental timeline for the system. +These measures does not affect the clock source per se, they only adapt the +system to the shortcomings of it. + +The clock source struct shall provide means to translate the provided counter +into a nanosecond value as an unsigned long long (unsigned 64 bit) number. +Since this operation may be invoked very often, doing this in a strict +mathematical sense is not desirable: instead the number is taken as close as +possible to a nanosecond value using only the arithmetic operations +multiply and shift, so in clocksource_cyc2ns() you find: + + ns ~= (clocksource * mult) >> shift + +You will find a number of helper functions in the clock source code intended +to aid in providing these mult and shift values, such as +clocksource_khz2mult(), clocksource_hz2mult() that help determine the +mult factor from a fixed shift, and clocksource_register_hz() and +clocksource_register_khz() which will help out assigning both shift and mult +factors using the frequency of the clock source as the only input. + +For real simple clock sources accessed from a single I/O memory location +there is nowadays even clocksource_mmio_init() which will take a memory +location, bit width, a parameter telling whether the counter in the +register counts up or down, and the timer clock rate, and then conjure all +necessary parameters. + +Since a 32-bit counter at say 100 MHz will wrap around to zero after some 43 +seconds, the code handling the clock source will have to compensate for this. +That is the reason why the clock source struct also contains a 'mask' +member telling how many bits of the source are valid. This way the timekeeping +code knows when the counter will wrap around and can insert the necessary +compensation code on both sides of the wrap point so that the system timeline +remains monotonic. + + +Clock events +------------ + +Clock events are the conceptual reverse of clock sources: they take a +desired time specification value and calculate the values to poke into +hardware timer registers. + +Clock events are orthogonal to clock sources. The same hardware +and register range may be used for the clock event, but it is essentially +a different thing. The hardware driving clock events has to be able to +fire interrupts, so as to trigger events on the system timeline. On an SMP +system, it is ideal (and customary) to have one such event driving timer per +CPU core, so that each core can trigger events independently of any other +core. + +You will notice that the clock event device code is based on the same basic +idea about translating counters to nanoseconds using mult and shift +arithmetic, and you find the same family of helper functions again for +assigning these values. The clock event driver does not need a 'mask' +attribute however: the system will not try to plan events beyond the time +horizon of the clock event. + + +sched_clock() +------------- + +In addition to the clock sources and clock events there is a special weak +function in the kernel called sched_clock(). This function shall return the +number of nanoseconds since the system was started. An architecture may or +may not provide an implementation of sched_clock() on its own. If a local +implementation is not provided, the system jiffy counter will be used as +sched_clock(). + +As the name suggests, sched_clock() is used for scheduling the system, +determining the absolute timeslice for a certain process in the CFS scheduler +for example. It is also used for printk timestamps when you have selected to +include time information in printk for things like bootcharts. + +Compared to clock sources, sched_clock() has to be very fast: it is called +much more often, especially by the scheduler. If you have to do trade-offs +between accuracy compared to the clock source, you may sacrifice accuracy +for speed in sched_clock(). It however requires some of the same basic +characteristics as the clock source, i.e. it should be monotonic. + +The sched_clock() function may wrap only on unsigned long long boundaries, +i.e. after 64 bits. Since this is a nanosecond value this will mean it wraps +after circa 585 years. (For most practical systems this means "never".) + +If an architecture does not provide its own implementation of this function, +it will fall back to using jiffies, making its maximum resolution 1/HZ of the +jiffy frequency for the architecture. This will affect scheduling accuracy +and will likely show up in system benchmarks. + +The clock driving sched_clock() may stop or reset to zero during system +suspend/sleep. This does not matter to the function it serves of scheduling +events on the system. However it may result in interesting timestamps in +printk(). + +The sched_clock() function should be callable in any context, IRQ- and +NMI-safe and return a sane value in any context. + +Some architectures may have a limited set of time sources and lack a nice +counter to derive a 64-bit nanosecond value, so for example on the ARM +architecture, special helper functions have been created to provide a +sched_clock() nanosecond base from a 16- or 32-bit counter. Sometimes the +same counter that is also used as clock source is used for this purpose. + +On SMP systems, it is crucial for performance that sched_clock() can be called +independently on each CPU without any synchronization performance hits. +Some hardware (such as the x86 TSC) will cause the sched_clock() function to +drift between the CPUs on the system. The kernel can work around this by +enabling the CONFIG_HAVE_UNSTABLE_SCHED_CLOCK option. This is another aspect +that makes sched_clock() different from the ordinary clock source. + + +Delay timers (some architectures only) +-------------------------------------- + +On systems with variable CPU frequency, the various kernel delay() functions +will sometimes behave strangely. Basically these delays usually use a hard +loop to delay a certain number of jiffy fractions using a "lpj" (loops per +jiffy) value, calibrated on boot. + +Let's hope that your system is running on maximum frequency when this value +is calibrated: as an effect when the frequency is geared down to half the +full frequency, any delay() will be twice as long. Usually this does not +hurt, as you're commonly requesting that amount of delay *or more*. But +basically the semantics are quite unpredictable on such systems. + +Enter timer-based delays. Using these, a timer read may be used instead of +a hard-coded loop for providing the desired delay. + +This is done by declaring a struct delay_timer and assigning the appropriate +function pointers and rate settings for this delay timer. + +This is available on some architectures like OpenRISC or ARM. diff --git a/Documentation/timers/timekeeping.txt b/Documentation/timers/timekeeping.txt deleted file mode 100644 index 2d1732b0a868..000000000000 --- a/Documentation/timers/timekeeping.txt +++ /dev/null @@ -1,179 +0,0 @@ -Clock sources, Clock events, sched_clock() and delay timers ------------------------------------------------------------ - -This document tries to briefly explain some basic kernel timekeeping -abstractions. It partly pertains to the drivers usually found in -drivers/clocksource in the kernel tree, but the code may be spread out -across the kernel. - -If you grep through the kernel source you will find a number of architecture- -specific implementations of clock sources, clockevents and several likewise -architecture-specific overrides of the sched_clock() function and some -delay timers. - -To provide timekeeping for your platform, the clock source provides -the basic timeline, whereas clock events shoot interrupts on certain points -on this timeline, providing facilities such as high-resolution timers. -sched_clock() is used for scheduling and timestamping, and delay timers -provide an accurate delay source using hardware counters. - - -Clock sources -------------- - -The purpose of the clock source is to provide a timeline for the system that -tells you where you are in time. For example issuing the command 'date' on -a Linux system will eventually read the clock source to determine exactly -what time it is. - -Typically the clock source is a monotonic, atomic counter which will provide -n bits which count from 0 to (2^n)-1 and then wraps around to 0 and start over. -It will ideally NEVER stop ticking as long as the system is running. It -may stop during system suspend. - -The clock source shall have as high resolution as possible, and the frequency -shall be as stable and correct as possible as compared to a real-world wall -clock. It should not move unpredictably back and forth in time or miss a few -cycles here and there. - -It must be immune to the kind of effects that occur in hardware where e.g. -the counter register is read in two phases on the bus lowest 16 bits first -and the higher 16 bits in a second bus cycle with the counter bits -potentially being updated in between leading to the risk of very strange -values from the counter. - -When the wall-clock accuracy of the clock source isn't satisfactory, there -are various quirks and layers in the timekeeping code for e.g. synchronizing -the user-visible time to RTC clocks in the system or against networked time -servers using NTP, but all they do basically is update an offset against -the clock source, which provides the fundamental timeline for the system. -These measures does not affect the clock source per se, they only adapt the -system to the shortcomings of it. - -The clock source struct shall provide means to translate the provided counter -into a nanosecond value as an unsigned long long (unsigned 64 bit) number. -Since this operation may be invoked very often, doing this in a strict -mathematical sense is not desirable: instead the number is taken as close as -possible to a nanosecond value using only the arithmetic operations -multiply and shift, so in clocksource_cyc2ns() you find: - - ns ~= (clocksource * mult) >> shift - -You will find a number of helper functions in the clock source code intended -to aid in providing these mult and shift values, such as -clocksource_khz2mult(), clocksource_hz2mult() that help determine the -mult factor from a fixed shift, and clocksource_register_hz() and -clocksource_register_khz() which will help out assigning both shift and mult -factors using the frequency of the clock source as the only input. - -For real simple clock sources accessed from a single I/O memory location -there is nowadays even clocksource_mmio_init() which will take a memory -location, bit width, a parameter telling whether the counter in the -register counts up or down, and the timer clock rate, and then conjure all -necessary parameters. - -Since a 32-bit counter at say 100 MHz will wrap around to zero after some 43 -seconds, the code handling the clock source will have to compensate for this. -That is the reason why the clock source struct also contains a 'mask' -member telling how many bits of the source are valid. This way the timekeeping -code knows when the counter will wrap around and can insert the necessary -compensation code on both sides of the wrap point so that the system timeline -remains monotonic. - - -Clock events ------------- - -Clock events are the conceptual reverse of clock sources: they take a -desired time specification value and calculate the values to poke into -hardware timer registers. - -Clock events are orthogonal to clock sources. The same hardware -and register range may be used for the clock event, but it is essentially -a different thing. The hardware driving clock events has to be able to -fire interrupts, so as to trigger events on the system timeline. On an SMP -system, it is ideal (and customary) to have one such event driving timer per -CPU core, so that each core can trigger events independently of any other -core. - -You will notice that the clock event device code is based on the same basic -idea about translating counters to nanoseconds using mult and shift -arithmetic, and you find the same family of helper functions again for -assigning these values. The clock event driver does not need a 'mask' -attribute however: the system will not try to plan events beyond the time -horizon of the clock event. - - -sched_clock() -------------- - -In addition to the clock sources and clock events there is a special weak -function in the kernel called sched_clock(). This function shall return the -number of nanoseconds since the system was started. An architecture may or -may not provide an implementation of sched_clock() on its own. If a local -implementation is not provided, the system jiffy counter will be used as -sched_clock(). - -As the name suggests, sched_clock() is used for scheduling the system, -determining the absolute timeslice for a certain process in the CFS scheduler -for example. It is also used for printk timestamps when you have selected to -include time information in printk for things like bootcharts. - -Compared to clock sources, sched_clock() has to be very fast: it is called -much more often, especially by the scheduler. If you have to do trade-offs -between accuracy compared to the clock source, you may sacrifice accuracy -for speed in sched_clock(). It however requires some of the same basic -characteristics as the clock source, i.e. it should be monotonic. - -The sched_clock() function may wrap only on unsigned long long boundaries, -i.e. after 64 bits. Since this is a nanosecond value this will mean it wraps -after circa 585 years. (For most practical systems this means "never".) - -If an architecture does not provide its own implementation of this function, -it will fall back to using jiffies, making its maximum resolution 1/HZ of the -jiffy frequency for the architecture. This will affect scheduling accuracy -and will likely show up in system benchmarks. - -The clock driving sched_clock() may stop or reset to zero during system -suspend/sleep. This does not matter to the function it serves of scheduling -events on the system. However it may result in interesting timestamps in -printk(). - -The sched_clock() function should be callable in any context, IRQ- and -NMI-safe and return a sane value in any context. - -Some architectures may have a limited set of time sources and lack a nice -counter to derive a 64-bit nanosecond value, so for example on the ARM -architecture, special helper functions have been created to provide a -sched_clock() nanosecond base from a 16- or 32-bit counter. Sometimes the -same counter that is also used as clock source is used for this purpose. - -On SMP systems, it is crucial for performance that sched_clock() can be called -independently on each CPU without any synchronization performance hits. -Some hardware (such as the x86 TSC) will cause the sched_clock() function to -drift between the CPUs on the system. The kernel can work around this by -enabling the CONFIG_HAVE_UNSTABLE_SCHED_CLOCK option. This is another aspect -that makes sched_clock() different from the ordinary clock source. - - -Delay timers (some architectures only) --------------------------------------- - -On systems with variable CPU frequency, the various kernel delay() functions -will sometimes behave strangely. Basically these delays usually use a hard -loop to delay a certain number of jiffy fractions using a "lpj" (loops per -jiffy) value, calibrated on boot. - -Let's hope that your system is running on maximum frequency when this value -is calibrated: as an effect when the frequency is geared down to half the -full frequency, any delay() will be twice as long. Usually this does not -hurt, as you're commonly requesting that amount of delay *or more*. But -basically the semantics are quite unpredictable on such systems. - -Enter timer-based delays. Using these, a timer read may be used instead of -a hard-coded loop for providing the desired delay. - -This is done by declaring a struct delay_timer and assigning the appropriate -function pointers and rate settings for this delay timer. - -This is available on some architectures like OpenRISC or ARM. diff --git a/Documentation/timers/timers-howto.rst b/Documentation/timers/timers-howto.rst new file mode 100644 index 000000000000..7e3167bec2b1 --- /dev/null +++ b/Documentation/timers/timers-howto.rst @@ -0,0 +1,112 @@ +=================================================================== +delays - Information on the various kernel delay / sleep mechanisms +=================================================================== + +This document seeks to answer the common question: "What is the +RightWay (TM) to insert a delay?" + +This question is most often faced by driver writers who have to +deal with hardware delays and who may not be the most intimately +familiar with the inner workings of the Linux Kernel. + + +Inserting Delays +---------------- + +The first, and most important, question you need to ask is "Is my +code in an atomic context?" This should be followed closely by "Does +it really need to delay in atomic context?" If so... + +ATOMIC CONTEXT: + You must use the `*delay` family of functions. These + functions use the jiffie estimation of clock speed + and will busy wait for enough loop cycles to achieve + the desired delay: + + ndelay(unsigned long nsecs) + udelay(unsigned long usecs) + mdelay(unsigned long msecs) + + udelay is the generally preferred API; ndelay-level + precision may not actually exist on many non-PC devices. + + mdelay is macro wrapper around udelay, to account for + possible overflow when passing large arguments to udelay. + In general, use of mdelay is discouraged and code should + be refactored to allow for the use of msleep. + +NON-ATOMIC CONTEXT: + You should use the `*sleep[_range]` family of functions. + There are a few more options here, while any of them may + work correctly, using the "right" sleep function will + help the scheduler, power management, and just make your + driver better :) + + -- Backed by busy-wait loop: + + udelay(unsigned long usecs) + + -- Backed by hrtimers: + + usleep_range(unsigned long min, unsigned long max) + + -- Backed by jiffies / legacy_timers + + msleep(unsigned long msecs) + msleep_interruptible(unsigned long msecs) + + Unlike the `*delay` family, the underlying mechanism + driving each of these calls varies, thus there are + quirks you should be aware of. + + + SLEEPING FOR "A FEW" USECS ( < ~10us? ): + * Use udelay + + - Why not usleep? + On slower systems, (embedded, OR perhaps a speed- + stepped PC!) the overhead of setting up the hrtimers + for usleep *may* not be worth it. Such an evaluation + will obviously depend on your specific situation, but + it is something to be aware of. + + SLEEPING FOR ~USECS OR SMALL MSECS ( 10us - 20ms): + * Use usleep_range + + - Why not msleep for (1ms - 20ms)? + Explained originally here: + http://lkml.org/lkml/2007/8/3/250 + + msleep(1~20) may not do what the caller intends, and + will often sleep longer (~20 ms actual sleep for any + value given in the 1~20ms range). In many cases this + is not the desired behavior. + + - Why is there no "usleep" / What is a good range? + Since usleep_range is built on top of hrtimers, the + wakeup will be very precise (ish), thus a simple + usleep function would likely introduce a large number + of undesired interrupts. + + With the introduction of a range, the scheduler is + free to coalesce your wakeup with any other wakeup + that may have happened for other reasons, or at the + worst case, fire an interrupt for your upper bound. + + The larger a range you supply, the greater a chance + that you will not trigger an interrupt; this should + be balanced with what is an acceptable upper bound on + delay / performance for your specific code path. Exact + tolerances here are very situation specific, thus it + is left to the caller to determine a reasonable range. + + SLEEPING FOR LARGER MSECS ( 10ms+ ) + * Use msleep or possibly msleep_interruptible + + - What's the difference? + msleep sets the current task to TASK_UNINTERRUPTIBLE + whereas msleep_interruptible sets the current task to + TASK_INTERRUPTIBLE before scheduling the sleep. In + short, the difference is whether the sleep can be ended + early by a signal. In general, just use msleep unless + you know you have a need for the interruptible variant. diff --git a/Documentation/timers/timers-howto.txt b/Documentation/timers/timers-howto.txt deleted file mode 100644 index 038f8c77a076..000000000000 --- a/Documentation/timers/timers-howto.txt +++ /dev/null @@ -1,105 +0,0 @@ -delays - Information on the various kernel delay / sleep mechanisms -------------------------------------------------------------------- - -This document seeks to answer the common question: "What is the -RightWay (TM) to insert a delay?" - -This question is most often faced by driver writers who have to -deal with hardware delays and who may not be the most intimately -familiar with the inner workings of the Linux Kernel. - - -Inserting Delays ----------------- - -The first, and most important, question you need to ask is "Is my -code in an atomic context?" This should be followed closely by "Does -it really need to delay in atomic context?" If so... - -ATOMIC CONTEXT: - You must use the *delay family of functions. These - functions use the jiffie estimation of clock speed - and will busy wait for enough loop cycles to achieve - the desired delay: - - ndelay(unsigned long nsecs) - udelay(unsigned long usecs) - mdelay(unsigned long msecs) - - udelay is the generally preferred API; ndelay-level - precision may not actually exist on many non-PC devices. - - mdelay is macro wrapper around udelay, to account for - possible overflow when passing large arguments to udelay. - In general, use of mdelay is discouraged and code should - be refactored to allow for the use of msleep. - -NON-ATOMIC CONTEXT: - You should use the *sleep[_range] family of functions. - There are a few more options here, while any of them may - work correctly, using the "right" sleep function will - help the scheduler, power management, and just make your - driver better :) - - -- Backed by busy-wait loop: - udelay(unsigned long usecs) - -- Backed by hrtimers: - usleep_range(unsigned long min, unsigned long max) - -- Backed by jiffies / legacy_timers - msleep(unsigned long msecs) - msleep_interruptible(unsigned long msecs) - - Unlike the *delay family, the underlying mechanism - driving each of these calls varies, thus there are - quirks you should be aware of. - - - SLEEPING FOR "A FEW" USECS ( < ~10us? ): - * Use udelay - - - Why not usleep? - On slower systems, (embedded, OR perhaps a speed- - stepped PC!) the overhead of setting up the hrtimers - for usleep *may* not be worth it. Such an evaluation - will obviously depend on your specific situation, but - it is something to be aware of. - - SLEEPING FOR ~USECS OR SMALL MSECS ( 10us - 20ms): - * Use usleep_range - - - Why not msleep for (1ms - 20ms)? - Explained originally here: - http://lkml.org/lkml/2007/8/3/250 - msleep(1~20) may not do what the caller intends, and - will often sleep longer (~20 ms actual sleep for any - value given in the 1~20ms range). In many cases this - is not the desired behavior. - - - Why is there no "usleep" / What is a good range? - Since usleep_range is built on top of hrtimers, the - wakeup will be very precise (ish), thus a simple - usleep function would likely introduce a large number - of undesired interrupts. - - With the introduction of a range, the scheduler is - free to coalesce your wakeup with any other wakeup - that may have happened for other reasons, or at the - worst case, fire an interrupt for your upper bound. - - The larger a range you supply, the greater a chance - that you will not trigger an interrupt; this should - be balanced with what is an acceptable upper bound on - delay / performance for your specific code path. Exact - tolerances here are very situation specific, thus it - is left to the caller to determine a reasonable range. - - SLEEPING FOR LARGER MSECS ( 10ms+ ) - * Use msleep or possibly msleep_interruptible - - - What's the difference? - msleep sets the current task to TASK_UNINTERRUPTIBLE - whereas msleep_interruptible sets the current task to - TASK_INTERRUPTIBLE before scheduling the sleep. In - short, the difference is whether the sleep can be ended - early by a signal. In general, just use msleep unless - you know you have a need for the interruptible variant. diff --git a/MAINTAINERS b/MAINTAINERS index 5fe44d5d82b4..0db7f12439f7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7192,7 +7192,7 @@ F: drivers/net/ethernet/hp/hp100.* HPET: High Precision Event Timers driver M: Clemens Ladisch S: Maintained -F: Documentation/timers/hpet.txt +F: Documentation/timers/hpet.rst F: drivers/char/hpet.c F: include/linux/hpet.h F: include/uapi/linux/hpet.h diff --git a/drivers/media/usb/dvb-usb-v2/anysee.c b/drivers/media/usb/dvb-usb-v2/anysee.c index 48fb0d41e03b..fb6d99dea31a 100644 --- a/drivers/media/usb/dvb-usb-v2/anysee.c +++ b/drivers/media/usb/dvb-usb-v2/anysee.c @@ -56,7 +56,7 @@ static int anysee_ctrl_msg(struct dvb_usb_device *d, /* TODO FIXME: dvb_usb_generic_rw() fails rarely with error code -32 * (EPIPE, Broken pipe). Function supports currently msleep() as a * parameter but I would not like to use it, since according to - * Documentation/timers/timers-howto.txt it should not be used such + * Documentation/timers/timers-howto.rst it should not be used such * short, under < 20ms, sleeps. Repeating failed message would be * better choice as not to add unwanted delays... * Fixing that correctly is one of those or both; diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index c894cf0d8a28..c5d8996d5165 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -2304,7 +2304,7 @@ static int regulator_ena_gpio_ctrl(struct regulator_dev *rdev, bool enable) * * Delay for the requested amount of time as per the guidelines in: * - * Documentation/timers/timers-howto.txt + * Documentation/timers/timers-howto.rst * * The assumption here is that regulators will never be enabled in * atomic context and therefore sleeping functions can be used. diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index 3908353deec6..35e15dfd4155 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -21,7 +21,7 @@ * @cond: Break condition (usually involving @val) * @sleep_us: Maximum time to sleep between reads in us (0 * tight-loops). Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.txt). + * is used (see Documentation/timers/timers-howto.rst). * @timeout_us: Timeout in us, 0 means never timeout * * Returns 0 on success and -ETIMEDOUT upon a timeout. In either @@ -60,7 +60,7 @@ * @cond: Break condition (usually involving @val) * @delay_us: Time to udelay between reads in us (0 tight-loops). Should * be less than ~10us since udelay is used (see - * Documentation/timers/timers-howto.txt). + * Documentation/timers/timers-howto.rst). * @timeout_us: Timeout in us, 0 means never timeout * * Returns 0 on success and -ETIMEDOUT upon a timeout. In either diff --git a/include/linux/regmap.h b/include/linux/regmap.h index daeec7dbd65c..ed5e9d0a1285 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -112,7 +112,7 @@ struct reg_sequence { * @cond: Break condition (usually involving @val) * @sleep_us: Maximum time to sleep between reads in us (0 * tight-loops). Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.txt). + * is used (see Documentation/timers/timers-howto.rst). * @timeout_us: Timeout in us, 0 means never timeout * * Returns 0 on success and -ETIMEDOUT upon a timeout or the regmap_read @@ -154,7 +154,7 @@ struct reg_sequence { * @cond: Break condition (usually involving @val) * @sleep_us: Maximum time to sleep between reads in us (0 * tight-loops). Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.txt). + * is used (see Documentation/timers/timers-howto.rst). * @timeout_us: Timeout in us, 0 means never timeout * * Returns 0 on success and -ETIMEDOUT upon a timeout or the regmap_field_read diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 342c7c781ba5..a6d436809bf5 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5712,7 +5712,7 @@ sub process { # ignore udelay's < 10, however if (! ($delay < 10) ) { CHK("USLEEP_RANGE", - "usleep_range is preferred over udelay; see Documentation/timers/timers-howto.txt\n" . $herecurr); + "usleep_range is preferred over udelay; see Documentation/timers/timers-howto.rst\n" . $herecurr); } if ($delay > 2000) { WARN("LONG_UDELAY", @@ -5724,7 +5724,7 @@ sub process { if ($line =~ /\bmsleep\s*\((\d+)\);/) { if ($1 < 20) { WARN("MSLEEP", - "msleep < 20ms can sleep for up to 20ms; see Documentation/timers/timers-howto.txt\n" . $herecurr); + "msleep < 20ms can sleep for up to 20ms; see Documentation/timers/timers-howto.rst\n" . $herecurr); } } @@ -6115,11 +6115,11 @@ sub process { my $max = $7; if ($min eq $max) { WARN("USLEEP_RANGE", - "usleep_range should not use min == max args; see Documentation/timers/timers-howto.txt\n" . "$here\n$stat\n"); + "usleep_range should not use min == max args; see Documentation/timers/timers-howto.rst\n" . "$here\n$stat\n"); } elsif ($min =~ /^\d+$/ && $max =~ /^\d+$/ && $min > $max) { WARN("USLEEP_RANGE", - "usleep_range args reversed, use min then max; see Documentation/timers/timers-howto.txt\n" . "$here\n$stat\n"); + "usleep_range args reversed, use min then max; see Documentation/timers/timers-howto.rst\n" . "$here\n$stat\n"); } } diff --git a/sound/soc/sof/ops.h b/sound/soc/sof/ops.h index 80fc3b374c2b..8058a6c73082 100644 --- a/sound/soc/sof/ops.h +++ b/sound/soc/sof/ops.h @@ -349,7 +349,7 @@ static inline const struct snd_sof_dsp_ops * @cond: Break condition (usually involving @val) * @sleep_us: Maximum time to sleep between reads in us (0 * tight-loops). Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.txt). + * is used (see Documentation/timers/timers-howto.rst). * @timeout_us: Timeout in us, 0 means never timeout * * Returns 0 on success and -ETIMEDOUT upon a timeout. In either -- cgit v1.2.3 From 151f4e2bdc7a04020ae5c533896fb91a16e1f501 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 13 Jun 2019 07:10:36 -0300 Subject: docs: power: convert docs to ReST and rename to *.rst Convert the PM documents to ReST, in order to allow them to build with Sphinx. The conversion is actually: - add blank lines and indentation in order to identify paragraphs; - fix tables markups; - add some lists markups; - mark literal blocks; - adjust title markups. At its new index.rst, let's add a :orphan: while this is not linked to the main index.rst file, in order to avoid build warnings. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Bjorn Helgaas Acked-by: Mark Brown Acked-by: Srivatsa S. Bhat (VMware) --- Documentation/ABI/testing/sysfs-class-powercap | 2 +- Documentation/admin-guide/kernel-parameters.txt | 6 +- Documentation/cpu-freq/core.txt | 2 +- Documentation/driver-api/pm/devices.rst | 6 +- Documentation/driver-api/usb/power-management.rst | 2 +- Documentation/power/apm-acpi.rst | 36 + Documentation/power/apm-acpi.txt | 32 - Documentation/power/basic-pm-debugging.rst | 269 +++++ Documentation/power/basic-pm-debugging.txt | 254 ----- Documentation/power/charger-manager.rst | 205 ++++ Documentation/power/charger-manager.txt | 200 ---- Documentation/power/drivers-testing.rst | 51 + Documentation/power/drivers-testing.txt | 46 - Documentation/power/energy-model.rst | 147 +++ Documentation/power/energy-model.txt | 144 --- Documentation/power/freezing-of-tasks.rst | 244 +++++ Documentation/power/freezing-of-tasks.txt | 231 ---- Documentation/power/index.rst | 46 + Documentation/power/interface.rst | 79 ++ Documentation/power/interface.txt | 77 -- Documentation/power/opp.rst | 379 +++++++ Documentation/power/opp.txt | 342 ------ Documentation/power/pci.rst | 1135 ++++++++++++++++++++ Documentation/power/pci.txt | 1094 ------------------- Documentation/power/pm_qos_interface.rst | 225 ++++ Documentation/power/pm_qos_interface.txt | 212 ---- Documentation/power/power_supply_class.rst | 282 +++++ Documentation/power/power_supply_class.txt | 231 ---- Documentation/power/powercap/powercap.rst | 257 +++++ Documentation/power/powercap/powercap.txt | 236 ---- Documentation/power/regulator/consumer.rst | 229 ++++ Documentation/power/regulator/consumer.txt | 218 ---- Documentation/power/regulator/design.rst | 38 + Documentation/power/regulator/design.txt | 33 - Documentation/power/regulator/machine.rst | 97 ++ Documentation/power/regulator/machine.txt | 96 -- Documentation/power/regulator/overview.rst | 178 +++ Documentation/power/regulator/overview.txt | 171 --- Documentation/power/regulator/regulator.rst | 32 + Documentation/power/regulator/regulator.txt | 30 - Documentation/power/runtime_pm.rst | 940 ++++++++++++++++ Documentation/power/runtime_pm.txt | 928 ---------------- Documentation/power/s2ram.rst | 87 ++ Documentation/power/s2ram.txt | 85 -- Documentation/power/suspend-and-cpuhotplug.rst | 286 +++++ Documentation/power/suspend-and-cpuhotplug.txt | 274 ----- Documentation/power/suspend-and-interrupts.rst | 137 +++ Documentation/power/suspend-and-interrupts.txt | 135 --- Documentation/power/swsusp-and-swap-files.rst | 63 ++ Documentation/power/swsusp-and-swap-files.txt | 60 -- Documentation/power/swsusp-dmcrypt.rst | 140 +++ Documentation/power/swsusp-dmcrypt.txt | 138 --- Documentation/power/swsusp.rst | 501 +++++++++ Documentation/power/swsusp.txt | 446 -------- Documentation/power/tricks.rst | 29 + Documentation/power/tricks.txt | 27 - Documentation/power/userland-swsusp.rst | 191 ++++ Documentation/power/userland-swsusp.txt | 170 --- Documentation/power/video.rst | 213 ++++ Documentation/power/video.txt | 185 ---- Documentation/process/submitting-drivers.rst | 2 +- Documentation/scheduler/sched-energy.txt | 6 +- Documentation/trace/coresight-cpu-debug.txt | 2 +- .../zh_CN/process/submitting-drivers.rst | 2 +- MAINTAINERS | 4 +- arch/x86/Kconfig | 2 +- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/opp/Kconfig | 2 +- drivers/power/supply/power_supply_core.c | 2 +- include/linux/interrupt.h | 2 +- include/linux/pci.h | 2 +- include/linux/pm.h | 2 +- kernel/power/Kconfig | 6 +- net/wireless/Kconfig | 2 +- 74 files changed, 6544 insertions(+), 6123 deletions(-) create mode 100644 Documentation/power/apm-acpi.rst delete mode 100644 Documentation/power/apm-acpi.txt create mode 100644 Documentation/power/basic-pm-debugging.rst delete mode 100644 Documentation/power/basic-pm-debugging.txt create mode 100644 Documentation/power/charger-manager.rst delete mode 100644 Documentation/power/charger-manager.txt create mode 100644 Documentation/power/drivers-testing.rst delete mode 100644 Documentation/power/drivers-testing.txt create mode 100644 Documentation/power/energy-model.rst delete mode 100644 Documentation/power/energy-model.txt create mode 100644 Documentation/power/freezing-of-tasks.rst delete mode 100644 Documentation/power/freezing-of-tasks.txt create mode 100644 Documentation/power/index.rst create mode 100644 Documentation/power/interface.rst delete mode 100644 Documentation/power/interface.txt create mode 100644 Documentation/power/opp.rst delete mode 100644 Documentation/power/opp.txt create mode 100644 Documentation/power/pci.rst delete mode 100644 Documentation/power/pci.txt create mode 100644 Documentation/power/pm_qos_interface.rst delete mode 100644 Documentation/power/pm_qos_interface.txt create mode 100644 Documentation/power/power_supply_class.rst delete mode 100644 Documentation/power/power_supply_class.txt create mode 100644 Documentation/power/powercap/powercap.rst delete mode 100644 Documentation/power/powercap/powercap.txt create mode 100644 Documentation/power/regulator/consumer.rst delete mode 100644 Documentation/power/regulator/consumer.txt create mode 100644 Documentation/power/regulator/design.rst delete mode 100644 Documentation/power/regulator/design.txt create mode 100644 Documentation/power/regulator/machine.rst delete mode 100644 Documentation/power/regulator/machine.txt create mode 100644 Documentation/power/regulator/overview.rst delete mode 100644 Documentation/power/regulator/overview.txt create mode 100644 Documentation/power/regulator/regulator.rst delete mode 100644 Documentation/power/regulator/regulator.txt create mode 100644 Documentation/power/runtime_pm.rst delete mode 100644 Documentation/power/runtime_pm.txt create mode 100644 Documentation/power/s2ram.rst delete mode 100644 Documentation/power/s2ram.txt create mode 100644 Documentation/power/suspend-and-cpuhotplug.rst delete mode 100644 Documentation/power/suspend-and-cpuhotplug.txt create mode 100644 Documentation/power/suspend-and-interrupts.rst delete mode 100644 Documentation/power/suspend-and-interrupts.txt create mode 100644 Documentation/power/swsusp-and-swap-files.rst delete mode 100644 Documentation/power/swsusp-and-swap-files.txt create mode 100644 Documentation/power/swsusp-dmcrypt.rst delete mode 100644 Documentation/power/swsusp-dmcrypt.txt create mode 100644 Documentation/power/swsusp.rst delete mode 100644 Documentation/power/swsusp.txt create mode 100644 Documentation/power/tricks.rst delete mode 100644 Documentation/power/tricks.txt create mode 100644 Documentation/power/userland-swsusp.rst delete mode 100644 Documentation/power/userland-swsusp.txt create mode 100644 Documentation/power/video.rst delete mode 100644 Documentation/power/video.txt (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-powercap b/Documentation/ABI/testing/sysfs-class-powercap index db3b3ff70d84..742dfd966592 100644 --- a/Documentation/ABI/testing/sysfs-class-powercap +++ b/Documentation/ABI/testing/sysfs-class-powercap @@ -5,7 +5,7 @@ Contact: linux-pm@vger.kernel.org Description: The powercap/ class sub directory belongs to the power cap subsystem. Refer to - Documentation/power/powercap/powercap.txt for details. + Documentation/power/powercap/powercap.rst for details. What: /sys/class/powercap/ Date: September 2013 diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 138f6664b2e2..7f5ca6e7c4d3 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -13,7 +13,7 @@ For ARM64, ONLY "acpi=off", "acpi=on" or "acpi=force" are available - See also Documentation/power/runtime_pm.txt, pci=noacpi + See also Documentation/power/runtime_pm.rst, pci=noacpi acpi_apic_instance= [ACPI, IOAPIC] Format: @@ -223,7 +223,7 @@ acpi_sleep= [HW,ACPI] Sleep options Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, old_ordering, nonvs, sci_force_enable, nobl } - See Documentation/power/video.txt for information on + See Documentation/power/video.rst for information on s3_bios and s3_mode. s3_beep is for debugging; it makes the PC's speaker beep as soon as the kernel's real-mode entry point is called. @@ -4108,7 +4108,7 @@ Specify the offset from the beginning of the partition given by "resume=" at which the swap header is located, in units (needed only for swap files). - See Documentation/power/swsusp-and-swap-files.txt + See Documentation/power/swsusp-and-swap-files.rst resumedelay= [HIBERNATION] Delay (in seconds) to pause before attempting to read the resume files diff --git a/Documentation/cpu-freq/core.txt b/Documentation/cpu-freq/core.txt index 073f128af5a7..55193e680250 100644 --- a/Documentation/cpu-freq/core.txt +++ b/Documentation/cpu-freq/core.txt @@ -95,7 +95,7 @@ flags - flags of the cpufreq driver 3. CPUFreq Table Generation with Operating Performance Point (OPP) ================================================================== -For details about OPP, see Documentation/power/opp.txt +For details about OPP, see Documentation/power/opp.rst dev_pm_opp_init_cpufreq_table - This function provides a ready to use conversion routine to translate diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst index 30835683616a..f66c7b9126ea 100644 --- a/Documentation/driver-api/pm/devices.rst +++ b/Documentation/driver-api/pm/devices.rst @@ -225,7 +225,7 @@ system-wide transition to a sleep state even though its :c:member:`runtime_auto` flag is clear. For more information about the runtime power management framework, refer to -:file:`Documentation/power/runtime_pm.txt`. +:file:`Documentation/power/runtime_pm.rst`. Calling Drivers to Enter and Leave System Sleep States @@ -728,7 +728,7 @@ it into account in any way. Devices may be defined as IRQ-safe which indicates to the PM core that their runtime PM callbacks may be invoked with disabled interrupts (see -:file:`Documentation/power/runtime_pm.txt` for more information). If an +:file:`Documentation/power/runtime_pm.rst` for more information). If an IRQ-safe device belongs to a PM domain, the runtime PM of the domain will be disallowed, unless the domain itself is defined as IRQ-safe. However, it makes sense to define a PM domain as IRQ-safe only if all the devices in it @@ -795,7 +795,7 @@ so on) and the final state of the device must reflect the "active" runtime PM status in that case. During system-wide resume from a sleep state it's easiest to put devices into -the full-power state, as explained in :file:`Documentation/power/runtime_pm.txt`. +the full-power state, as explained in :file:`Documentation/power/runtime_pm.rst`. [Refer to that document for more information regarding this particular issue as well as for information on the device runtime power management framework in general.] diff --git a/Documentation/driver-api/usb/power-management.rst b/Documentation/driver-api/usb/power-management.rst index 4a74cf6f2797..2525c3622cae 100644 --- a/Documentation/driver-api/usb/power-management.rst +++ b/Documentation/driver-api/usb/power-management.rst @@ -46,7 +46,7 @@ device is turned off while the system as a whole remains running, we call it a "dynamic suspend" (also known as a "runtime suspend" or "selective suspend"). This document concentrates mostly on how dynamic PM is implemented in the USB subsystem, although system PM is -covered to some extent (see ``Documentation/power/*.txt`` for more +covered to some extent (see ``Documentation/power/*.rst`` for more information about system PM). System PM support is present only if the kernel was built with diff --git a/Documentation/power/apm-acpi.rst b/Documentation/power/apm-acpi.rst new file mode 100644 index 000000000000..5b90d947126d --- /dev/null +++ b/Documentation/power/apm-acpi.rst @@ -0,0 +1,36 @@ +============ +APM or ACPI? +============ + +If you have a relatively recent x86 mobile, desktop, or server system, +odds are it supports either Advanced Power Management (APM) or +Advanced Configuration and Power Interface (ACPI). ACPI is the newer +of the two technologies and puts power management in the hands of the +operating system, allowing for more intelligent power management than +is possible with BIOS controlled APM. + +The best way to determine which, if either, your system supports is to +build a kernel with both ACPI and APM enabled (as of 2.3.x ACPI is +enabled by default). If a working ACPI implementation is found, the +ACPI driver will override and disable APM, otherwise the APM driver +will be used. + +No, sorry, you cannot have both ACPI and APM enabled and running at +once. Some people with broken ACPI or broken APM implementations +would like to use both to get a full set of working features, but you +simply cannot mix and match the two. Only one power management +interface can be in control of the machine at once. Think about it.. + +User-space Daemons +------------------ +Both APM and ACPI rely on user-space daemons, apmd and acpid +respectively, to be completely functional. Obtain both of these +daemons from your Linux distribution or from the Internet (see below) +and be sure that they are started sometime in the system boot process. +Go ahead and start both. If ACPI or APM is not available on your +system the associated daemon will exit gracefully. + + ===== ======================================= + apmd http://ftp.debian.org/pool/main/a/apmd/ + acpid http://acpid.sf.net/ + ===== ======================================= diff --git a/Documentation/power/apm-acpi.txt b/Documentation/power/apm-acpi.txt deleted file mode 100644 index 6cc423d3662e..000000000000 --- a/Documentation/power/apm-acpi.txt +++ /dev/null @@ -1,32 +0,0 @@ -APM or ACPI? ------------- -If you have a relatively recent x86 mobile, desktop, or server system, -odds are it supports either Advanced Power Management (APM) or -Advanced Configuration and Power Interface (ACPI). ACPI is the newer -of the two technologies and puts power management in the hands of the -operating system, allowing for more intelligent power management than -is possible with BIOS controlled APM. - -The best way to determine which, if either, your system supports is to -build a kernel with both ACPI and APM enabled (as of 2.3.x ACPI is -enabled by default). If a working ACPI implementation is found, the -ACPI driver will override and disable APM, otherwise the APM driver -will be used. - -No, sorry, you cannot have both ACPI and APM enabled and running at -once. Some people with broken ACPI or broken APM implementations -would like to use both to get a full set of working features, but you -simply cannot mix and match the two. Only one power management -interface can be in control of the machine at once. Think about it.. - -User-space Daemons ------------------- -Both APM and ACPI rely on user-space daemons, apmd and acpid -respectively, to be completely functional. Obtain both of these -daemons from your Linux distribution or from the Internet (see below) -and be sure that they are started sometime in the system boot process. -Go ahead and start both. If ACPI or APM is not available on your -system the associated daemon will exit gracefully. - - apmd: http://ftp.debian.org/pool/main/a/apmd/ - acpid: http://acpid.sf.net/ diff --git a/Documentation/power/basic-pm-debugging.rst b/Documentation/power/basic-pm-debugging.rst new file mode 100644 index 000000000000..69862e759c30 --- /dev/null +++ b/Documentation/power/basic-pm-debugging.rst @@ -0,0 +1,269 @@ +================================= +Debugging hibernation and suspend +================================= + + (C) 2007 Rafael J. Wysocki , GPL + +1. Testing hibernation (aka suspend to disk or STD) +=================================================== + +To check if hibernation works, you can try to hibernate in the "reboot" mode:: + + # echo reboot > /sys/power/disk + # echo disk > /sys/power/state + +and the system should create a hibernation image, reboot, resume and get back to +the command prompt where you have started the transition. If that happens, +hibernation is most likely to work correctly. Still, you need to repeat the +test at least a couple of times in a row for confidence. [This is necessary, +because some problems only show up on a second attempt at suspending and +resuming the system.] Moreover, hibernating in the "reboot" and "shutdown" +modes causes the PM core to skip some platform-related callbacks which on ACPI +systems might be necessary to make hibernation work. Thus, if your machine +fails to hibernate or resume in the "reboot" mode, you should try the +"platform" mode:: + + # echo platform > /sys/power/disk + # echo disk > /sys/power/state + +which is the default and recommended mode of hibernation. + +Unfortunately, the "platform" mode of hibernation does not work on some systems +with broken BIOSes. In such cases the "shutdown" mode of hibernation might +work:: + + # echo shutdown > /sys/power/disk + # echo disk > /sys/power/state + +(it is similar to the "reboot" mode, but it requires you to press the power +button to make the system resume). + +If neither "platform" nor "shutdown" hibernation mode works, you will need to +identify what goes wrong. + +a) Test modes of hibernation +---------------------------- + +To find out why hibernation fails on your system, you can use a special testing +facility available if the kernel is compiled with CONFIG_PM_DEBUG set. Then, +there is the file /sys/power/pm_test that can be used to make the hibernation +core run in a test mode. There are 5 test modes available: + +freezer + - test the freezing of processes + +devices + - test the freezing of processes and suspending of devices + +platform + - test the freezing of processes, suspending of devices and platform + global control methods [1]_ + +processors + - test the freezing of processes, suspending of devices, platform + global control methods [1]_ and the disabling of nonboot CPUs + +core + - test the freezing of processes, suspending of devices, platform global + control methods\ [1]_, the disabling of nonboot CPUs and suspending + of platform/system devices + +.. [1] + + the platform global control methods are only available on ACPI systems + and are only tested if the hibernation mode is set to "platform" + +To use one of them it is necessary to write the corresponding string to +/sys/power/pm_test (eg. "devices" to test the freezing of processes and +suspending devices) and issue the standard hibernation commands. For example, +to use the "devices" test mode along with the "platform" mode of hibernation, +you should do the following:: + + # echo devices > /sys/power/pm_test + # echo platform > /sys/power/disk + # echo disk > /sys/power/state + +Then, the kernel will try to freeze processes, suspend devices, wait a few +seconds (5 by default, but configurable by the suspend.pm_test_delay module +parameter), resume devices and thaw processes. If "platform" is written to +/sys/power/pm_test , then after suspending devices the kernel will additionally +invoke the global control methods (eg. ACPI global control methods) used to +prepare the platform firmware for hibernation. Next, it will wait a +configurable number of seconds and invoke the platform (eg. ACPI) global +methods used to cancel hibernation etc. + +Writing "none" to /sys/power/pm_test causes the kernel to switch to the normal +hibernation/suspend operations. Also, when open for reading, /sys/power/pm_test +contains a space-separated list of all available tests (including "none" that +represents the normal functionality) in which the current test level is +indicated by square brackets. + +Generally, as you can see, each test level is more "invasive" than the previous +one and the "core" level tests the hardware and drivers as deeply as possible +without creating a hibernation image. Obviously, if the "devices" test fails, +the "platform" test will fail as well and so on. Thus, as a rule of thumb, you +should try the test modes starting from "freezer", through "devices", "platform" +and "processors" up to "core" (repeat the test on each level a couple of times +to make sure that any random factors are avoided). + +If the "freezer" test fails, there is a task that cannot be frozen (in that case +it usually is possible to identify the offending task by analysing the output of +dmesg obtained after the failing test). Failure at this level usually means +that there is a problem with the tasks freezer subsystem that should be +reported. + +If the "devices" test fails, most likely there is a driver that cannot suspend +or resume its device (in the latter case the system may hang or become unstable +after the test, so please take that into consideration). To find this driver, +you can carry out a binary search according to the rules: + +- if the test fails, unload a half of the drivers currently loaded and repeat + (that would probably involve rebooting the system, so always note what drivers + have been loaded before the test), +- if the test succeeds, load a half of the drivers you have unloaded most + recently and repeat. + +Once you have found the failing driver (there can be more than just one of +them), you have to unload it every time before hibernation. In that case please +make sure to report the problem with the driver. + +It is also possible that the "devices" test will still fail after you have +unloaded all modules. In that case, you may want to look in your kernel +configuration for the drivers that can be compiled as modules (and test again +with these drivers compiled as modules). You may also try to use some special +kernel command line options such as "noapic", "noacpi" or even "acpi=off". + +If the "platform" test fails, there is a problem with the handling of the +platform (eg. ACPI) firmware on your system. In that case the "platform" mode +of hibernation is not likely to work. You can try the "shutdown" mode, but that +is rather a poor man's workaround. + +If the "processors" test fails, the disabling/enabling of nonboot CPUs does not +work (of course, this only may be an issue on SMP systems) and the problem +should be reported. In that case you can also try to switch the nonboot CPUs +off and on using the /sys/devices/system/cpu/cpu*/online sysfs attributes and +see if that works. + +If the "core" test fails, which means that suspending of the system/platform +devices has failed (these devices are suspended on one CPU with interrupts off), +the problem is most probably hardware-related and serious, so it should be +reported. + +A failure of any of the "platform", "processors" or "core" tests may cause your +system to hang or become unstable, so please beware. Such a failure usually +indicates a serious problem that very well may be related to the hardware, but +please report it anyway. + +b) Testing minimal configuration +-------------------------------- + +If all of the hibernation test modes work, you can boot the system with the +"init=/bin/bash" command line parameter and attempt to hibernate in the +"reboot", "shutdown" and "platform" modes. If that does not work, there +probably is a problem with a driver statically compiled into the kernel and you +can try to compile more drivers as modules, so that they can be tested +individually. Otherwise, there is a problem with a modular driver and you can +find it by loading a half of the modules you normally use and binary searching +in accordance with the algorithm: +- if there are n modules loaded and the attempt to suspend and resume fails, +unload n/2 of the modules and try again (that would probably involve rebooting +the system), +- if there are n modules loaded and the attempt to suspend and resume succeeds, +load n/2 modules more and try again. + +Again, if you find the offending module(s), it(they) must be unloaded every time +before hibernation, and please report the problem with it(them). + +c) Using the "test_resume" hibernation option +--------------------------------------------- + +/sys/power/disk generally tells the kernel what to do after creating a +hibernation image. One of the available options is "test_resume" which +causes the just created image to be used for immediate restoration. Namely, +after doing:: + + # echo test_resume > /sys/power/disk + # echo disk > /sys/power/state + +a hibernation image will be created and a resume from it will be triggered +immediately without involving the platform firmware in any way. + +That test can be used to check if failures to resume from hibernation are +related to bad interactions with the platform firmware. That is, if the above +works every time, but resume from actual hibernation does not work or is +unreliable, the platform firmware may be responsible for the failures. + +On architectures and platforms that support using different kernels to restore +hibernation images (that is, the kernel used to read the image from storage and +load it into memory is different from the one included in the image) or support +kernel address space randomization, it also can be used to check if failures +to resume may be related to the differences between the restore and image +kernels. + +d) Advanced debugging +--------------------- + +In case that hibernation does not work on your system even in the minimal +configuration and compiling more drivers as modules is not practical or some +modules cannot be unloaded, you can use one of the more advanced debugging +techniques to find the problem. First, if there is a serial port in your box, +you can boot the kernel with the 'no_console_suspend' parameter and try to log +kernel messages using the serial console. This may provide you with some +information about the reasons of the suspend (resume) failure. Alternatively, +it may be possible to use a FireWire port for debugging with firescope +(http://v3.sk/~lkundrak/firescope/). On x86 it is also possible to +use the PM_TRACE mechanism documented in Documentation/power/s2ram.rst . + +2. Testing suspend to RAM (STR) +=============================== + +To verify that the STR works, it is generally more convenient to use the s2ram +tool available from http://suspend.sf.net and documented at +http://en.opensuse.org/SDB:Suspend_to_RAM (S2RAM_LINK). + +Namely, after writing "freezer", "devices", "platform", "processors", or "core" +into /sys/power/pm_test (available if the kernel is compiled with +CONFIG_PM_DEBUG set) the suspend code will work in the test mode corresponding +to given string. The STR test modes are defined in the same way as for +hibernation, so please refer to Section 1 for more information about them. In +particular, the "core" test allows you to test everything except for the actual +invocation of the platform firmware in order to put the system into the sleep +state. + +Among other things, the testing with the help of /sys/power/pm_test may allow +you to identify drivers that fail to suspend or resume their devices. They +should be unloaded every time before an STR transition. + +Next, you can follow the instructions at S2RAM_LINK to test the system, but if +it does not work "out of the box", you may need to boot it with +"init=/bin/bash" and test s2ram in the minimal configuration. In that case, +you may be able to search for failing drivers by following the procedure +analogous to the one described in section 1. If you find some failing drivers, +you will have to unload them every time before an STR transition (ie. before +you run s2ram), and please report the problems with them. + +There is a debugfs entry which shows the suspend to RAM statistics. Here is an +example of its output:: + + # mount -t debugfs none /sys/kernel/debug + # cat /sys/kernel/debug/suspend_stats + success: 20 + fail: 5 + failed_freeze: 0 + failed_prepare: 0 + failed_suspend: 5 + failed_suspend_noirq: 0 + failed_resume: 0 + failed_resume_noirq: 0 + failures: + last_failed_dev: alarm + adc + last_failed_errno: -16 + -16 + last_failed_step: suspend + suspend + +Field success means the success number of suspend to RAM, and field fail means +the failure number. Others are the failure number of different steps of suspend +to RAM. suspend_stats just lists the last 2 failed devices, error number and +failed step of suspend. diff --git a/Documentation/power/basic-pm-debugging.txt b/Documentation/power/basic-pm-debugging.txt deleted file mode 100644 index 708f87f78a75..000000000000 --- a/Documentation/power/basic-pm-debugging.txt +++ /dev/null @@ -1,254 +0,0 @@ -Debugging hibernation and suspend - (C) 2007 Rafael J. Wysocki , GPL - -1. Testing hibernation (aka suspend to disk or STD) - -To check if hibernation works, you can try to hibernate in the "reboot" mode: - -# echo reboot > /sys/power/disk -# echo disk > /sys/power/state - -and the system should create a hibernation image, reboot, resume and get back to -the command prompt where you have started the transition. If that happens, -hibernation is most likely to work correctly. Still, you need to repeat the -test at least a couple of times in a row for confidence. [This is necessary, -because some problems only show up on a second attempt at suspending and -resuming the system.] Moreover, hibernating in the "reboot" and "shutdown" -modes causes the PM core to skip some platform-related callbacks which on ACPI -systems might be necessary to make hibernation work. Thus, if your machine fails -to hibernate or resume in the "reboot" mode, you should try the "platform" mode: - -# echo platform > /sys/power/disk -# echo disk > /sys/power/state - -which is the default and recommended mode of hibernation. - -Unfortunately, the "platform" mode of hibernation does not work on some systems -with broken BIOSes. In such cases the "shutdown" mode of hibernation might -work: - -# echo shutdown > /sys/power/disk -# echo disk > /sys/power/state - -(it is similar to the "reboot" mode, but it requires you to press the power -button to make the system resume). - -If neither "platform" nor "shutdown" hibernation mode works, you will need to -identify what goes wrong. - -a) Test modes of hibernation - -To find out why hibernation fails on your system, you can use a special testing -facility available if the kernel is compiled with CONFIG_PM_DEBUG set. Then, -there is the file /sys/power/pm_test that can be used to make the hibernation -core run in a test mode. There are 5 test modes available: - -freezer -- test the freezing of processes - -devices -- test the freezing of processes and suspending of devices - -platform -- test the freezing of processes, suspending of devices and platform - global control methods(*) - -processors -- test the freezing of processes, suspending of devices, platform - global control methods(*) and the disabling of nonboot CPUs - -core -- test the freezing of processes, suspending of devices, platform global - control methods(*), the disabling of nonboot CPUs and suspending of - platform/system devices - -(*) the platform global control methods are only available on ACPI systems - and are only tested if the hibernation mode is set to "platform" - -To use one of them it is necessary to write the corresponding string to -/sys/power/pm_test (eg. "devices" to test the freezing of processes and -suspending devices) and issue the standard hibernation commands. For example, -to use the "devices" test mode along with the "platform" mode of hibernation, -you should do the following: - -# echo devices > /sys/power/pm_test -# echo platform > /sys/power/disk -# echo disk > /sys/power/state - -Then, the kernel will try to freeze processes, suspend devices, wait a few -seconds (5 by default, but configurable by the suspend.pm_test_delay module -parameter), resume devices and thaw processes. If "platform" is written to -/sys/power/pm_test , then after suspending devices the kernel will additionally -invoke the global control methods (eg. ACPI global control methods) used to -prepare the platform firmware for hibernation. Next, it will wait a -configurable number of seconds and invoke the platform (eg. ACPI) global -methods used to cancel hibernation etc. - -Writing "none" to /sys/power/pm_test causes the kernel to switch to the normal -hibernation/suspend operations. Also, when open for reading, /sys/power/pm_test -contains a space-separated list of all available tests (including "none" that -represents the normal functionality) in which the current test level is -indicated by square brackets. - -Generally, as you can see, each test level is more "invasive" than the previous -one and the "core" level tests the hardware and drivers as deeply as possible -without creating a hibernation image. Obviously, if the "devices" test fails, -the "platform" test will fail as well and so on. Thus, as a rule of thumb, you -should try the test modes starting from "freezer", through "devices", "platform" -and "processors" up to "core" (repeat the test on each level a couple of times -to make sure that any random factors are avoided). - -If the "freezer" test fails, there is a task that cannot be frozen (in that case -it usually is possible to identify the offending task by analysing the output of -dmesg obtained after the failing test). Failure at this level usually means -that there is a problem with the tasks freezer subsystem that should be -reported. - -If the "devices" test fails, most likely there is a driver that cannot suspend -or resume its device (in the latter case the system may hang or become unstable -after the test, so please take that into consideration). To find this driver, -you can carry out a binary search according to the rules: -- if the test fails, unload a half of the drivers currently loaded and repeat -(that would probably involve rebooting the system, so always note what drivers -have been loaded before the test), -- if the test succeeds, load a half of the drivers you have unloaded most -recently and repeat. - -Once you have found the failing driver (there can be more than just one of -them), you have to unload it every time before hibernation. In that case please -make sure to report the problem with the driver. - -It is also possible that the "devices" test will still fail after you have -unloaded all modules. In that case, you may want to look in your kernel -configuration for the drivers that can be compiled as modules (and test again -with these drivers compiled as modules). You may also try to use some special -kernel command line options such as "noapic", "noacpi" or even "acpi=off". - -If the "platform" test fails, there is a problem with the handling of the -platform (eg. ACPI) firmware on your system. In that case the "platform" mode -of hibernation is not likely to work. You can try the "shutdown" mode, but that -is rather a poor man's workaround. - -If the "processors" test fails, the disabling/enabling of nonboot CPUs does not -work (of course, this only may be an issue on SMP systems) and the problem -should be reported. In that case you can also try to switch the nonboot CPUs -off and on using the /sys/devices/system/cpu/cpu*/online sysfs attributes and -see if that works. - -If the "core" test fails, which means that suspending of the system/platform -devices has failed (these devices are suspended on one CPU with interrupts off), -the problem is most probably hardware-related and serious, so it should be -reported. - -A failure of any of the "platform", "processors" or "core" tests may cause your -system to hang or become unstable, so please beware. Such a failure usually -indicates a serious problem that very well may be related to the hardware, but -please report it anyway. - -b) Testing minimal configuration - -If all of the hibernation test modes work, you can boot the system with the -"init=/bin/bash" command line parameter and attempt to hibernate in the -"reboot", "shutdown" and "platform" modes. If that does not work, there -probably is a problem with a driver statically compiled into the kernel and you -can try to compile more drivers as modules, so that they can be tested -individually. Otherwise, there is a problem with a modular driver and you can -find it by loading a half of the modules you normally use and binary searching -in accordance with the algorithm: -- if there are n modules loaded and the attempt to suspend and resume fails, -unload n/2 of the modules and try again (that would probably involve rebooting -the system), -- if there are n modules loaded and the attempt to suspend and resume succeeds, -load n/2 modules more and try again. - -Again, if you find the offending module(s), it(they) must be unloaded every time -before hibernation, and please report the problem with it(them). - -c) Using the "test_resume" hibernation option - -/sys/power/disk generally tells the kernel what to do after creating a -hibernation image. One of the available options is "test_resume" which -causes the just created image to be used for immediate restoration. Namely, -after doing: - -# echo test_resume > /sys/power/disk -# echo disk > /sys/power/state - -a hibernation image will be created and a resume from it will be triggered -immediately without involving the platform firmware in any way. - -That test can be used to check if failures to resume from hibernation are -related to bad interactions with the platform firmware. That is, if the above -works every time, but resume from actual hibernation does not work or is -unreliable, the platform firmware may be responsible for the failures. - -On architectures and platforms that support using different kernels to restore -hibernation images (that is, the kernel used to read the image from storage and -load it into memory is different from the one included in the image) or support -kernel address space randomization, it also can be used to check if failures -to resume may be related to the differences between the restore and image -kernels. - -d) Advanced debugging - -In case that hibernation does not work on your system even in the minimal -configuration and compiling more drivers as modules is not practical or some -modules cannot be unloaded, you can use one of the more advanced debugging -techniques to find the problem. First, if there is a serial port in your box, -you can boot the kernel with the 'no_console_suspend' parameter and try to log -kernel messages using the serial console. This may provide you with some -information about the reasons of the suspend (resume) failure. Alternatively, -it may be possible to use a FireWire port for debugging with firescope -(http://v3.sk/~lkundrak/firescope/). On x86 it is also possible to -use the PM_TRACE mechanism documented in Documentation/power/s2ram.txt . - -2. Testing suspend to RAM (STR) - -To verify that the STR works, it is generally more convenient to use the s2ram -tool available from http://suspend.sf.net and documented at -http://en.opensuse.org/SDB:Suspend_to_RAM (S2RAM_LINK). - -Namely, after writing "freezer", "devices", "platform", "processors", or "core" -into /sys/power/pm_test (available if the kernel is compiled with -CONFIG_PM_DEBUG set) the suspend code will work in the test mode corresponding -to given string. The STR test modes are defined in the same way as for -hibernation, so please refer to Section 1 for more information about them. In -particular, the "core" test allows you to test everything except for the actual -invocation of the platform firmware in order to put the system into the sleep -state. - -Among other things, the testing with the help of /sys/power/pm_test may allow -you to identify drivers that fail to suspend or resume their devices. They -should be unloaded every time before an STR transition. - -Next, you can follow the instructions at S2RAM_LINK to test the system, but if -it does not work "out of the box", you may need to boot it with -"init=/bin/bash" and test s2ram in the minimal configuration. In that case, -you may be able to search for failing drivers by following the procedure -analogous to the one described in section 1. If you find some failing drivers, -you will have to unload them every time before an STR transition (ie. before -you run s2ram), and please report the problems with them. - -There is a debugfs entry which shows the suspend to RAM statistics. Here is an -example of its output. - # mount -t debugfs none /sys/kernel/debug - # cat /sys/kernel/debug/suspend_stats - success: 20 - fail: 5 - failed_freeze: 0 - failed_prepare: 0 - failed_suspend: 5 - failed_suspend_noirq: 0 - failed_resume: 0 - failed_resume_noirq: 0 - failures: - last_failed_dev: alarm - adc - last_failed_errno: -16 - -16 - last_failed_step: suspend - suspend -Field success means the success number of suspend to RAM, and field fail means -the failure number. Others are the failure number of different steps of suspend -to RAM. suspend_stats just lists the last 2 failed devices, error number and -failed step of suspend. diff --git a/Documentation/power/charger-manager.rst b/Documentation/power/charger-manager.rst new file mode 100644 index 000000000000..84fab9376792 --- /dev/null +++ b/Documentation/power/charger-manager.rst @@ -0,0 +1,205 @@ +=============== +Charger Manager +=============== + + (C) 2011 MyungJoo Ham , GPL + +Charger Manager provides in-kernel battery charger management that +requires temperature monitoring during suspend-to-RAM state +and where each battery may have multiple chargers attached and the userland +wants to look at the aggregated information of the multiple chargers. + +Charger Manager is a platform_driver with power-supply-class entries. +An instance of Charger Manager (a platform-device created with Charger-Manager) +represents an independent battery with chargers. If there are multiple +batteries with their own chargers acting independently in a system, +the system may need multiple instances of Charger Manager. + +1. Introduction +=============== + +Charger Manager supports the following: + +* Support for multiple chargers (e.g., a device with USB, AC, and solar panels) + A system may have multiple chargers (or power sources) and some of + they may be activated at the same time. Each charger may have its + own power-supply-class and each power-supply-class can provide + different information about the battery status. This framework + aggregates charger-related information from multiple sources and + shows combined information as a single power-supply-class. + +* Support for in suspend-to-RAM polling (with suspend_again callback) + While the battery is being charged and the system is in suspend-to-RAM, + we may need to monitor the battery health by looking at the ambient or + battery temperature. We can accomplish this by waking up the system + periodically. However, such a method wakes up devices unnecessarily for + monitoring the battery health and tasks, and user processes that are + supposed to be kept suspended. That, in turn, incurs unnecessary power + consumption and slow down charging process. Or even, such peak power + consumption can stop chargers in the middle of charging + (external power input < device power consumption), which not + only affects the charging time, but the lifespan of the battery. + + Charger Manager provides a function "cm_suspend_again" that can be + used as suspend_again callback of platform_suspend_ops. If the platform + requires tasks other than cm_suspend_again, it may implement its own + suspend_again callback that calls cm_suspend_again in the middle. + Normally, the platform will need to resume and suspend some devices + that are used by Charger Manager. + +* Support for premature full-battery event handling + If the battery voltage drops by "fullbatt_vchkdrop_uV" after + "fullbatt_vchkdrop_ms" from the full-battery event, the framework + restarts charging. This check is also performed while suspended by + setting wakeup time accordingly and using suspend_again. + +* Support for uevent-notify + With the charger-related events, the device sends + notification to users with UEVENT. + +2. Global Charger-Manager Data related with suspend_again +========================================================= +In order to setup Charger Manager with suspend-again feature +(in-suspend monitoring), the user should provide charger_global_desc +with setup_charger_manager(`struct charger_global_desc *`). +This charger_global_desc data for in-suspend monitoring is global +as the name suggests. Thus, the user needs to provide only once even +if there are multiple batteries. If there are multiple batteries, the +multiple instances of Charger Manager share the same charger_global_desc +and it will manage in-suspend monitoring for all instances of Charger Manager. + +The user needs to provide all the three entries to `struct charger_global_desc` +properly in order to activate in-suspend monitoring: + +`char *rtc_name;` + The name of rtc (e.g., "rtc0") used to wakeup the system from + suspend for Charger Manager. The alarm interrupt (AIE) of the rtc + should be able to wake up the system from suspend. Charger Manager + saves and restores the alarm value and use the previously-defined + alarm if it is going to go off earlier than Charger Manager so that + Charger Manager does not interfere with previously-defined alarms. + +`bool (*rtc_only_wakeup)(void);` + This callback should let CM know whether + the wakeup-from-suspend is caused only by the alarm of "rtc" in the + same struct. If there is any other wakeup source triggered the + wakeup, it should return false. If the "rtc" is the only wakeup + reason, it should return true. + +`bool assume_timer_stops_in_suspend;` + if true, Charger Manager assumes that + the timer (CM uses jiffies as timer) stops during suspend. Then, CM + assumes that the suspend-duration is same as the alarm length. + + +3. How to setup suspend_again +============================= +Charger Manager provides a function "extern bool cm_suspend_again(void)". +When cm_suspend_again is called, it monitors every battery. The suspend_ops +callback of the system's platform_suspend_ops can call cm_suspend_again +function to know whether Charger Manager wants to suspend again or not. +If there are no other devices or tasks that want to use suspend_again +feature, the platform_suspend_ops may directly refer to cm_suspend_again +for its suspend_again callback. + +The cm_suspend_again() returns true (meaning "I want to suspend again") +if the system was woken up by Charger Manager and the polling +(in-suspend monitoring) results in "normal". + +4. Charger-Manager Data (struct charger_desc) +============================================= +For each battery charged independently from other batteries (if a series of +batteries are charged by a single charger, they are counted as one independent +battery), an instance of Charger Manager is attached to it. The following + +struct charger_desc elements: + +`char *psy_name;` + The power-supply-class name of the battery. Default is + "battery" if psy_name is NULL. Users can access the psy entries + at "/sys/class/power_supply/[psy_name]/". + +`enum polling_modes polling_mode;` + CM_POLL_DISABLE: + do not poll this battery. + CM_POLL_ALWAYS: + always poll this battery. + CM_POLL_EXTERNAL_POWER_ONLY: + poll this battery if and only if an external power + source is attached. + CM_POLL_CHARGING_ONLY: + poll this battery if and only if the battery is being charged. + +`unsigned int fullbatt_vchkdrop_ms; / unsigned int fullbatt_vchkdrop_uV;` + If both have non-zero values, Charger Manager will check the + battery voltage drop fullbatt_vchkdrop_ms after the battery is fully + charged. If the voltage drop is over fullbatt_vchkdrop_uV, Charger + Manager will try to recharge the battery by disabling and enabling + chargers. Recharge with voltage drop condition only (without delay + condition) is needed to be implemented with hardware interrupts from + fuel gauges or charger devices/chips. + +`unsigned int fullbatt_uV;` + If specified with a non-zero value, Charger Manager assumes + that the battery is full (capacity = 100) if the battery is not being + charged and the battery voltage is equal to or greater than + fullbatt_uV. + +`unsigned int polling_interval_ms;` + Required polling interval in ms. Charger Manager will poll + this battery every polling_interval_ms or more frequently. + +`enum data_source battery_present;` + CM_BATTERY_PRESENT: + assume that the battery exists. + CM_NO_BATTERY: + assume that the battery does not exists. + CM_FUEL_GAUGE: + get battery presence information from fuel gauge. + CM_CHARGER_STAT: + get battery presence from chargers. + +`char **psy_charger_stat;` + An array ending with NULL that has power-supply-class names of + chargers. Each power-supply-class should provide "PRESENT" (if + battery_present is "CM_CHARGER_STAT"), "ONLINE" (shows whether an + external power source is attached or not), and "STATUS" (shows whether + the battery is {"FULL" or not FULL} or {"FULL", "Charging", + "Discharging", "NotCharging"}). + +`int num_charger_regulators; / struct regulator_bulk_data *charger_regulators;` + Regulators representing the chargers in the form for + regulator framework's bulk functions. + +`char *psy_fuel_gauge;` + Power-supply-class name of the fuel gauge. + +`int (*temperature_out_of_range)(int *mC); / bool measure_battery_temp;` + This callback returns 0 if the temperature is safe for charging, + a positive number if it is too hot to charge, and a negative number + if it is too cold to charge. With the variable mC, the callback returns + the temperature in 1/1000 of centigrade. + The source of temperature can be battery or ambient one according to + the value of measure_battery_temp. + + +5. Notify Charger-Manager of charger events: cm_notify_event() +============================================================== +If there is an charger event is required to notify +Charger Manager, a charger device driver that triggers the event can call +cm_notify_event(psy, type, msg) to notify the corresponding Charger Manager. +In the function, psy is the charger driver's power_supply pointer, which is +associated with Charger-Manager. The parameter "type" +is the same as irq's type (enum cm_event_types). The event message "msg" is +optional and is effective only if the event type is "UNDESCRIBED" or "OTHERS". + +6. Other Considerations +======================= + +At the charger/battery-related events such as battery-pulled-out, +charger-pulled-out, charger-inserted, DCIN-over/under-voltage, charger-stopped, +and others critical to chargers, the system should be configured to wake up. +At least the following should wake up the system from a suspend: +a) charger-on/off b) external-power-in/out c) battery-in/out (while charging) + +It is usually accomplished by configuring the PMIC as a wakeup source. diff --git a/Documentation/power/charger-manager.txt b/Documentation/power/charger-manager.txt deleted file mode 100644 index 9ff1105e58d6..000000000000 --- a/Documentation/power/charger-manager.txt +++ /dev/null @@ -1,200 +0,0 @@ -Charger Manager - (C) 2011 MyungJoo Ham , GPL - -Charger Manager provides in-kernel battery charger management that -requires temperature monitoring during suspend-to-RAM state -and where each battery may have multiple chargers attached and the userland -wants to look at the aggregated information of the multiple chargers. - -Charger Manager is a platform_driver with power-supply-class entries. -An instance of Charger Manager (a platform-device created with Charger-Manager) -represents an independent battery with chargers. If there are multiple -batteries with their own chargers acting independently in a system, -the system may need multiple instances of Charger Manager. - -1. Introduction -=============== - -Charger Manager supports the following: - -* Support for multiple chargers (e.g., a device with USB, AC, and solar panels) - A system may have multiple chargers (or power sources) and some of - they may be activated at the same time. Each charger may have its - own power-supply-class and each power-supply-class can provide - different information about the battery status. This framework - aggregates charger-related information from multiple sources and - shows combined information as a single power-supply-class. - -* Support for in suspend-to-RAM polling (with suspend_again callback) - While the battery is being charged and the system is in suspend-to-RAM, - we may need to monitor the battery health by looking at the ambient or - battery temperature. We can accomplish this by waking up the system - periodically. However, such a method wakes up devices unnecessarily for - monitoring the battery health and tasks, and user processes that are - supposed to be kept suspended. That, in turn, incurs unnecessary power - consumption and slow down charging process. Or even, such peak power - consumption can stop chargers in the middle of charging - (external power input < device power consumption), which not - only affects the charging time, but the lifespan of the battery. - - Charger Manager provides a function "cm_suspend_again" that can be - used as suspend_again callback of platform_suspend_ops. If the platform - requires tasks other than cm_suspend_again, it may implement its own - suspend_again callback that calls cm_suspend_again in the middle. - Normally, the platform will need to resume and suspend some devices - that are used by Charger Manager. - -* Support for premature full-battery event handling - If the battery voltage drops by "fullbatt_vchkdrop_uV" after - "fullbatt_vchkdrop_ms" from the full-battery event, the framework - restarts charging. This check is also performed while suspended by - setting wakeup time accordingly and using suspend_again. - -* Support for uevent-notify - With the charger-related events, the device sends - notification to users with UEVENT. - -2. Global Charger-Manager Data related with suspend_again -======================================================== -In order to setup Charger Manager with suspend-again feature -(in-suspend monitoring), the user should provide charger_global_desc -with setup_charger_manager(struct charger_global_desc *). -This charger_global_desc data for in-suspend monitoring is global -as the name suggests. Thus, the user needs to provide only once even -if there are multiple batteries. If there are multiple batteries, the -multiple instances of Charger Manager share the same charger_global_desc -and it will manage in-suspend monitoring for all instances of Charger Manager. - -The user needs to provide all the three entries properly in order to activate -in-suspend monitoring: - -struct charger_global_desc { - -char *rtc_name; - : The name of rtc (e.g., "rtc0") used to wakeup the system from - suspend for Charger Manager. The alarm interrupt (AIE) of the rtc - should be able to wake up the system from suspend. Charger Manager - saves and restores the alarm value and use the previously-defined - alarm if it is going to go off earlier than Charger Manager so that - Charger Manager does not interfere with previously-defined alarms. - -bool (*rtc_only_wakeup)(void); - : This callback should let CM know whether - the wakeup-from-suspend is caused only by the alarm of "rtc" in the - same struct. If there is any other wakeup source triggered the - wakeup, it should return false. If the "rtc" is the only wakeup - reason, it should return true. - -bool assume_timer_stops_in_suspend; - : if true, Charger Manager assumes that - the timer (CM uses jiffies as timer) stops during suspend. Then, CM - assumes that the suspend-duration is same as the alarm length. -}; - -3. How to setup suspend_again -============================= -Charger Manager provides a function "extern bool cm_suspend_again(void)". -When cm_suspend_again is called, it monitors every battery. The suspend_ops -callback of the system's platform_suspend_ops can call cm_suspend_again -function to know whether Charger Manager wants to suspend again or not. -If there are no other devices or tasks that want to use suspend_again -feature, the platform_suspend_ops may directly refer to cm_suspend_again -for its suspend_again callback. - -The cm_suspend_again() returns true (meaning "I want to suspend again") -if the system was woken up by Charger Manager and the polling -(in-suspend monitoring) results in "normal". - -4. Charger-Manager Data (struct charger_desc) -============================================= -For each battery charged independently from other batteries (if a series of -batteries are charged by a single charger, they are counted as one independent -battery), an instance of Charger Manager is attached to it. - -struct charger_desc { - -char *psy_name; - : The power-supply-class name of the battery. Default is - "battery" if psy_name is NULL. Users can access the psy entries - at "/sys/class/power_supply/[psy_name]/". - -enum polling_modes polling_mode; - : CM_POLL_DISABLE: do not poll this battery. - CM_POLL_ALWAYS: always poll this battery. - CM_POLL_EXTERNAL_POWER_ONLY: poll this battery if and only if - an external power source is attached. - CM_POLL_CHARGING_ONLY: poll this battery if and only if the - battery is being charged. - -unsigned int fullbatt_vchkdrop_ms; -unsigned int fullbatt_vchkdrop_uV; - : If both have non-zero values, Charger Manager will check the - battery voltage drop fullbatt_vchkdrop_ms after the battery is fully - charged. If the voltage drop is over fullbatt_vchkdrop_uV, Charger - Manager will try to recharge the battery by disabling and enabling - chargers. Recharge with voltage drop condition only (without delay - condition) is needed to be implemented with hardware interrupts from - fuel gauges or charger devices/chips. - -unsigned int fullbatt_uV; - : If specified with a non-zero value, Charger Manager assumes - that the battery is full (capacity = 100) if the battery is not being - charged and the battery voltage is equal to or greater than - fullbatt_uV. - -unsigned int polling_interval_ms; - : Required polling interval in ms. Charger Manager will poll - this battery every polling_interval_ms or more frequently. - -enum data_source battery_present; - : CM_BATTERY_PRESENT: assume that the battery exists. - CM_NO_BATTERY: assume that the battery does not exists. - CM_FUEL_GAUGE: get battery presence information from fuel gauge. - CM_CHARGER_STAT: get battery presence from chargers. - -char **psy_charger_stat; - : An array ending with NULL that has power-supply-class names of - chargers. Each power-supply-class should provide "PRESENT" (if - battery_present is "CM_CHARGER_STAT"), "ONLINE" (shows whether an - external power source is attached or not), and "STATUS" (shows whether - the battery is {"FULL" or not FULL} or {"FULL", "Charging", - "Discharging", "NotCharging"}). - -int num_charger_regulators; -struct regulator_bulk_data *charger_regulators; - : Regulators representing the chargers in the form for - regulator framework's bulk functions. - -char *psy_fuel_gauge; - : Power-supply-class name of the fuel gauge. - -int (*temperature_out_of_range)(int *mC); -bool measure_battery_temp; - : This callback returns 0 if the temperature is safe for charging, - a positive number if it is too hot to charge, and a negative number - if it is too cold to charge. With the variable mC, the callback returns - the temperature in 1/1000 of centigrade. - The source of temperature can be battery or ambient one according to - the value of measure_battery_temp. -}; - -5. Notify Charger-Manager of charger events: cm_notify_event() -========================================================= -If there is an charger event is required to notify -Charger Manager, a charger device driver that triggers the event can call -cm_notify_event(psy, type, msg) to notify the corresponding Charger Manager. -In the function, psy is the charger driver's power_supply pointer, which is -associated with Charger-Manager. The parameter "type" -is the same as irq's type (enum cm_event_types). The event message "msg" is -optional and is effective only if the event type is "UNDESCRIBED" or "OTHERS". - -6. Other Considerations -======================= - -At the charger/battery-related events such as battery-pulled-out, -charger-pulled-out, charger-inserted, DCIN-over/under-voltage, charger-stopped, -and others critical to chargers, the system should be configured to wake up. -At least the following should wake up the system from a suspend: -a) charger-on/off b) external-power-in/out c) battery-in/out (while charging) - -It is usually accomplished by configuring the PMIC as a wakeup source. diff --git a/Documentation/power/drivers-testing.rst b/Documentation/power/drivers-testing.rst new file mode 100644 index 000000000000..e53f1999fc39 --- /dev/null +++ b/Documentation/power/drivers-testing.rst @@ -0,0 +1,51 @@ +==================================================== +Testing suspend and resume support in device drivers +==================================================== + + (C) 2007 Rafael J. Wysocki , GPL + +1. Preparing the test system +============================ + +Unfortunately, to effectively test the support for the system-wide suspend and +resume transitions in a driver, it is necessary to suspend and resume a fully +functional system with this driver loaded. Moreover, that should be done +several times, preferably several times in a row, and separately for hibernation +(aka suspend to disk or STD) and suspend to RAM (STR), because each of these +cases involves slightly different operations and different interactions with +the machine's BIOS. + +Of course, for this purpose the test system has to be known to suspend and +resume without the driver being tested. Thus, if possible, you should first +resolve all suspend/resume-related problems in the test system before you start +testing the new driver. Please see Documentation/power/basic-pm-debugging.rst +for more information about the debugging of suspend/resume functionality. + +2. Testing the driver +===================== + +Once you have resolved the suspend/resume-related problems with your test system +without the new driver, you are ready to test it: + +a) Build the driver as a module, load it and try the test modes of hibernation + (see: Documentation/power/basic-pm-debugging.rst, 1). + +b) Load the driver and attempt to hibernate in the "reboot", "shutdown" and + "platform" modes (see: Documentation/power/basic-pm-debugging.rst, 1). + +c) Compile the driver directly into the kernel and try the test modes of + hibernation. + +d) Attempt to hibernate with the driver compiled directly into the kernel + in the "reboot", "shutdown" and "platform" modes. + +e) Try the test modes of suspend (see: Documentation/power/basic-pm-debugging.rst, + 2). [As far as the STR tests are concerned, it should not matter whether or + not the driver is built as a module.] + +f) Attempt to suspend to RAM using the s2ram tool with the driver loaded + (see: Documentation/power/basic-pm-debugging.rst, 2). + +Each of the above tests should be repeated several times and the STD tests +should be mixed with the STR tests. If any of them fails, the driver cannot be +regarded as suspend/resume-safe. diff --git a/Documentation/power/drivers-testing.txt b/Documentation/power/drivers-testing.txt deleted file mode 100644 index 638afdf4d6b8..000000000000 --- a/Documentation/power/drivers-testing.txt +++ /dev/null @@ -1,46 +0,0 @@ -Testing suspend and resume support in device drivers - (C) 2007 Rafael J. Wysocki , GPL - -1. Preparing the test system - -Unfortunately, to effectively test the support for the system-wide suspend and -resume transitions in a driver, it is necessary to suspend and resume a fully -functional system with this driver loaded. Moreover, that should be done -several times, preferably several times in a row, and separately for hibernation -(aka suspend to disk or STD) and suspend to RAM (STR), because each of these -cases involves slightly different operations and different interactions with -the machine's BIOS. - -Of course, for this purpose the test system has to be known to suspend and -resume without the driver being tested. Thus, if possible, you should first -resolve all suspend/resume-related problems in the test system before you start -testing the new driver. Please see Documentation/power/basic-pm-debugging.txt -for more information about the debugging of suspend/resume functionality. - -2. Testing the driver - -Once you have resolved the suspend/resume-related problems with your test system -without the new driver, you are ready to test it: - -a) Build the driver as a module, load it and try the test modes of hibernation - (see: Documentation/power/basic-pm-debugging.txt, 1). - -b) Load the driver and attempt to hibernate in the "reboot", "shutdown" and - "platform" modes (see: Documentation/power/basic-pm-debugging.txt, 1). - -c) Compile the driver directly into the kernel and try the test modes of - hibernation. - -d) Attempt to hibernate with the driver compiled directly into the kernel - in the "reboot", "shutdown" and "platform" modes. - -e) Try the test modes of suspend (see: Documentation/power/basic-pm-debugging.txt, - 2). [As far as the STR tests are concerned, it should not matter whether or - not the driver is built as a module.] - -f) Attempt to suspend to RAM using the s2ram tool with the driver loaded - (see: Documentation/power/basic-pm-debugging.txt, 2). - -Each of the above tests should be repeated several times and the STD tests -should be mixed with the STR tests. If any of them fails, the driver cannot be -regarded as suspend/resume-safe. diff --git a/Documentation/power/energy-model.rst b/Documentation/power/energy-model.rst new file mode 100644 index 000000000000..90a345d57ae9 --- /dev/null +++ b/Documentation/power/energy-model.rst @@ -0,0 +1,147 @@ +==================== +Energy Model of CPUs +==================== + +1. Overview +----------- + +The Energy Model (EM) framework serves as an interface between drivers knowing +the power consumed by CPUs at various performance levels, and the kernel +subsystems willing to use that information to make energy-aware decisions. + +The source of the information about the power consumed by CPUs can vary greatly +from one platform to another. These power costs can be estimated using +devicetree data in some cases. In others, the firmware will know better. +Alternatively, userspace might be best positioned. And so on. In order to avoid +each and every client subsystem to re-implement support for each and every +possible source of information on its own, the EM framework intervenes as an +abstraction layer which standardizes the format of power cost tables in the +kernel, hence enabling to avoid redundant work. + +The figure below depicts an example of drivers (Arm-specific here, but the +approach is applicable to any architecture) providing power costs to the EM +framework, and interested clients reading the data from it:: + + +---------------+ +-----------------+ +---------------+ + | Thermal (IPA) | | Scheduler (EAS) | | Other | + +---------------+ +-----------------+ +---------------+ + | | em_pd_energy() | + | | em_cpu_get() | + +---------+ | +---------+ + | | | + v v v + +---------------------+ + | Energy Model | + | Framework | + +---------------------+ + ^ ^ ^ + | | | em_register_perf_domain() + +----------+ | +---------+ + | | | + +---------------+ +---------------+ +--------------+ + | cpufreq-dt | | arm_scmi | | Other | + +---------------+ +---------------+ +--------------+ + ^ ^ ^ + | | | + +--------------+ +---------------+ +--------------+ + | Device Tree | | Firmware | | ? | + +--------------+ +---------------+ +--------------+ + +The EM framework manages power cost tables per 'performance domain' in the +system. A performance domain is a group of CPUs whose performance is scaled +together. Performance domains generally have a 1-to-1 mapping with CPUFreq +policies. All CPUs in a performance domain are required to have the same +micro-architecture. CPUs in different performance domains can have different +micro-architectures. + + +2. Core APIs +------------ + +2.1 Config options +^^^^^^^^^^^^^^^^^^ + +CONFIG_ENERGY_MODEL must be enabled to use the EM framework. + + +2.2 Registration of performance domains +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Drivers are expected to register performance domains into the EM framework by +calling the following API:: + + int em_register_perf_domain(cpumask_t *span, unsigned int nr_states, + struct em_data_callback *cb); + +Drivers must specify the CPUs of the performance domains using the cpumask +argument, and provide a callback function returning tuples +for each capacity state. The callback function provided by the driver is free +to fetch data from any relevant location (DT, firmware, ...), and by any mean +deemed necessary. See Section 3. for an example of driver implementing this +callback, and kernel/power/energy_model.c for further documentation on this +API. + + +2.3 Accessing performance domains +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Subsystems interested in the energy model of a CPU can retrieve it using the +em_cpu_get() API. The energy model tables are allocated once upon creation of +the performance domains, and kept in memory untouched. + +The energy consumed by a performance domain can be estimated using the +em_pd_energy() API. The estimation is performed assuming that the schedutil +CPUfreq governor is in use. + +More details about the above APIs can be found in include/linux/energy_model.h. + + +3. Example driver +----------------- + +This section provides a simple example of a CPUFreq driver registering a +performance domain in the Energy Model framework using the (fake) 'foo' +protocol. The driver implements an est_power() function to be provided to the +EM framework:: + + -> drivers/cpufreq/foo_cpufreq.c + + 01 static int est_power(unsigned long *mW, unsigned long *KHz, int cpu) + 02 { + 03 long freq, power; + 04 + 05 /* Use the 'foo' protocol to ceil the frequency */ + 06 freq = foo_get_freq_ceil(cpu, *KHz); + 07 if (freq < 0); + 08 return freq; + 09 + 10 /* Estimate the power cost for the CPU at the relevant freq. */ + 11 power = foo_estimate_power(cpu, freq); + 12 if (power < 0); + 13 return power; + 14 + 15 /* Return the values to the EM framework */ + 16 *mW = power; + 17 *KHz = freq; + 18 + 19 return 0; + 20 } + 21 + 22 static int foo_cpufreq_init(struct cpufreq_policy *policy) + 23 { + 24 struct em_data_callback em_cb = EM_DATA_CB(est_power); + 25 int nr_opp, ret; + 26 + 27 /* Do the actual CPUFreq init work ... */ + 28 ret = do_foo_cpufreq_init(policy); + 29 if (ret) + 30 return ret; + 31 + 32 /* Find the number of OPPs for this policy */ + 33 nr_opp = foo_get_nr_opp(policy); + 34 + 35 /* And register the new performance domain */ + 36 em_register_perf_domain(policy->cpus, nr_opp, &em_cb); + 37 + 38 return 0; + 39 } diff --git a/Documentation/power/energy-model.txt b/Documentation/power/energy-model.txt deleted file mode 100644 index a2b0ae4c76bd..000000000000 --- a/Documentation/power/energy-model.txt +++ /dev/null @@ -1,144 +0,0 @@ - ==================== - Energy Model of CPUs - ==================== - -1. Overview ------------ - -The Energy Model (EM) framework serves as an interface between drivers knowing -the power consumed by CPUs at various performance levels, and the kernel -subsystems willing to use that information to make energy-aware decisions. - -The source of the information about the power consumed by CPUs can vary greatly -from one platform to another. These power costs can be estimated using -devicetree data in some cases. In others, the firmware will know better. -Alternatively, userspace might be best positioned. And so on. In order to avoid -each and every client subsystem to re-implement support for each and every -possible source of information on its own, the EM framework intervenes as an -abstraction layer which standardizes the format of power cost tables in the -kernel, hence enabling to avoid redundant work. - -The figure below depicts an example of drivers (Arm-specific here, but the -approach is applicable to any architecture) providing power costs to the EM -framework, and interested clients reading the data from it. - - +---------------+ +-----------------+ +---------------+ - | Thermal (IPA) | | Scheduler (EAS) | | Other | - +---------------+ +-----------------+ +---------------+ - | | em_pd_energy() | - | | em_cpu_get() | - +---------+ | +---------+ - | | | - v v v - +---------------------+ - | Energy Model | - | Framework | - +---------------------+ - ^ ^ ^ - | | | em_register_perf_domain() - +----------+ | +---------+ - | | | - +---------------+ +---------------+ +--------------+ - | cpufreq-dt | | arm_scmi | | Other | - +---------------+ +---------------+ +--------------+ - ^ ^ ^ - | | | - +--------------+ +---------------+ +--------------+ - | Device Tree | | Firmware | | ? | - +--------------+ +---------------+ +--------------+ - -The EM framework manages power cost tables per 'performance domain' in the -system. A performance domain is a group of CPUs whose performance is scaled -together. Performance domains generally have a 1-to-1 mapping with CPUFreq -policies. All CPUs in a performance domain are required to have the same -micro-architecture. CPUs in different performance domains can have different -micro-architectures. - - -2. Core APIs ------------- - - 2.1 Config options - -CONFIG_ENERGY_MODEL must be enabled to use the EM framework. - - - 2.2 Registration of performance domains - -Drivers are expected to register performance domains into the EM framework by -calling the following API: - - int em_register_perf_domain(cpumask_t *span, unsigned int nr_states, - struct em_data_callback *cb); - -Drivers must specify the CPUs of the performance domains using the cpumask -argument, and provide a callback function returning tuples -for each capacity state. The callback function provided by the driver is free -to fetch data from any relevant location (DT, firmware, ...), and by any mean -deemed necessary. See Section 3. for an example of driver implementing this -callback, and kernel/power/energy_model.c for further documentation on this -API. - - - 2.3 Accessing performance domains - -Subsystems interested in the energy model of a CPU can retrieve it using the -em_cpu_get() API. The energy model tables are allocated once upon creation of -the performance domains, and kept in memory untouched. - -The energy consumed by a performance domain can be estimated using the -em_pd_energy() API. The estimation is performed assuming that the schedutil -CPUfreq governor is in use. - -More details about the above APIs can be found in include/linux/energy_model.h. - - -3. Example driver ------------------ - -This section provides a simple example of a CPUFreq driver registering a -performance domain in the Energy Model framework using the (fake) 'foo' -protocol. The driver implements an est_power() function to be provided to the -EM framework. - - -> drivers/cpufreq/foo_cpufreq.c - -01 static int est_power(unsigned long *mW, unsigned long *KHz, int cpu) -02 { -03 long freq, power; -04 -05 /* Use the 'foo' protocol to ceil the frequency */ -06 freq = foo_get_freq_ceil(cpu, *KHz); -07 if (freq < 0); -08 return freq; -09 -10 /* Estimate the power cost for the CPU at the relevant freq. */ -11 power = foo_estimate_power(cpu, freq); -12 if (power < 0); -13 return power; -14 -15 /* Return the values to the EM framework */ -16 *mW = power; -17 *KHz = freq; -18 -19 return 0; -20 } -21 -22 static int foo_cpufreq_init(struct cpufreq_policy *policy) -23 { -24 struct em_data_callback em_cb = EM_DATA_CB(est_power); -25 int nr_opp, ret; -26 -27 /* Do the actual CPUFreq init work ... */ -28 ret = do_foo_cpufreq_init(policy); -29 if (ret) -30 return ret; -31 -32 /* Find the number of OPPs for this policy */ -33 nr_opp = foo_get_nr_opp(policy); -34 -35 /* And register the new performance domain */ -36 em_register_perf_domain(policy->cpus, nr_opp, &em_cb); -37 -38 return 0; -39 } diff --git a/Documentation/power/freezing-of-tasks.rst b/Documentation/power/freezing-of-tasks.rst new file mode 100644 index 000000000000..ef110fe55e82 --- /dev/null +++ b/Documentation/power/freezing-of-tasks.rst @@ -0,0 +1,244 @@ +================= +Freezing of tasks +================= + +(C) 2007 Rafael J. Wysocki , GPL + +I. What is the freezing of tasks? +================================= + +The freezing of tasks is a mechanism by which user space processes and some +kernel threads are controlled during hibernation or system-wide suspend (on some +architectures). + +II. How does it work? +===================== + +There are three per-task flags used for that, PF_NOFREEZE, PF_FROZEN +and PF_FREEZER_SKIP (the last one is auxiliary). The tasks that have +PF_NOFREEZE unset (all user space processes and some kernel threads) are +regarded as 'freezable' and treated in a special way before the system enters a +suspend state as well as before a hibernation image is created (in what follows +we only consider hibernation, but the description also applies to suspend). + +Namely, as the first step of the hibernation procedure the function +freeze_processes() (defined in kernel/power/process.c) is called. A system-wide +variable system_freezing_cnt (as opposed to a per-task flag) is used to indicate +whether the system is to undergo a freezing operation. And freeze_processes() +sets this variable. After this, it executes try_to_freeze_tasks() that sends a +fake signal to all user space processes, and wakes up all the kernel threads. +All freezable tasks must react to that by calling try_to_freeze(), which +results in a call to __refrigerator() (defined in kernel/freezer.c), which sets +the task's PF_FROZEN flag, changes its state to TASK_UNINTERRUPTIBLE and makes +it loop until PF_FROZEN is cleared for it. Then, we say that the task is +'frozen' and therefore the set of functions handling this mechanism is referred +to as 'the freezer' (these functions are defined in kernel/power/process.c, +kernel/freezer.c & include/linux/freezer.h). User space processes are generally +frozen before kernel threads. + +__refrigerator() must not be called directly. Instead, use the +try_to_freeze() function (defined in include/linux/freezer.h), that checks +if the task is to be frozen and makes the task enter __refrigerator(). + +For user space processes try_to_freeze() is called automatically from the +signal-handling code, but the freezable kernel threads need to call it +explicitly in suitable places or use the wait_event_freezable() or +wait_event_freezable_timeout() macros (defined in include/linux/freezer.h) +that combine interruptible sleep with checking if the task is to be frozen and +calling try_to_freeze(). The main loop of a freezable kernel thread may look +like the following one:: + + set_freezable(); + do { + hub_events(); + wait_event_freezable(khubd_wait, + !list_empty(&hub_event_list) || + kthread_should_stop()); + } while (!kthread_should_stop() || !list_empty(&hub_event_list)); + +(from drivers/usb/core/hub.c::hub_thread()). + +If a freezable kernel thread fails to call try_to_freeze() after the freezer has +initiated a freezing operation, the freezing of tasks will fail and the entire +hibernation operation will be cancelled. For this reason, freezable kernel +threads must call try_to_freeze() somewhere or use one of the +wait_event_freezable() and wait_event_freezable_timeout() macros. + +After the system memory state has been restored from a hibernation image and +devices have been reinitialized, the function thaw_processes() is called in +order to clear the PF_FROZEN flag for each frozen task. Then, the tasks that +have been frozen leave __refrigerator() and continue running. + + +Rationale behind the functions dealing with freezing and thawing of tasks +------------------------------------------------------------------------- + +freeze_processes(): + - freezes only userspace tasks + +freeze_kernel_threads(): + - freezes all tasks (including kernel threads) because we can't freeze + kernel threads without freezing userspace tasks + +thaw_kernel_threads(): + - thaws only kernel threads; this is particularly useful if we need to do + anything special in between thawing of kernel threads and thawing of + userspace tasks, or if we want to postpone the thawing of userspace tasks + +thaw_processes(): + - thaws all tasks (including kernel threads) because we can't thaw userspace + tasks without thawing kernel threads + + +III. Which kernel threads are freezable? +======================================== + +Kernel threads are not freezable by default. However, a kernel thread may clear +PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE +directly is not allowed). From this point it is regarded as freezable +and must call try_to_freeze() in a suitable place. + +IV. Why do we do that? +====================== + +Generally speaking, there is a couple of reasons to use the freezing of tasks: + +1. The principal reason is to prevent filesystems from being damaged after + hibernation. At the moment we have no simple means of checkpointing + filesystems, so if there are any modifications made to filesystem data and/or + metadata on disks, we cannot bring them back to the state from before the + modifications. At the same time each hibernation image contains some + filesystem-related information that must be consistent with the state of the + on-disk data and metadata after the system memory state has been restored + from the image (otherwise the filesystems will be damaged in a nasty way, + usually making them almost impossible to repair). We therefore freeze + tasks that might cause the on-disk filesystems' data and metadata to be + modified after the hibernation image has been created and before the + system is finally powered off. The majority of these are user space + processes, but if any of the kernel threads may cause something like this + to happen, they have to be freezable. + +2. Next, to create the hibernation image we need to free a sufficient amount of + memory (approximately 50% of available RAM) and we need to do that before + devices are deactivated, because we generally need them for swapping out. + Then, after the memory for the image has been freed, we don't want tasks + to allocate additional memory and we prevent them from doing that by + freezing them earlier. [Of course, this also means that device drivers + should not allocate substantial amounts of memory from their .suspend() + callbacks before hibernation, but this is a separate issue.] + +3. The third reason is to prevent user space processes and some kernel threads + from interfering with the suspending and resuming of devices. A user space + process running on a second CPU while we are suspending devices may, for + example, be troublesome and without the freezing of tasks we would need some + safeguards against race conditions that might occur in such a case. + +Although Linus Torvalds doesn't like the freezing of tasks, he said this in one +of the discussions on LKML (http://lkml.org/lkml/2007/4/27/608): + +"RJW:> Why we freeze tasks at all or why we freeze kernel threads? + +Linus: In many ways, 'at all'. + +I **do** realize the IO request queue issues, and that we cannot actually do +s2ram with some devices in the middle of a DMA. So we want to be able to +avoid *that*, there's no question about that. And I suspect that stopping +user threads and then waiting for a sync is practically one of the easier +ways to do so. + +So in practice, the 'at all' may become a 'why freeze kernel threads?' and +freezing user threads I don't find really objectionable." + +Still, there are kernel threads that may want to be freezable. For example, if +a kernel thread that belongs to a device driver accesses the device directly, it +in principle needs to know when the device is suspended, so that it doesn't try +to access it at that time. However, if the kernel thread is freezable, it will +be frozen before the driver's .suspend() callback is executed and it will be +thawed after the driver's .resume() callback has run, so it won't be accessing +the device while it's suspended. + +4. Another reason for freezing tasks is to prevent user space processes from + realizing that hibernation (or suspend) operation takes place. Ideally, user + space processes should not notice that such a system-wide operation has + occurred and should continue running without any problems after the restore + (or resume from suspend). Unfortunately, in the most general case this + is quite difficult to achieve without the freezing of tasks. Consider, + for example, a process that depends on all CPUs being online while it's + running. Since we need to disable nonboot CPUs during the hibernation, + if this process is not frozen, it may notice that the number of CPUs has + changed and may start to work incorrectly because of that. + +V. Are there any problems related to the freezing of tasks? +=========================================================== + +Yes, there are. + +First of all, the freezing of kernel threads may be tricky if they depend one +on another. For example, if kernel thread A waits for a completion (in the +TASK_UNINTERRUPTIBLE state) that needs to be done by freezable kernel thread B +and B is frozen in the meantime, then A will be blocked until B is thawed, which +may be undesirable. That's why kernel threads are not freezable by default. + +Second, there are the following two problems related to the freezing of user +space processes: + +1. Putting processes into an uninterruptible sleep distorts the load average. +2. Now that we have FUSE, plus the framework for doing device drivers in + userspace, it gets even more complicated because some userspace processes are + now doing the sorts of things that kernel threads do + (https://lists.linux-foundation.org/pipermail/linux-pm/2007-May/012309.html). + +The problem 1. seems to be fixable, although it hasn't been fixed so far. The +other one is more serious, but it seems that we can work around it by using +hibernation (and suspend) notifiers (in that case, though, we won't be able to +avoid the realization by the user space processes that the hibernation is taking +place). + +There are also problems that the freezing of tasks tends to expose, although +they are not directly related to it. For example, if request_firmware() is +called from a device driver's .resume() routine, it will timeout and eventually +fail, because the user land process that should respond to the request is frozen +at this point. So, seemingly, the failure is due to the freezing of tasks. +Suppose, however, that the firmware file is located on a filesystem accessible +only through another device that hasn't been resumed yet. In that case, +request_firmware() will fail regardless of whether or not the freezing of tasks +is used. Consequently, the problem is not really related to the freezing of +tasks, since it generally exists anyway. + +A driver must have all firmwares it may need in RAM before suspend() is called. +If keeping them is not practical, for example due to their size, they must be +requested early enough using the suspend notifier API described in +Documentation/driver-api/pm/notifiers.rst. + +VI. Are there any precautions to be taken to prevent freezing failures? +======================================================================= + +Yes, there are. + +First of all, grabbing the 'system_transition_mutex' lock to mutually exclude a piece of code +from system-wide sleep such as suspend/hibernation is not encouraged. +If possible, that piece of code must instead hook onto the suspend/hibernation +notifiers to achieve mutual exclusion. Look at the CPU-Hotplug code +(kernel/cpu.c) for an example. + +However, if that is not feasible, and grabbing 'system_transition_mutex' is deemed necessary, +it is strongly discouraged to directly call mutex_[un]lock(&system_transition_mutex) since +that could lead to freezing failures, because if the suspend/hibernate code +successfully acquired the 'system_transition_mutex' lock, and hence that other entity failed +to acquire the lock, then that task would get blocked in TASK_UNINTERRUPTIBLE +state. As a consequence, the freezer would not be able to freeze that task, +leading to freezing failure. + +However, the [un]lock_system_sleep() APIs are safe to use in this scenario, +since they ask the freezer to skip freezing this task, since it is anyway +"frozen enough" as it is blocked on 'system_transition_mutex', which will be released +only after the entire suspend/hibernation sequence is complete. +So, to summarize, use [un]lock_system_sleep() instead of directly using +mutex_[un]lock(&system_transition_mutex). That would prevent freezing failures. + +V. Miscellaneous +================ + +/sys/power/pm_freeze_timeout controls how long it will cost at most to freeze +all user space processes or all freezable kernel threads, in unit of millisecond. +The default value is 20000, with range of unsigned integer. diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt deleted file mode 100644 index cd283190855a..000000000000 --- a/Documentation/power/freezing-of-tasks.txt +++ /dev/null @@ -1,231 +0,0 @@ -Freezing of tasks - (C) 2007 Rafael J. Wysocki , GPL - -I. What is the freezing of tasks? - -The freezing of tasks is a mechanism by which user space processes and some -kernel threads are controlled during hibernation or system-wide suspend (on some -architectures). - -II. How does it work? - -There are three per-task flags used for that, PF_NOFREEZE, PF_FROZEN -and PF_FREEZER_SKIP (the last one is auxiliary). The tasks that have -PF_NOFREEZE unset (all user space processes and some kernel threads) are -regarded as 'freezable' and treated in a special way before the system enters a -suspend state as well as before a hibernation image is created (in what follows -we only consider hibernation, but the description also applies to suspend). - -Namely, as the first step of the hibernation procedure the function -freeze_processes() (defined in kernel/power/process.c) is called. A system-wide -variable system_freezing_cnt (as opposed to a per-task flag) is used to indicate -whether the system is to undergo a freezing operation. And freeze_processes() -sets this variable. After this, it executes try_to_freeze_tasks() that sends a -fake signal to all user space processes, and wakes up all the kernel threads. -All freezable tasks must react to that by calling try_to_freeze(), which -results in a call to __refrigerator() (defined in kernel/freezer.c), which sets -the task's PF_FROZEN flag, changes its state to TASK_UNINTERRUPTIBLE and makes -it loop until PF_FROZEN is cleared for it. Then, we say that the task is -'frozen' and therefore the set of functions handling this mechanism is referred -to as 'the freezer' (these functions are defined in kernel/power/process.c, -kernel/freezer.c & include/linux/freezer.h). User space processes are generally -frozen before kernel threads. - -__refrigerator() must not be called directly. Instead, use the -try_to_freeze() function (defined in include/linux/freezer.h), that checks -if the task is to be frozen and makes the task enter __refrigerator(). - -For user space processes try_to_freeze() is called automatically from the -signal-handling code, but the freezable kernel threads need to call it -explicitly in suitable places or use the wait_event_freezable() or -wait_event_freezable_timeout() macros (defined in include/linux/freezer.h) -that combine interruptible sleep with checking if the task is to be frozen and -calling try_to_freeze(). The main loop of a freezable kernel thread may look -like the following one: - - set_freezable(); - do { - hub_events(); - wait_event_freezable(khubd_wait, - !list_empty(&hub_event_list) || - kthread_should_stop()); - } while (!kthread_should_stop() || !list_empty(&hub_event_list)); - -(from drivers/usb/core/hub.c::hub_thread()). - -If a freezable kernel thread fails to call try_to_freeze() after the freezer has -initiated a freezing operation, the freezing of tasks will fail and the entire -hibernation operation will be cancelled. For this reason, freezable kernel -threads must call try_to_freeze() somewhere or use one of the -wait_event_freezable() and wait_event_freezable_timeout() macros. - -After the system memory state has been restored from a hibernation image and -devices have been reinitialized, the function thaw_processes() is called in -order to clear the PF_FROZEN flag for each frozen task. Then, the tasks that -have been frozen leave __refrigerator() and continue running. - - -Rationale behind the functions dealing with freezing and thawing of tasks: -------------------------------------------------------------------------- - -freeze_processes(): - - freezes only userspace tasks - -freeze_kernel_threads(): - - freezes all tasks (including kernel threads) because we can't freeze - kernel threads without freezing userspace tasks - -thaw_kernel_threads(): - - thaws only kernel threads; this is particularly useful if we need to do - anything special in between thawing of kernel threads and thawing of - userspace tasks, or if we want to postpone the thawing of userspace tasks - -thaw_processes(): - - thaws all tasks (including kernel threads) because we can't thaw userspace - tasks without thawing kernel threads - - -III. Which kernel threads are freezable? - -Kernel threads are not freezable by default. However, a kernel thread may clear -PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE -directly is not allowed). From this point it is regarded as freezable -and must call try_to_freeze() in a suitable place. - -IV. Why do we do that? - -Generally speaking, there is a couple of reasons to use the freezing of tasks: - -1. The principal reason is to prevent filesystems from being damaged after -hibernation. At the moment we have no simple means of checkpointing -filesystems, so if there are any modifications made to filesystem data and/or -metadata on disks, we cannot bring them back to the state from before the -modifications. At the same time each hibernation image contains some -filesystem-related information that must be consistent with the state of the -on-disk data and metadata after the system memory state has been restored from -the image (otherwise the filesystems will be damaged in a nasty way, usually -making them almost impossible to repair). We therefore freeze tasks that might -cause the on-disk filesystems' data and metadata to be modified after the -hibernation image has been created and before the system is finally powered off. -The majority of these are user space processes, but if any of the kernel threads -may cause something like this to happen, they have to be freezable. - -2. Next, to create the hibernation image we need to free a sufficient amount of -memory (approximately 50% of available RAM) and we need to do that before -devices are deactivated, because we generally need them for swapping out. Then, -after the memory for the image has been freed, we don't want tasks to allocate -additional memory and we prevent them from doing that by freezing them earlier. -[Of course, this also means that device drivers should not allocate substantial -amounts of memory from their .suspend() callbacks before hibernation, but this -is a separate issue.] - -3. The third reason is to prevent user space processes and some kernel threads -from interfering with the suspending and resuming of devices. A user space -process running on a second CPU while we are suspending devices may, for -example, be troublesome and without the freezing of tasks we would need some -safeguards against race conditions that might occur in such a case. - -Although Linus Torvalds doesn't like the freezing of tasks, he said this in one -of the discussions on LKML (http://lkml.org/lkml/2007/4/27/608): - -"RJW:> Why we freeze tasks at all or why we freeze kernel threads? - -Linus: In many ways, 'at all'. - -I _do_ realize the IO request queue issues, and that we cannot actually do -s2ram with some devices in the middle of a DMA. So we want to be able to -avoid *that*, there's no question about that. And I suspect that stopping -user threads and then waiting for a sync is practically one of the easier -ways to do so. - -So in practice, the 'at all' may become a 'why freeze kernel threads?' and -freezing user threads I don't find really objectionable." - -Still, there are kernel threads that may want to be freezable. For example, if -a kernel thread that belongs to a device driver accesses the device directly, it -in principle needs to know when the device is suspended, so that it doesn't try -to access it at that time. However, if the kernel thread is freezable, it will -be frozen before the driver's .suspend() callback is executed and it will be -thawed after the driver's .resume() callback has run, so it won't be accessing -the device while it's suspended. - -4. Another reason for freezing tasks is to prevent user space processes from -realizing that hibernation (or suspend) operation takes place. Ideally, user -space processes should not notice that such a system-wide operation has occurred -and should continue running without any problems after the restore (or resume -from suspend). Unfortunately, in the most general case this is quite difficult -to achieve without the freezing of tasks. Consider, for example, a process -that depends on all CPUs being online while it's running. Since we need to -disable nonboot CPUs during the hibernation, if this process is not frozen, it -may notice that the number of CPUs has changed and may start to work incorrectly -because of that. - -V. Are there any problems related to the freezing of tasks? - -Yes, there are. - -First of all, the freezing of kernel threads may be tricky if they depend one -on another. For example, if kernel thread A waits for a completion (in the -TASK_UNINTERRUPTIBLE state) that needs to be done by freezable kernel thread B -and B is frozen in the meantime, then A will be blocked until B is thawed, which -may be undesirable. That's why kernel threads are not freezable by default. - -Second, there are the following two problems related to the freezing of user -space processes: -1. Putting processes into an uninterruptible sleep distorts the load average. -2. Now that we have FUSE, plus the framework for doing device drivers in -userspace, it gets even more complicated because some userspace processes are -now doing the sorts of things that kernel threads do -(https://lists.linux-foundation.org/pipermail/linux-pm/2007-May/012309.html). - -The problem 1. seems to be fixable, although it hasn't been fixed so far. The -other one is more serious, but it seems that we can work around it by using -hibernation (and suspend) notifiers (in that case, though, we won't be able to -avoid the realization by the user space processes that the hibernation is taking -place). - -There are also problems that the freezing of tasks tends to expose, although -they are not directly related to it. For example, if request_firmware() is -called from a device driver's .resume() routine, it will timeout and eventually -fail, because the user land process that should respond to the request is frozen -at this point. So, seemingly, the failure is due to the freezing of tasks. -Suppose, however, that the firmware file is located on a filesystem accessible -only through another device that hasn't been resumed yet. In that case, -request_firmware() will fail regardless of whether or not the freezing of tasks -is used. Consequently, the problem is not really related to the freezing of -tasks, since it generally exists anyway. - -A driver must have all firmwares it may need in RAM before suspend() is called. -If keeping them is not practical, for example due to their size, they must be -requested early enough using the suspend notifier API described in -Documentation/driver-api/pm/notifiers.rst. - -VI. Are there any precautions to be taken to prevent freezing failures? - -Yes, there are. - -First of all, grabbing the 'system_transition_mutex' lock to mutually exclude a piece of code -from system-wide sleep such as suspend/hibernation is not encouraged. -If possible, that piece of code must instead hook onto the suspend/hibernation -notifiers to achieve mutual exclusion. Look at the CPU-Hotplug code -(kernel/cpu.c) for an example. - -However, if that is not feasible, and grabbing 'system_transition_mutex' is deemed necessary, -it is strongly discouraged to directly call mutex_[un]lock(&system_transition_mutex) since -that could lead to freezing failures, because if the suspend/hibernate code -successfully acquired the 'system_transition_mutex' lock, and hence that other entity failed -to acquire the lock, then that task would get blocked in TASK_UNINTERRUPTIBLE -state. As a consequence, the freezer would not be able to freeze that task, -leading to freezing failure. - -However, the [un]lock_system_sleep() APIs are safe to use in this scenario, -since they ask the freezer to skip freezing this task, since it is anyway -"frozen enough" as it is blocked on 'system_transition_mutex', which will be released -only after the entire suspend/hibernation sequence is complete. -So, to summarize, use [un]lock_system_sleep() instead of directly using -mutex_[un]lock(&system_transition_mutex). That would prevent freezing failures. - -V. Miscellaneous -/sys/power/pm_freeze_timeout controls how long it will cost at most to freeze -all user space processes or all freezable kernel threads, in unit of millisecond. -The default value is 20000, with range of unsigned integer. diff --git a/Documentation/power/index.rst b/Documentation/power/index.rst new file mode 100644 index 000000000000..20415f21e48a --- /dev/null +++ b/Documentation/power/index.rst @@ -0,0 +1,46 @@ +:orphan: + +================ +Power Management +================ + +.. toctree:: + :maxdepth: 1 + + apm-acpi + basic-pm-debugging + charger-manager + drivers-testing + energy-model + freezing-of-tasks + interface + opp + pci + pm_qos_interface + power_supply_class + runtime_pm + s2ram + suspend-and-cpuhotplug + suspend-and-interrupts + swsusp-and-swap-files + swsusp-dmcrypt + swsusp + video + tricks + + userland-swsusp + + powercap/powercap + + regulator/consumer + regulator/design + regulator/machine + regulator/overview + regulator/regulator + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/power/interface.rst b/Documentation/power/interface.rst new file mode 100644 index 000000000000..8d270ed27228 --- /dev/null +++ b/Documentation/power/interface.rst @@ -0,0 +1,79 @@ +=========================================== +Power Management Interface for System Sleep +=========================================== + +Copyright (c) 2016 Intel Corp., Rafael J. Wysocki + +The power management subsystem provides userspace with a unified sysfs interface +for system sleep regardless of the underlying system architecture or platform. +The interface is located in the /sys/power/ directory (assuming that sysfs is +mounted at /sys). + +/sys/power/state is the system sleep state control file. + +Reading from it returns a list of supported sleep states, encoded as: + +- 'freeze' (Suspend-to-Idle) +- 'standby' (Power-On Suspend) +- 'mem' (Suspend-to-RAM) +- 'disk' (Suspend-to-Disk) + +Suspend-to-Idle is always supported. Suspend-to-Disk is always supported +too as long the kernel has been configured to support hibernation at all +(ie. CONFIG_HIBERNATION is set in the kernel configuration file). Support +for Suspend-to-RAM and Power-On Suspend depends on the capabilities of the +platform. + +If one of the strings listed in /sys/power/state is written to it, the system +will attempt to transition into the corresponding sleep state. Refer to +Documentation/admin-guide/pm/sleep-states.rst for a description of each of +those states. + +/sys/power/disk controls the operating mode of hibernation (Suspend-to-Disk). +Specifically, it tells the kernel what to do after creating a hibernation image. + +Reading from it returns a list of supported options encoded as: + +- 'platform' (put the system into sleep using a platform-provided method) +- 'shutdown' (shut the system down) +- 'reboot' (reboot the system) +- 'suspend' (trigger a Suspend-to-RAM transition) +- 'test_resume' (resume-after-hibernation test mode) + +The currently selected option is printed in square brackets. + +The 'platform' option is only available if the platform provides a special +mechanism to put the system to sleep after creating a hibernation image (ACPI +does that, for example). The 'suspend' option is available if Suspend-to-RAM +is supported. Refer to Documentation/power/basic-pm-debugging.rst for the +description of the 'test_resume' option. + +To select an option, write the string representing it to /sys/power/disk. + +/sys/power/image_size controls the size of hibernation images. + +It can be written a string representing a non-negative integer that will be +used as a best-effort upper limit of the image size, in bytes. The hibernation +core will do its best to ensure that the image size will not exceed that number. +However, if that turns out to be impossible to achieve, a hibernation image will +still be created and its size will be as small as possible. In particular, +writing '0' to this file will enforce hibernation images to be as small as +possible. + +Reading from this file returns the current image size limit, which is set to +around 2/5 of available RAM by default. + +/sys/power/pm_trace controls the PM trace mechanism saving the last suspend +or resume event point in the RTC across reboots. + +It helps to debug hard lockups or reboots due to device driver failures that +occur during system suspend or resume (which is more common) more effectively. + +If /sys/power/pm_trace contains '1', the fingerprint of each suspend/resume +event point in turn will be stored in the RTC memory (overwriting the actual +RTC information), so it will survive a system crash if one occurs right after +storing it and it can be used later to identify the driver that caused the crash +to happen (see Documentation/power/s2ram.rst for more information). + +Initially it contains '0' which may be changed to '1' by writing a string +representing a nonzero integer into it. diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt deleted file mode 100644 index 27df7f98668a..000000000000 --- a/Documentation/power/interface.txt +++ /dev/null @@ -1,77 +0,0 @@ -Power Management Interface for System Sleep - -Copyright (c) 2016 Intel Corp., Rafael J. Wysocki - -The power management subsystem provides userspace with a unified sysfs interface -for system sleep regardless of the underlying system architecture or platform. -The interface is located in the /sys/power/ directory (assuming that sysfs is -mounted at /sys). - -/sys/power/state is the system sleep state control file. - -Reading from it returns a list of supported sleep states, encoded as: - -'freeze' (Suspend-to-Idle) -'standby' (Power-On Suspend) -'mem' (Suspend-to-RAM) -'disk' (Suspend-to-Disk) - -Suspend-to-Idle is always supported. Suspend-to-Disk is always supported -too as long the kernel has been configured to support hibernation at all -(ie. CONFIG_HIBERNATION is set in the kernel configuration file). Support -for Suspend-to-RAM and Power-On Suspend depends on the capabilities of the -platform. - -If one of the strings listed in /sys/power/state is written to it, the system -will attempt to transition into the corresponding sleep state. Refer to -Documentation/admin-guide/pm/sleep-states.rst for a description of each of -those states. - -/sys/power/disk controls the operating mode of hibernation (Suspend-to-Disk). -Specifically, it tells the kernel what to do after creating a hibernation image. - -Reading from it returns a list of supported options encoded as: - -'platform' (put the system into sleep using a platform-provided method) -'shutdown' (shut the system down) -'reboot' (reboot the system) -'suspend' (trigger a Suspend-to-RAM transition) -'test_resume' (resume-after-hibernation test mode) - -The currently selected option is printed in square brackets. - -The 'platform' option is only available if the platform provides a special -mechanism to put the system to sleep after creating a hibernation image (ACPI -does that, for example). The 'suspend' option is available if Suspend-to-RAM -is supported. Refer to Documentation/power/basic-pm-debugging.txt for the -description of the 'test_resume' option. - -To select an option, write the string representing it to /sys/power/disk. - -/sys/power/image_size controls the size of hibernation images. - -It can be written a string representing a non-negative integer that will be -used as a best-effort upper limit of the image size, in bytes. The hibernation -core will do its best to ensure that the image size will not exceed that number. -However, if that turns out to be impossible to achieve, a hibernation image will -still be created and its size will be as small as possible. In particular, -writing '0' to this file will enforce hibernation images to be as small as -possible. - -Reading from this file returns the current image size limit, which is set to -around 2/5 of available RAM by default. - -/sys/power/pm_trace controls the PM trace mechanism saving the last suspend -or resume event point in the RTC across reboots. - -It helps to debug hard lockups or reboots due to device driver failures that -occur during system suspend or resume (which is more common) more effectively. - -If /sys/power/pm_trace contains '1', the fingerprint of each suspend/resume -event point in turn will be stored in the RTC memory (overwriting the actual -RTC information), so it will survive a system crash if one occurs right after -storing it and it can be used later to identify the driver that caused the crash -to happen (see Documentation/power/s2ram.txt for more information). - -Initially it contains '0' which may be changed to '1' by writing a string -representing a nonzero integer into it. diff --git a/Documentation/power/opp.rst b/Documentation/power/opp.rst new file mode 100644 index 000000000000..b3cf1def9dee --- /dev/null +++ b/Documentation/power/opp.rst @@ -0,0 +1,379 @@ +========================================== +Operating Performance Points (OPP) Library +========================================== + +(C) 2009-2010 Nishanth Menon , Texas Instruments Incorporated + +.. Contents + + 1. Introduction + 2. Initial OPP List Registration + 3. OPP Search Functions + 4. OPP Availability Control Functions + 5. OPP Data Retrieval Functions + 6. Data Structures + +1. Introduction +=============== + +1.1 What is an Operating Performance Point (OPP)? +------------------------------------------------- + +Complex SoCs of today consists of a multiple sub-modules working in conjunction. +In an operational system executing varied use cases, not all modules in the SoC +need to function at their highest performing frequency all the time. To +facilitate this, sub-modules in a SoC are grouped into domains, allowing some +domains to run at lower voltage and frequency while other domains run at +voltage/frequency pairs that are higher. + +The set of discrete tuples consisting of frequency and voltage pairs that +the device will support per domain are called Operating Performance Points or +OPPs. + +As an example: + +Let us consider an MPU device which supports the following: +{300MHz at minimum voltage of 1V}, {800MHz at minimum voltage of 1.2V}, +{1GHz at minimum voltage of 1.3V} + +We can represent these as three OPPs as the following {Hz, uV} tuples: + +- {300000000, 1000000} +- {800000000, 1200000} +- {1000000000, 1300000} + +1.2 Operating Performance Points Library +---------------------------------------- + +OPP library provides a set of helper functions to organize and query the OPP +information. The library is located in drivers/base/power/opp.c and the header +is located in include/linux/pm_opp.h. OPP library can be enabled by enabling +CONFIG_PM_OPP from power management menuconfig menu. OPP library depends on +CONFIG_PM as certain SoCs such as Texas Instrument's OMAP framework allows to +optionally boot at a certain OPP without needing cpufreq. + +Typical usage of the OPP library is as follows:: + + (users) -> registers a set of default OPPs -> (library) + SoC framework -> modifies on required cases certain OPPs -> OPP layer + -> queries to search/retrieve information -> + +OPP layer expects each domain to be represented by a unique device pointer. SoC +framework registers a set of initial OPPs per device with the OPP layer. This +list is expected to be an optimally small number typically around 5 per device. +This initial list contains a set of OPPs that the framework expects to be safely +enabled by default in the system. + +Note on OPP Availability +^^^^^^^^^^^^^^^^^^^^^^^^ + +As the system proceeds to operate, SoC framework may choose to make certain +OPPs available or not available on each device based on various external +factors. Example usage: Thermal management or other exceptional situations where +SoC framework might choose to disable a higher frequency OPP to safely continue +operations until that OPP could be re-enabled if possible. + +OPP library facilitates this concept in it's implementation. The following +operational functions operate only on available opps: +opp_find_freq_{ceil, floor}, dev_pm_opp_get_voltage, dev_pm_opp_get_freq, dev_pm_opp_get_opp_count + +dev_pm_opp_find_freq_exact is meant to be used to find the opp pointer which can then +be used for dev_pm_opp_enable/disable functions to make an opp available as required. + +WARNING: Users of OPP library should refresh their availability count using +get_opp_count if dev_pm_opp_enable/disable functions are invoked for a device, the +exact mechanism to trigger these or the notification mechanism to other +dependent subsystems such as cpufreq are left to the discretion of the SoC +specific framework which uses the OPP library. Similar care needs to be taken +care to refresh the cpufreq table in cases of these operations. + +2. Initial OPP List Registration +================================ +The SoC implementation calls dev_pm_opp_add function iteratively to add OPPs per +device. It is expected that the SoC framework will register the OPP entries +optimally- typical numbers range to be less than 5. The list generated by +registering the OPPs is maintained by OPP library throughout the device +operation. The SoC framework can subsequently control the availability of the +OPPs dynamically using the dev_pm_opp_enable / disable functions. + +dev_pm_opp_add + Add a new OPP for a specific domain represented by the device pointer. + The OPP is defined using the frequency and voltage. Once added, the OPP + is assumed to be available and control of it's availability can be done + with the dev_pm_opp_enable/disable functions. OPP library internally stores + and manages this information in the opp struct. This function may be + used by SoC framework to define a optimal list as per the demands of + SoC usage environment. + + WARNING: + Do not use this function in interrupt context. + + Example:: + + soc_pm_init() + { + /* Do things */ + r = dev_pm_opp_add(mpu_dev, 1000000, 900000); + if (!r) { + pr_err("%s: unable to register mpu opp(%d)\n", r); + goto no_cpufreq; + } + /* Do cpufreq things */ + no_cpufreq: + /* Do remaining things */ + } + +3. OPP Search Functions +======================= +High level framework such as cpufreq operates on frequencies. To map the +frequency back to the corresponding OPP, OPP library provides handy functions +to search the OPP list that OPP library internally manages. These search +functions return the matching pointer representing the opp if a match is +found, else returns error. These errors are expected to be handled by standard +error checks such as IS_ERR() and appropriate actions taken by the caller. + +Callers of these functions shall call dev_pm_opp_put() after they have used the +OPP. Otherwise the memory for the OPP will never get freed and result in +memleak. + +dev_pm_opp_find_freq_exact + Search for an OPP based on an *exact* frequency and + availability. This function is especially useful to enable an OPP which + is not available by default. + Example: In a case when SoC framework detects a situation where a + higher frequency could be made available, it can use this function to + find the OPP prior to call the dev_pm_opp_enable to actually make + it available:: + + opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false); + dev_pm_opp_put(opp); + /* dont operate on the pointer.. just do a sanity check.. */ + if (IS_ERR(opp)) { + pr_err("frequency not disabled!\n"); + /* trigger appropriate actions.. */ + } else { + dev_pm_opp_enable(dev,1000000000); + } + + NOTE: + This is the only search function that operates on OPPs which are + not available. + +dev_pm_opp_find_freq_floor + Search for an available OPP which is *at most* the + provided frequency. This function is useful while searching for a lesser + match OR operating on OPP information in the order of decreasing + frequency. + Example: To find the highest opp for a device:: + + freq = ULONG_MAX; + opp = dev_pm_opp_find_freq_floor(dev, &freq); + dev_pm_opp_put(opp); + +dev_pm_opp_find_freq_ceil + Search for an available OPP which is *at least* the + provided frequency. This function is useful while searching for a + higher match OR operating on OPP information in the order of increasing + frequency. + Example 1: To find the lowest opp for a device:: + + freq = 0; + opp = dev_pm_opp_find_freq_ceil(dev, &freq); + dev_pm_opp_put(opp); + + Example 2: A simplified implementation of a SoC cpufreq_driver->target:: + + soc_cpufreq_target(..) + { + /* Do stuff like policy checks etc. */ + /* Find the best frequency match for the req */ + opp = dev_pm_opp_find_freq_ceil(dev, &freq); + dev_pm_opp_put(opp); + if (!IS_ERR(opp)) + soc_switch_to_freq_voltage(freq); + else + /* do something when we can't satisfy the req */ + /* do other stuff */ + } + +4. OPP Availability Control Functions +===================================== +A default OPP list registered with the OPP library may not cater to all possible +situation. The OPP library provides a set of functions to modify the +availability of a OPP within the OPP list. This allows SoC frameworks to have +fine grained dynamic control of which sets of OPPs are operationally available. +These functions are intended to *temporarily* remove an OPP in conditions such +as thermal considerations (e.g. don't use OPPx until the temperature drops). + +WARNING: + Do not use these functions in interrupt context. + +dev_pm_opp_enable + Make a OPP available for operation. + Example: Lets say that 1GHz OPP is to be made available only if the + SoC temperature is lower than a certain threshold. The SoC framework + implementation might choose to do something as follows:: + + if (cur_temp < temp_low_thresh) { + /* Enable 1GHz if it was disabled */ + opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false); + dev_pm_opp_put(opp); + /* just error check */ + if (!IS_ERR(opp)) + ret = dev_pm_opp_enable(dev, 1000000000); + else + goto try_something_else; + } + +dev_pm_opp_disable + Make an OPP to be not available for operation + Example: Lets say that 1GHz OPP is to be disabled if the temperature + exceeds a threshold value. The SoC framework implementation might + choose to do something as follows:: + + if (cur_temp > temp_high_thresh) { + /* Disable 1GHz if it was enabled */ + opp = dev_pm_opp_find_freq_exact(dev, 1000000000, true); + dev_pm_opp_put(opp); + /* just error check */ + if (!IS_ERR(opp)) + ret = dev_pm_opp_disable(dev, 1000000000); + else + goto try_something_else; + } + +5. OPP Data Retrieval Functions +=============================== +Since OPP library abstracts away the OPP information, a set of functions to pull +information from the OPP structure is necessary. Once an OPP pointer is +retrieved using the search functions, the following functions can be used by SoC +framework to retrieve the information represented inside the OPP layer. + +dev_pm_opp_get_voltage + Retrieve the voltage represented by the opp pointer. + Example: At a cpufreq transition to a different frequency, SoC + framework requires to set the voltage represented by the OPP using + the regulator framework to the Power Management chip providing the + voltage:: + + soc_switch_to_freq_voltage(freq) + { + /* do things */ + opp = dev_pm_opp_find_freq_ceil(dev, &freq); + v = dev_pm_opp_get_voltage(opp); + dev_pm_opp_put(opp); + if (v) + regulator_set_voltage(.., v); + /* do other things */ + } + +dev_pm_opp_get_freq + Retrieve the freq represented by the opp pointer. + Example: Lets say the SoC framework uses a couple of helper functions + we could pass opp pointers instead of doing additional parameters to + handle quiet a bit of data parameters:: + + soc_cpufreq_target(..) + { + /* do things.. */ + max_freq = ULONG_MAX; + max_opp = dev_pm_opp_find_freq_floor(dev,&max_freq); + requested_opp = dev_pm_opp_find_freq_ceil(dev,&freq); + if (!IS_ERR(max_opp) && !IS_ERR(requested_opp)) + r = soc_test_validity(max_opp, requested_opp); + dev_pm_opp_put(max_opp); + dev_pm_opp_put(requested_opp); + /* do other things */ + } + soc_test_validity(..) + { + if(dev_pm_opp_get_voltage(max_opp) < dev_pm_opp_get_voltage(requested_opp)) + return -EINVAL; + if(dev_pm_opp_get_freq(max_opp) < dev_pm_opp_get_freq(requested_opp)) + return -EINVAL; + /* do things.. */ + } + +dev_pm_opp_get_opp_count + Retrieve the number of available opps for a device + Example: Lets say a co-processor in the SoC needs to know the available + frequencies in a table, the main processor can notify as following:: + + soc_notify_coproc_available_frequencies() + { + /* Do things */ + num_available = dev_pm_opp_get_opp_count(dev); + speeds = kzalloc(sizeof(u32) * num_available, GFP_KERNEL); + /* populate the table in increasing order */ + freq = 0; + while (!IS_ERR(opp = dev_pm_opp_find_freq_ceil(dev, &freq))) { + speeds[i] = freq; + freq++; + i++; + dev_pm_opp_put(opp); + } + + soc_notify_coproc(AVAILABLE_FREQs, speeds, num_available); + /* Do other things */ + } + +6. Data Structures +================== +Typically an SoC contains multiple voltage domains which are variable. Each +domain is represented by a device pointer. The relationship to OPP can be +represented as follows:: + + SoC + |- device 1 + | |- opp 1 (availability, freq, voltage) + | |- opp 2 .. + ... ... + | `- opp n .. + |- device 2 + ... + `- device m + +OPP library maintains a internal list that the SoC framework populates and +accessed by various functions as described above. However, the structures +representing the actual OPPs and domains are internal to the OPP library itself +to allow for suitable abstraction reusable across systems. + +struct dev_pm_opp + The internal data structure of OPP library which is used to + represent an OPP. In addition to the freq, voltage, availability + information, it also contains internal book keeping information required + for the OPP library to operate on. Pointer to this structure is + provided back to the users such as SoC framework to be used as a + identifier for OPP in the interactions with OPP layer. + + WARNING: + The struct dev_pm_opp pointer should not be parsed or modified by the + users. The defaults of for an instance is populated by + dev_pm_opp_add, but the availability of the OPP can be modified + by dev_pm_opp_enable/disable functions. + +struct device + This is used to identify a domain to the OPP layer. The + nature of the device and it's implementation is left to the user of + OPP library such as the SoC framework. + +Overall, in a simplistic view, the data structure operations is represented as +following:: + + Initialization / modification: + +-----+ /- dev_pm_opp_enable + dev_pm_opp_add --> | opp | <------- + | +-----+ \- dev_pm_opp_disable + \-------> domain_info(device) + + Search functions: + /-- dev_pm_opp_find_freq_ceil ---\ +-----+ + domain_info<---- dev_pm_opp_find_freq_exact -----> | opp | + \-- dev_pm_opp_find_freq_floor ---/ +-----+ + + Retrieval functions: + +-----+ /- dev_pm_opp_get_voltage + | opp | <--- + +-----+ \- dev_pm_opp_get_freq + + domain_info <- dev_pm_opp_get_opp_count diff --git a/Documentation/power/opp.txt b/Documentation/power/opp.txt deleted file mode 100644 index 0c007e250cd1..000000000000 --- a/Documentation/power/opp.txt +++ /dev/null @@ -1,342 +0,0 @@ -Operating Performance Points (OPP) Library -========================================== - -(C) 2009-2010 Nishanth Menon , Texas Instruments Incorporated - -Contents --------- -1. Introduction -2. Initial OPP List Registration -3. OPP Search Functions -4. OPP Availability Control Functions -5. OPP Data Retrieval Functions -6. Data Structures - -1. Introduction -=============== -1.1 What is an Operating Performance Point (OPP)? - -Complex SoCs of today consists of a multiple sub-modules working in conjunction. -In an operational system executing varied use cases, not all modules in the SoC -need to function at their highest performing frequency all the time. To -facilitate this, sub-modules in a SoC are grouped into domains, allowing some -domains to run at lower voltage and frequency while other domains run at -voltage/frequency pairs that are higher. - -The set of discrete tuples consisting of frequency and voltage pairs that -the device will support per domain are called Operating Performance Points or -OPPs. - -As an example: -Let us consider an MPU device which supports the following: -{300MHz at minimum voltage of 1V}, {800MHz at minimum voltage of 1.2V}, -{1GHz at minimum voltage of 1.3V} - -We can represent these as three OPPs as the following {Hz, uV} tuples: -{300000000, 1000000} -{800000000, 1200000} -{1000000000, 1300000} - -1.2 Operating Performance Points Library - -OPP library provides a set of helper functions to organize and query the OPP -information. The library is located in drivers/base/power/opp.c and the header -is located in include/linux/pm_opp.h. OPP library can be enabled by enabling -CONFIG_PM_OPP from power management menuconfig menu. OPP library depends on -CONFIG_PM as certain SoCs such as Texas Instrument's OMAP framework allows to -optionally boot at a certain OPP without needing cpufreq. - -Typical usage of the OPP library is as follows: -(users) -> registers a set of default OPPs -> (library) -SoC framework -> modifies on required cases certain OPPs -> OPP layer - -> queries to search/retrieve information -> - -OPP layer expects each domain to be represented by a unique device pointer. SoC -framework registers a set of initial OPPs per device with the OPP layer. This -list is expected to be an optimally small number typically around 5 per device. -This initial list contains a set of OPPs that the framework expects to be safely -enabled by default in the system. - -Note on OPP Availability: ------------------------- -As the system proceeds to operate, SoC framework may choose to make certain -OPPs available or not available on each device based on various external -factors. Example usage: Thermal management or other exceptional situations where -SoC framework might choose to disable a higher frequency OPP to safely continue -operations until that OPP could be re-enabled if possible. - -OPP library facilitates this concept in it's implementation. The following -operational functions operate only on available opps: -opp_find_freq_{ceil, floor}, dev_pm_opp_get_voltage, dev_pm_opp_get_freq, dev_pm_opp_get_opp_count - -dev_pm_opp_find_freq_exact is meant to be used to find the opp pointer which can then -be used for dev_pm_opp_enable/disable functions to make an opp available as required. - -WARNING: Users of OPP library should refresh their availability count using -get_opp_count if dev_pm_opp_enable/disable functions are invoked for a device, the -exact mechanism to trigger these or the notification mechanism to other -dependent subsystems such as cpufreq are left to the discretion of the SoC -specific framework which uses the OPP library. Similar care needs to be taken -care to refresh the cpufreq table in cases of these operations. - -2. Initial OPP List Registration -================================ -The SoC implementation calls dev_pm_opp_add function iteratively to add OPPs per -device. It is expected that the SoC framework will register the OPP entries -optimally- typical numbers range to be less than 5. The list generated by -registering the OPPs is maintained by OPP library throughout the device -operation. The SoC framework can subsequently control the availability of the -OPPs dynamically using the dev_pm_opp_enable / disable functions. - -dev_pm_opp_add - Add a new OPP for a specific domain represented by the device pointer. - The OPP is defined using the frequency and voltage. Once added, the OPP - is assumed to be available and control of it's availability can be done - with the dev_pm_opp_enable/disable functions. OPP library internally stores - and manages this information in the opp struct. This function may be - used by SoC framework to define a optimal list as per the demands of - SoC usage environment. - - WARNING: Do not use this function in interrupt context. - - Example: - soc_pm_init() - { - /* Do things */ - r = dev_pm_opp_add(mpu_dev, 1000000, 900000); - if (!r) { - pr_err("%s: unable to register mpu opp(%d)\n", r); - goto no_cpufreq; - } - /* Do cpufreq things */ - no_cpufreq: - /* Do remaining things */ - } - -3. OPP Search Functions -======================= -High level framework such as cpufreq operates on frequencies. To map the -frequency back to the corresponding OPP, OPP library provides handy functions -to search the OPP list that OPP library internally manages. These search -functions return the matching pointer representing the opp if a match is -found, else returns error. These errors are expected to be handled by standard -error checks such as IS_ERR() and appropriate actions taken by the caller. - -Callers of these functions shall call dev_pm_opp_put() after they have used the -OPP. Otherwise the memory for the OPP will never get freed and result in -memleak. - -dev_pm_opp_find_freq_exact - Search for an OPP based on an *exact* frequency and - availability. This function is especially useful to enable an OPP which - is not available by default. - Example: In a case when SoC framework detects a situation where a - higher frequency could be made available, it can use this function to - find the OPP prior to call the dev_pm_opp_enable to actually make it available. - opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false); - dev_pm_opp_put(opp); - /* dont operate on the pointer.. just do a sanity check.. */ - if (IS_ERR(opp)) { - pr_err("frequency not disabled!\n"); - /* trigger appropriate actions.. */ - } else { - dev_pm_opp_enable(dev,1000000000); - } - - NOTE: This is the only search function that operates on OPPs which are - not available. - -dev_pm_opp_find_freq_floor - Search for an available OPP which is *at most* the - provided frequency. This function is useful while searching for a lesser - match OR operating on OPP information in the order of decreasing - frequency. - Example: To find the highest opp for a device: - freq = ULONG_MAX; - opp = dev_pm_opp_find_freq_floor(dev, &freq); - dev_pm_opp_put(opp); - -dev_pm_opp_find_freq_ceil - Search for an available OPP which is *at least* the - provided frequency. This function is useful while searching for a - higher match OR operating on OPP information in the order of increasing - frequency. - Example 1: To find the lowest opp for a device: - freq = 0; - opp = dev_pm_opp_find_freq_ceil(dev, &freq); - dev_pm_opp_put(opp); - Example 2: A simplified implementation of a SoC cpufreq_driver->target: - soc_cpufreq_target(..) - { - /* Do stuff like policy checks etc. */ - /* Find the best frequency match for the req */ - opp = dev_pm_opp_find_freq_ceil(dev, &freq); - dev_pm_opp_put(opp); - if (!IS_ERR(opp)) - soc_switch_to_freq_voltage(freq); - else - /* do something when we can't satisfy the req */ - /* do other stuff */ - } - -4. OPP Availability Control Functions -===================================== -A default OPP list registered with the OPP library may not cater to all possible -situation. The OPP library provides a set of functions to modify the -availability of a OPP within the OPP list. This allows SoC frameworks to have -fine grained dynamic control of which sets of OPPs are operationally available. -These functions are intended to *temporarily* remove an OPP in conditions such -as thermal considerations (e.g. don't use OPPx until the temperature drops). - -WARNING: Do not use these functions in interrupt context. - -dev_pm_opp_enable - Make a OPP available for operation. - Example: Lets say that 1GHz OPP is to be made available only if the - SoC temperature is lower than a certain threshold. The SoC framework - implementation might choose to do something as follows: - if (cur_temp < temp_low_thresh) { - /* Enable 1GHz if it was disabled */ - opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false); - dev_pm_opp_put(opp); - /* just error check */ - if (!IS_ERR(opp)) - ret = dev_pm_opp_enable(dev, 1000000000); - else - goto try_something_else; - } - -dev_pm_opp_disable - Make an OPP to be not available for operation - Example: Lets say that 1GHz OPP is to be disabled if the temperature - exceeds a threshold value. The SoC framework implementation might - choose to do something as follows: - if (cur_temp > temp_high_thresh) { - /* Disable 1GHz if it was enabled */ - opp = dev_pm_opp_find_freq_exact(dev, 1000000000, true); - dev_pm_opp_put(opp); - /* just error check */ - if (!IS_ERR(opp)) - ret = dev_pm_opp_disable(dev, 1000000000); - else - goto try_something_else; - } - -5. OPP Data Retrieval Functions -=============================== -Since OPP library abstracts away the OPP information, a set of functions to pull -information from the OPP structure is necessary. Once an OPP pointer is -retrieved using the search functions, the following functions can be used by SoC -framework to retrieve the information represented inside the OPP layer. - -dev_pm_opp_get_voltage - Retrieve the voltage represented by the opp pointer. - Example: At a cpufreq transition to a different frequency, SoC - framework requires to set the voltage represented by the OPP using - the regulator framework to the Power Management chip providing the - voltage. - soc_switch_to_freq_voltage(freq) - { - /* do things */ - opp = dev_pm_opp_find_freq_ceil(dev, &freq); - v = dev_pm_opp_get_voltage(opp); - dev_pm_opp_put(opp); - if (v) - regulator_set_voltage(.., v); - /* do other things */ - } - -dev_pm_opp_get_freq - Retrieve the freq represented by the opp pointer. - Example: Lets say the SoC framework uses a couple of helper functions - we could pass opp pointers instead of doing additional parameters to - handle quiet a bit of data parameters. - soc_cpufreq_target(..) - { - /* do things.. */ - max_freq = ULONG_MAX; - max_opp = dev_pm_opp_find_freq_floor(dev,&max_freq); - requested_opp = dev_pm_opp_find_freq_ceil(dev,&freq); - if (!IS_ERR(max_opp) && !IS_ERR(requested_opp)) - r = soc_test_validity(max_opp, requested_opp); - dev_pm_opp_put(max_opp); - dev_pm_opp_put(requested_opp); - /* do other things */ - } - soc_test_validity(..) - { - if(dev_pm_opp_get_voltage(max_opp) < dev_pm_opp_get_voltage(requested_opp)) - return -EINVAL; - if(dev_pm_opp_get_freq(max_opp) < dev_pm_opp_get_freq(requested_opp)) - return -EINVAL; - /* do things.. */ - } - -dev_pm_opp_get_opp_count - Retrieve the number of available opps for a device - Example: Lets say a co-processor in the SoC needs to know the available - frequencies in a table, the main processor can notify as following: - soc_notify_coproc_available_frequencies() - { - /* Do things */ - num_available = dev_pm_opp_get_opp_count(dev); - speeds = kzalloc(sizeof(u32) * num_available, GFP_KERNEL); - /* populate the table in increasing order */ - freq = 0; - while (!IS_ERR(opp = dev_pm_opp_find_freq_ceil(dev, &freq))) { - speeds[i] = freq; - freq++; - i++; - dev_pm_opp_put(opp); - } - - soc_notify_coproc(AVAILABLE_FREQs, speeds, num_available); - /* Do other things */ - } - -6. Data Structures -================== -Typically an SoC contains multiple voltage domains which are variable. Each -domain is represented by a device pointer. The relationship to OPP can be -represented as follows: -SoC - |- device 1 - | |- opp 1 (availability, freq, voltage) - | |- opp 2 .. - ... ... - | `- opp n .. - |- device 2 - ... - `- device m - -OPP library maintains a internal list that the SoC framework populates and -accessed by various functions as described above. However, the structures -representing the actual OPPs and domains are internal to the OPP library itself -to allow for suitable abstraction reusable across systems. - -struct dev_pm_opp - The internal data structure of OPP library which is used to - represent an OPP. In addition to the freq, voltage, availability - information, it also contains internal book keeping information required - for the OPP library to operate on. Pointer to this structure is - provided back to the users such as SoC framework to be used as a - identifier for OPP in the interactions with OPP layer. - - WARNING: The struct dev_pm_opp pointer should not be parsed or modified by the - users. The defaults of for an instance is populated by dev_pm_opp_add, but the - availability of the OPP can be modified by dev_pm_opp_enable/disable functions. - -struct device - This is used to identify a domain to the OPP layer. The - nature of the device and it's implementation is left to the user of - OPP library such as the SoC framework. - -Overall, in a simplistic view, the data structure operations is represented as -following: - -Initialization / modification: - +-----+ /- dev_pm_opp_enable -dev_pm_opp_add --> | opp | <------- - | +-----+ \- dev_pm_opp_disable - \-------> domain_info(device) - -Search functions: - /-- dev_pm_opp_find_freq_ceil ---\ +-----+ -domain_info<---- dev_pm_opp_find_freq_exact -----> | opp | - \-- dev_pm_opp_find_freq_floor ---/ +-----+ - -Retrieval functions: -+-----+ /- dev_pm_opp_get_voltage -| opp | <--- -+-----+ \- dev_pm_opp_get_freq - -domain_info <- dev_pm_opp_get_opp_count diff --git a/Documentation/power/pci.rst b/Documentation/power/pci.rst new file mode 100644 index 000000000000..0e2ef7429304 --- /dev/null +++ b/Documentation/power/pci.rst @@ -0,0 +1,1135 @@ +==================== +PCI Power Management +==================== + +Copyright (c) 2010 Rafael J. Wysocki , Novell Inc. + +An overview of concepts and the Linux kernel's interfaces related to PCI power +management. Based on previous work by Patrick Mochel +(and others). + +This document only covers the aspects of power management specific to PCI +devices. For general description of the kernel's interfaces related to device +power management refer to Documentation/driver-api/pm/devices.rst and +Documentation/power/runtime_pm.rst. + +.. contents: + + 1. Hardware and Platform Support for PCI Power Management + 2. PCI Subsystem and Device Power Management + 3. PCI Device Drivers and Power Management + 4. Resources + + +1. Hardware and Platform Support for PCI Power Management +========================================================= + +1.1. Native and Platform-Based Power Management +----------------------------------------------- + +In general, power management is a feature allowing one to save energy by putting +devices into states in which they draw less power (low-power states) at the +price of reduced functionality or performance. + +Usually, a device is put into a low-power state when it is underutilized or +completely inactive. However, when it is necessary to use the device once +again, it has to be put back into the "fully functional" state (full-power +state). This may happen when there are some data for the device to handle or +as a result of an external event requiring the device to be active, which may +be signaled by the device itself. + +PCI devices may be put into low-power states in two ways, by using the device +capabilities introduced by the PCI Bus Power Management Interface Specification, +or with the help of platform firmware, such as an ACPI BIOS. In the first +approach, that is referred to as the native PCI power management (native PCI PM) +in what follows, the device power state is changed as a result of writing a +specific value into one of its standard configuration registers. The second +approach requires the platform firmware to provide special methods that may be +used by the kernel to change the device's power state. + +Devices supporting the native PCI PM usually can generate wakeup signals called +Power Management Events (PMEs) to let the kernel know about external events +requiring the device to be active. After receiving a PME the kernel is supposed +to put the device that sent it into the full-power state. However, the PCI Bus +Power Management Interface Specification doesn't define any standard method of +delivering the PME from the device to the CPU and the operating system kernel. +It is assumed that the platform firmware will perform this task and therefore, +even though a PCI device is set up to generate PMEs, it also may be necessary to +prepare the platform firmware for notifying the CPU of the PMEs coming from the +device (e.g. by generating interrupts). + +In turn, if the methods provided by the platform firmware are used for changing +the power state of a device, usually the platform also provides a method for +preparing the device to generate wakeup signals. In that case, however, it +often also is necessary to prepare the device for generating PMEs using the +native PCI PM mechanism, because the method provided by the platform depends on +that. + +Thus in many situations both the native and the platform-based power management +mechanisms have to be used simultaneously to obtain the desired result. + +1.2. Native PCI Power Management +-------------------------------- + +The PCI Bus Power Management Interface Specification (PCI PM Spec) was +introduced between the PCI 2.1 and PCI 2.2 Specifications. It defined a +standard interface for performing various operations related to power +management. + +The implementation of the PCI PM Spec is optional for conventional PCI devices, +but it is mandatory for PCI Express devices. If a device supports the PCI PM +Spec, it has an 8 byte power management capability field in its PCI +configuration space. This field is used to describe and control the standard +features related to the native PCI power management. + +The PCI PM Spec defines 4 operating states for devices (D0-D3) and for buses +(B0-B3). The higher the number, the less power is drawn by the device or bus +in that state. However, the higher the number, the longer the latency for +the device or bus to return to the full-power state (D0 or B0, respectively). + +There are two variants of the D3 state defined by the specification. The first +one is D3hot, referred to as the software accessible D3, because devices can be +programmed to go into it. The second one, D3cold, is the state that PCI devices +are in when the supply voltage (Vcc) is removed from them. It is not possible +to program a PCI device to go into D3cold, although there may be a programmable +interface for putting the bus the device is on into a state in which Vcc is +removed from all devices on the bus. + +PCI bus power management, however, is not supported by the Linux kernel at the +time of this writing and therefore it is not covered by this document. + +Note that every PCI device can be in the full-power state (D0) or in D3cold, +regardless of whether or not it implements the PCI PM Spec. In addition to +that, if the PCI PM Spec is implemented by the device, it must support D3hot +as well as D0. The support for the D1 and D2 power states is optional. + +PCI devices supporting the PCI PM Spec can be programmed to go to any of the +supported low-power states (except for D3cold). While in D1-D3hot the +standard configuration registers of the device must be accessible to software +(i.e. the device is required to respond to PCI configuration accesses), although +its I/O and memory spaces are then disabled. This allows the device to be +programmatically put into D0. Thus the kernel can switch the device back and +forth between D0 and the supported low-power states (except for D3cold) and the +possible power state transitions the device can undergo are the following: + ++----------------------------+ +| Current State | New State | ++----------------------------+ +| D0 | D1, D2, D3 | ++----------------------------+ +| D1 | D2, D3 | ++----------------------------+ +| D2 | D3 | ++----------------------------+ +| D1, D2, D3 | D0 | ++----------------------------+ + +The transition from D3cold to D0 occurs when the supply voltage is provided to +the device (i.e. power is restored). In that case the device returns to D0 with +a full power-on reset sequence and the power-on defaults are restored to the +device by hardware just as at initial power up. + +PCI devices supporting the PCI PM Spec can be programmed to generate PMEs +while in a low-power state (D1-D3), but they are not required to be capable +of generating PMEs from all supported low-power states. In particular, the +capability of generating PMEs from D3cold is optional and depends on the +presence of additional voltage (3.3Vaux) allowing the device to remain +sufficiently active to generate a wakeup signal. + +1.3. ACPI Device Power Management +--------------------------------- + +The platform firmware support for the power management of PCI devices is +system-specific. However, if the system in question is compliant with the +Advanced Configuration and Power Interface (ACPI) Specification, like the +majority of x86-based systems, it is supposed to implement device power +management interfaces defined by the ACPI standard. + +For this purpose the ACPI BIOS provides special functions called "control +methods" that may be executed by the kernel to perform specific tasks, such as +putting a device into a low-power state. These control methods are encoded +using special byte-code language called the ACPI Machine Language (AML) and +stored in the machine's BIOS. The kernel loads them from the BIOS and executes +them as needed using an AML interpreter that translates the AML byte code into +computations and memory or I/O space accesses. This way, in theory, a BIOS +writer can provide the kernel with a means to perform actions depending +on the system design in a system-specific fashion. + +ACPI control methods may be divided into global control methods, that are not +associated with any particular devices, and device control methods, that have +to be defined separately for each device supposed to be handled with the help of +the platform. This means, in particular, that ACPI device control methods can +only be used to handle devices that the BIOS writer knew about in advance. The +ACPI methods used for device power management fall into that category. + +The ACPI specification assumes that devices can be in one of four power states +labeled as D0, D1, D2, and D3 that roughly correspond to the native PCI PM +D0-D3 states (although the difference between D3hot and D3cold is not taken +into account by ACPI). Moreover, for each power state of a device there is a +set of power resources that have to be enabled for the device to be put into +that state. These power resources are controlled (i.e. enabled or disabled) +with the help of their own control methods, _ON and _OFF, that have to be +defined individually for each of them. + +To put a device into the ACPI power state Dx (where x is a number between 0 and +3 inclusive) the kernel is supposed to (1) enable the power resources required +by the device in this state using their _ON control methods and (2) execute the +_PSx control method defined for the device. In addition to that, if the device +is going to be put into a low-power state (D1-D3) and is supposed to generate +wakeup signals from that state, the _DSW (or _PSW, replaced with _DSW by ACPI +3.0) control method defined for it has to be executed before _PSx. Power +resources that are not required by the device in the target power state and are +not required any more by any other device should be disabled (by executing their +_OFF control methods). If the current power state of the device is D3, it can +only be put into D0 this way. + +However, quite often the power states of devices are changed during a +system-wide transition into a sleep state or back into the working state. ACPI +defines four system sleep states, S1, S2, S3, and S4, and denotes the system +working state as S0. In general, the target system sleep (or working) state +determines the highest power (lowest number) state the device can be put +into and the kernel is supposed to obtain this information by executing the +device's _SxD control method (where x is a number between 0 and 4 inclusive). +If the device is required to wake up the system from the target sleep state, the +lowest power (highest number) state it can be put into is also determined by the +target state of the system. The kernel is then supposed to use the device's +_SxW control method to obtain the number of that state. It also is supposed to +use the device's _PRW control method to learn which power resources need to be +enabled for the device to be able to generate wakeup signals. + +1.4. Wakeup Signaling +--------------------- + +Wakeup signals generated by PCI devices, either as native PCI PMEs, or as +a result of the execution of the _DSW (or _PSW) ACPI control method before +putting the device into a low-power state, have to be caught and handled as +appropriate. If they are sent while the system is in the working state +(ACPI S0), they should be translated into interrupts so that the kernel can +put the devices generating them into the full-power state and take care of the +events that triggered them. In turn, if they are sent while the system is +sleeping, they should cause the system's core logic to trigger wakeup. + +On ACPI-based systems wakeup signals sent by conventional PCI devices are +converted into ACPI General-Purpose Events (GPEs) which are hardware signals +from the system core logic generated in response to various events that need to +be acted upon. Every GPE is associated with one or more sources of potentially +interesting events. In particular, a GPE may be associated with a PCI device +capable of signaling wakeup. The information on the connections between GPEs +and event sources is recorded in the system's ACPI BIOS from where it can be +read by the kernel. + +If a PCI device known to the system's ACPI BIOS signals wakeup, the GPE +associated with it (if there is one) is triggered. The GPEs associated with PCI +bridges may also be triggered in response to a wakeup signal from one of the +devices below the bridge (this also is the case for root bridges) and, for +example, native PCI PMEs from devices unknown to the system's ACPI BIOS may be +handled this way. + +A GPE may be triggered when the system is sleeping (i.e. when it is in one of +the ACPI S1-S4 states), in which case system wakeup is started by its core logic +(the device that was the source of the signal causing the system wakeup to occur +may be identified later). The GPEs used in such situations are referred to as +wakeup GPEs. + +Usually, however, GPEs are also triggered when the system is in the working +state (ACPI S0) and in that case the system's core logic generates a System +Control Interrupt (SCI) to notify the kernel of the event. Then, the SCI +handler identifies the GPE that caused the interrupt to be generated which, +in turn, allows the kernel to identify the source of the event (that may be +a PCI device signaling wakeup). The GPEs used for notifying the kernel of +events occurring while the system is in the working state are referred to as +runtime GPEs. + +Unfortunately, there is no standard way of handling wakeup signals sent by +conventional PCI devices on systems that are not ACPI-based, but there is one +for PCI Express devices. Namely, the PCI Express Base Specification introduced +a native mechanism for converting native PCI PMEs into interrupts generated by +root ports. For conventional PCI devices native PMEs are out-of-band, so they +are routed separately and they need not pass through bridges (in principle they +may be routed directly to the system's core logic), but for PCI Express devices +they are in-band messages that have to pass through the PCI Express hierarchy, +including the root port on the path from the device to the Root Complex. Thus +it was possible to introduce a mechanism by which a root port generates an +interrupt whenever it receives a PME message from one of the devices below it. +The PCI Express Requester ID of the device that sent the PME message is then +recorded in one of the root port's configuration registers from where it may be +read by the interrupt handler allowing the device to be identified. [PME +messages sent by PCI Express endpoints integrated with the Root Complex don't +pass through root ports, but instead they cause a Root Complex Event Collector +(if there is one) to generate interrupts.] + +In principle the native PCI Express PME signaling may also be used on ACPI-based +systems along with the GPEs, but to use it the kernel has to ask the system's +ACPI BIOS to release control of root port configuration registers. The ACPI +BIOS, however, is not required to allow the kernel to control these registers +and if it doesn't do that, the kernel must not modify their contents. Of course +the native PCI Express PME signaling cannot be used by the kernel in that case. + + +2. PCI Subsystem and Device Power Management +============================================ + +2.1. Device Power Management Callbacks +-------------------------------------- + +The PCI Subsystem participates in the power management of PCI devices in a +number of ways. First of all, it provides an intermediate code layer between +the device power management core (PM core) and PCI device drivers. +Specifically, the pm field of the PCI subsystem's struct bus_type object, +pci_bus_type, points to a struct dev_pm_ops object, pci_dev_pm_ops, containing +pointers to several device power management callbacks:: + + const struct dev_pm_ops pci_dev_pm_ops = { + .prepare = pci_pm_prepare, + .complete = pci_pm_complete, + .suspend = pci_pm_suspend, + .resume = pci_pm_resume, + .freeze = pci_pm_freeze, + .thaw = pci_pm_thaw, + .poweroff = pci_pm_poweroff, + .restore = pci_pm_restore, + .suspend_noirq = pci_pm_suspend_noirq, + .resume_noirq = pci_pm_resume_noirq, + .freeze_noirq = pci_pm_freeze_noirq, + .thaw_noirq = pci_pm_thaw_noirq, + .poweroff_noirq = pci_pm_poweroff_noirq, + .restore_noirq = pci_pm_restore_noirq, + .runtime_suspend = pci_pm_runtime_suspend, + .runtime_resume = pci_pm_runtime_resume, + .runtime_idle = pci_pm_runtime_idle, + }; + +These callbacks are executed by the PM core in various situations related to +device power management and they, in turn, execute power management callbacks +provided by PCI device drivers. They also perform power management operations +involving some standard configuration registers of PCI devices that device +drivers need not know or care about. + +The structure representing a PCI device, struct pci_dev, contains several fields +that these callbacks operate on:: + + struct pci_dev { + ... + pci_power_t current_state; /* Current operating state. */ + int pm_cap; /* PM capability offset in the + configuration space */ + unsigned int pme_support:5; /* Bitmask of states from which PME# + can be generated */ + unsigned int pme_interrupt:1;/* Is native PCIe PME signaling used? */ + unsigned int d1_support:1; /* Low power state D1 is supported */ + unsigned int d2_support:1; /* Low power state D2 is supported */ + unsigned int no_d1d2:1; /* D1 and D2 are forbidden */ + unsigned int wakeup_prepared:1; /* Device prepared for wake up */ + unsigned int d3_delay; /* D3->D0 transition time in ms */ + ... + }; + +They also indirectly use some fields of the struct device that is embedded in +struct pci_dev. + +2.2. Device Initialization +-------------------------- + +The PCI subsystem's first task related to device power management is to +prepare the device for power management and initialize the fields of struct +pci_dev used for this purpose. This happens in two functions defined in +drivers/pci/pci.c, pci_pm_init() and platform_pci_wakeup_init(). + +The first of these functions checks if the device supports native PCI PM +and if that's the case the offset of its power management capability structure +in the configuration space is stored in the pm_cap field of the device's struct +pci_dev object. Next, the function checks which PCI low-power states are +supported by the device and from which low-power states the device can generate +native PCI PMEs. The power management fields of the device's struct pci_dev and +the struct device embedded in it are updated accordingly and the generation of +PMEs by the device is disabled. + +The second function checks if the device can be prepared to signal wakeup with +the help of the platform firmware, such as the ACPI BIOS. If that is the case, +the function updates the wakeup fields in struct device embedded in the +device's struct pci_dev and uses the firmware-provided method to prevent the +device from signaling wakeup. + +At this point the device is ready for power management. For driverless devices, +however, this functionality is limited to a few basic operations carried out +during system-wide transitions to a sleep state and back to the working state. + +2.3. Runtime Device Power Management +------------------------------------ + +The PCI subsystem plays a vital role in the runtime power management of PCI +devices. For this purpose it uses the general runtime power management +(runtime PM) framework described in Documentation/power/runtime_pm.rst. +Namely, it provides subsystem-level callbacks:: + + pci_pm_runtime_suspend() + pci_pm_runtime_resume() + pci_pm_runtime_idle() + +that are executed by the core runtime PM routines. It also implements the +entire mechanics necessary for handling runtime wakeup signals from PCI devices +in low-power states, which at the time of this writing works for both the native +PCI Express PME signaling and the ACPI GPE-based wakeup signaling described in +Section 1. + +First, a PCI device is put into a low-power state, or suspended, with the help +of pm_schedule_suspend() or pm_runtime_suspend() which for PCI devices call +pci_pm_runtime_suspend() to do the actual job. For this to work, the device's +driver has to provide a pm->runtime_suspend() callback (see below), which is +run by pci_pm_runtime_suspend() as the first action. If the driver's callback +returns successfully, the device's standard configuration registers are saved, +the device is prepared to generate wakeup signals and, finally, it is put into +the target low-power state. + +The low-power state to put the device into is the lowest-power (highest number) +state from which it can signal wakeup. The exact method of signaling wakeup is +system-dependent and is determined by the PCI subsystem on the basis of the +reported capabilities of the device and the platform firmware. To prepare the +device for signaling wakeup and put it into the selected low-power state, the +PCI subsystem can use the platform firmware as well as the device's native PCI +PM capabilities, if supported. + +It is expected that the device driver's pm->runtime_suspend() callback will +not attempt to prepare the device for signaling wakeup or to put it into a +low-power state. The driver ought to leave these tasks to the PCI subsystem +that has all of the information necessary to perform them. + +A suspended device is brought back into the "active" state, or resumed, +with the help of pm_request_resume() or pm_runtime_resume() which both call +pci_pm_runtime_resume() for PCI devices. Again, this only works if the device's +driver provides a pm->runtime_resume() callback (see below). However, before +the driver's callback is executed, pci_pm_runtime_resume() brings the device +back into the full-power state, prevents it from signaling wakeup while in that +state and restores its standard configuration registers. Thus the driver's +callback need not worry about the PCI-specific aspects of the device resume. + +Note that generally pci_pm_runtime_resume() may be called in two different +situations. First, it may be called at the request of the device's driver, for +example if there are some data for it to process. Second, it may be called +as a result of a wakeup signal from the device itself (this sometimes is +referred to as "remote wakeup"). Of course, for this purpose the wakeup signal +is handled in one of the ways described in Section 1 and finally converted into +a notification for the PCI subsystem after the source device has been +identified. + +The pci_pm_runtime_idle() function, called for PCI devices by pm_runtime_idle() +and pm_request_idle(), executes the device driver's pm->runtime_idle() +callback, if defined, and if that callback doesn't return error code (or is not +present at all), suspends the device with the help of pm_runtime_suspend(). +Sometimes pci_pm_runtime_idle() is called automatically by the PM core (for +example, it is called right after the device has just been resumed), in which +cases it is expected to suspend the device if that makes sense. Usually, +however, the PCI subsystem doesn't really know if the device really can be +suspended, so it lets the device's driver decide by running its +pm->runtime_idle() callback. + +2.4. System-Wide Power Transitions +---------------------------------- +There are a few different types of system-wide power transitions, described in +Documentation/driver-api/pm/devices.rst. Each of them requires devices to be handled +in a specific way and the PM core executes subsystem-level power management +callbacks for this purpose. They are executed in phases such that each phase +involves executing the same subsystem-level callback for every device belonging +to the given subsystem before the next phase begins. These phases always run +after tasks have been frozen. + +2.4.1. System Suspend +^^^^^^^^^^^^^^^^^^^^^ + +When the system is going into a sleep state in which the contents of memory will +be preserved, such as one of the ACPI sleep states S1-S3, the phases are: + + prepare, suspend, suspend_noirq. + +The following PCI bus type's callbacks, respectively, are used in these phases:: + + pci_pm_prepare() + pci_pm_suspend() + pci_pm_suspend_noirq() + +The pci_pm_prepare() routine first puts the device into the "fully functional" +state with the help of pm_runtime_resume(). Then, it executes the device +driver's pm->prepare() callback if defined (i.e. if the driver's struct +dev_pm_ops object is present and the prepare pointer in that object is valid). + +The pci_pm_suspend() routine first checks if the device's driver implements +legacy PCI suspend routines (see Section 3), in which case the driver's legacy +suspend callback is executed, if present, and its result is returned. Next, if +the device's driver doesn't provide a struct dev_pm_ops object (containing +pointers to the driver's callbacks), pci_pm_default_suspend() is called, which +simply turns off the device's bus master capability and runs +pcibios_disable_device() to disable it, unless the device is a bridge (PCI +bridges are ignored by this routine). Next, the device driver's pm->suspend() +callback is executed, if defined, and its result is returned if it fails. +Finally, pci_fixup_device() is called to apply hardware suspend quirks related +to the device if necessary. + +Note that the suspend phase is carried out asynchronously for PCI devices, so +the pci_pm_suspend() callback may be executed in parallel for any pair of PCI +devices that don't depend on each other in a known way (i.e. none of the paths +in the device tree from the root bridge to a leaf device contains both of them). + +The pci_pm_suspend_noirq() routine is executed after suspend_device_irqs() has +been called, which means that the device driver's interrupt handler won't be +invoked while this routine is running. It first checks if the device's driver +implements legacy PCI suspends routines (Section 3), in which case the legacy +late suspend routine is called and its result is returned (the standard +configuration registers of the device are saved if the driver's callback hasn't +done that). Second, if the device driver's struct dev_pm_ops object is not +present, the device's standard configuration registers are saved and the routine +returns success. Otherwise the device driver's pm->suspend_noirq() callback is +executed, if present, and its result is returned if it fails. Next, if the +device's standard configuration registers haven't been saved yet (one of the +device driver's callbacks executed before might do that), pci_pm_suspend_noirq() +saves them, prepares the device to signal wakeup (if necessary) and puts it into +a low-power state. + +The low-power state to put the device into is the lowest-power (highest number) +state from which it can signal wakeup while the system is in the target sleep +state. Just like in the runtime PM case described above, the mechanism of +signaling wakeup is system-dependent and determined by the PCI subsystem, which +is also responsible for preparing the device to signal wakeup from the system's +target sleep state as appropriate. + +PCI device drivers (that don't implement legacy power management callbacks) are +generally not expected to prepare devices for signaling wakeup or to put them +into low-power states. However, if one of the driver's suspend callbacks +(pm->suspend() or pm->suspend_noirq()) saves the device's standard configuration +registers, pci_pm_suspend_noirq() will assume that the device has been prepared +to signal wakeup and put into a low-power state by the driver (the driver is +then assumed to have used the helper functions provided by the PCI subsystem for +this purpose). PCI device drivers are not encouraged to do that, but in some +rare cases doing that in the driver may be the optimum approach. + +2.4.2. System Resume +^^^^^^^^^^^^^^^^^^^^ + +When the system is undergoing a transition from a sleep state in which the +contents of memory have been preserved, such as one of the ACPI sleep states +S1-S3, into the working state (ACPI S0), the phases are: + + resume_noirq, resume, complete. + +The following PCI bus type's callbacks, respectively, are executed in these +phases:: + + pci_pm_resume_noirq() + pci_pm_resume() + pci_pm_complete() + +The pci_pm_resume_noirq() routine first puts the device into the full-power +state, restores its standard configuration registers and applies early resume +hardware quirks related to the device, if necessary. This is done +unconditionally, regardless of whether or not the device's driver implements +legacy PCI power management callbacks (this way all PCI devices are in the +full-power state and their standard configuration registers have been restored +when their interrupt handlers are invoked for the first time during resume, +which allows the kernel to avoid problems with the handling of shared interrupts +by drivers whose devices are still suspended). If legacy PCI power management +callbacks (see Section 3) are implemented by the device's driver, the legacy +early resume callback is executed and its result is returned. Otherwise, the +device driver's pm->resume_noirq() callback is executed, if defined, and its +result is returned. + +The pci_pm_resume() routine first checks if the device's standard configuration +registers have been restored and restores them if that's not the case (this +only is necessary in the error path during a failing suspend). Next, resume +hardware quirks related to the device are applied, if necessary, and if the +device's driver implements legacy PCI power management callbacks (see +Section 3), the driver's legacy resume callback is executed and its result is +returned. Otherwise, the device's wakeup signaling mechanisms are blocked and +its driver's pm->resume() callback is executed, if defined (the callback's +result is then returned). + +The resume phase is carried out asynchronously for PCI devices, like the +suspend phase described above, which means that if two PCI devices don't depend +on each other in a known way, the pci_pm_resume() routine may be executed for +the both of them in parallel. + +The pci_pm_complete() routine only executes the device driver's pm->complete() +callback, if defined. + +2.4.3. System Hibernation +^^^^^^^^^^^^^^^^^^^^^^^^^ + +System hibernation is more complicated than system suspend, because it requires +a system image to be created and written into a persistent storage medium. The +image is created atomically and all devices are quiesced, or frozen, before that +happens. + +The freezing of devices is carried out after enough memory has been freed (at +the time of this writing the image creation requires at least 50% of system RAM +to be free) in the following three phases: + + prepare, freeze, freeze_noirq + +that correspond to the PCI bus type's callbacks:: + + pci_pm_prepare() + pci_pm_freeze() + pci_pm_freeze_noirq() + +This means that the prepare phase is exactly the same as for system suspend. +The other two phases, however, are different. + +The pci_pm_freeze() routine is quite similar to pci_pm_suspend(), but it runs +the device driver's pm->freeze() callback, if defined, instead of pm->suspend(), +and it doesn't apply the suspend-related hardware quirks. It is executed +asynchronously for different PCI devices that don't depend on each other in a +known way. + +The pci_pm_freeze_noirq() routine, in turn, is similar to +pci_pm_suspend_noirq(), but it calls the device driver's pm->freeze_noirq() +routine instead of pm->suspend_noirq(). It also doesn't attempt to prepare the +device for signaling wakeup and put it into a low-power state. Still, it saves +the device's standard configuration registers if they haven't been saved by one +of the driver's callbacks. + +Once the image has been created, it has to be saved. However, at this point all +devices are frozen and they cannot handle I/O, while their ability to handle +I/O is obviously necessary for the image saving. Thus they have to be brought +back to the fully functional state and this is done in the following phases: + + thaw_noirq, thaw, complete + +using the following PCI bus type's callbacks:: + + pci_pm_thaw_noirq() + pci_pm_thaw() + pci_pm_complete() + +respectively. + +The first of them, pci_pm_thaw_noirq(), is analogous to pci_pm_resume_noirq(), +but it doesn't put the device into the full power state and doesn't attempt to +restore its standard configuration registers. It also executes the device +driver's pm->thaw_noirq() callback, if defined, instead of pm->resume_noirq(). + +The pci_pm_thaw() routine is similar to pci_pm_resume(), but it runs the device +driver's pm->thaw() callback instead of pm->resume(). It is executed +asynchronously for different PCI devices that don't depend on each other in a +known way. + +The complete phase it the same as for system resume. + +After saving the image, devices need to be powered down before the system can +enter the target sleep state (ACPI S4 for ACPI-based systems). This is done in +three phases: + + prepare, poweroff, poweroff_noirq + +where the prepare phase is exactly the same as for system suspend. The other +two phases are analogous to the suspend and suspend_noirq phases, respectively. +The PCI subsystem-level callbacks they correspond to:: + + pci_pm_poweroff() + pci_pm_poweroff_noirq() + +work in analogy with pci_pm_suspend() and pci_pm_poweroff_noirq(), respectively, +although they don't attempt to save the device's standard configuration +registers. + +2.4.4. System Restore +^^^^^^^^^^^^^^^^^^^^^ + +System restore requires a hibernation image to be loaded into memory and the +pre-hibernation memory contents to be restored before the pre-hibernation system +activity can be resumed. + +As described in Documentation/driver-api/pm/devices.rst, the hibernation image is loaded +into memory by a fresh instance of the kernel, called the boot kernel, which in +turn is loaded and run by a boot loader in the usual way. After the boot kernel +has loaded the image, it needs to replace its own code and data with the code +and data of the "hibernated" kernel stored within the image, called the image +kernel. For this purpose all devices are frozen just like before creating +the image during hibernation, in the + + prepare, freeze, freeze_noirq + +phases described above. However, the devices affected by these phases are only +those having drivers in the boot kernel; other devices will still be in whatever +state the boot loader left them. + +Should the restoration of the pre-hibernation memory contents fail, the boot +kernel would go through the "thawing" procedure described above, using the +thaw_noirq, thaw, and complete phases (that will only affect the devices having +drivers in the boot kernel), and then continue running normally. + +If the pre-hibernation memory contents are restored successfully, which is the +usual situation, control is passed to the image kernel, which then becomes +responsible for bringing the system back to the working state. To achieve this, +it must restore the devices' pre-hibernation functionality, which is done much +like waking up from the memory sleep state, although it involves different +phases: + + restore_noirq, restore, complete + +The first two of these are analogous to the resume_noirq and resume phases +described above, respectively, and correspond to the following PCI subsystem +callbacks:: + + pci_pm_restore_noirq() + pci_pm_restore() + +These callbacks work in analogy with pci_pm_resume_noirq() and pci_pm_resume(), +respectively, but they execute the device driver's pm->restore_noirq() and +pm->restore() callbacks, if available. + +The complete phase is carried out in exactly the same way as during system +resume. + + +3. PCI Device Drivers and Power Management +========================================== + +3.1. Power Management Callbacks +------------------------------- + +PCI device drivers participate in power management by providing callbacks to be +executed by the PCI subsystem's power management routines described above and by +controlling the runtime power management of their devices. + +At the time of this writing there are two ways to define power management +callbacks for a PCI device driver, the recommended one, based on using a +dev_pm_ops structure described in Documentation/driver-api/pm/devices.rst, and the +"legacy" one, in which the .suspend(), .suspend_late(), .resume_early(), and +.resume() callbacks from struct pci_driver are used. The legacy approach, +however, doesn't allow one to define runtime power management callbacks and is +not really suitable for any new drivers. Therefore it is not covered by this +document (refer to the source code to learn more about it). + +It is recommended that all PCI device drivers define a struct dev_pm_ops object +containing pointers to power management (PM) callbacks that will be executed by +the PCI subsystem's PM routines in various circumstances. A pointer to the +driver's struct dev_pm_ops object has to be assigned to the driver.pm field in +its struct pci_driver object. Once that has happened, the "legacy" PM callbacks +in struct pci_driver are ignored (even if they are not NULL). + +The PM callbacks in struct dev_pm_ops are not mandatory and if they are not +defined (i.e. the respective fields of struct dev_pm_ops are unset) the PCI +subsystem will handle the device in a simplified default manner. If they are +defined, though, they are expected to behave as described in the following +subsections. + +3.1.1. prepare() +^^^^^^^^^^^^^^^^ + +The prepare() callback is executed during system suspend, during hibernation +(when a hibernation image is about to be created), during power-off after +saving a hibernation image and during system restore, when a hibernation image +has just been loaded into memory. + +This callback is only necessary if the driver's device has children that in +general may be registered at any time. In that case the role of the prepare() +callback is to prevent new children of the device from being registered until +one of the resume_noirq(), thaw_noirq(), or restore_noirq() callbacks is run. + +In addition to that the prepare() callback may carry out some operations +preparing the device to be suspended, although it should not allocate memory +(if additional memory is required to suspend the device, it has to be +preallocated earlier, for example in a suspend/hibernate notifier as described +in Documentation/driver-api/pm/notifiers.rst). + +3.1.2. suspend() +^^^^^^^^^^^^^^^^ + +The suspend() callback is only executed during system suspend, after prepare() +callbacks have been executed for all devices in the system. + +This callback is expected to quiesce the device and prepare it to be put into a +low-power state by the PCI subsystem. It is not required (in fact it even is +not recommended) that a PCI driver's suspend() callback save the standard +configuration registers of the device, prepare it for waking up the system, or +put it into a low-power state. All of these operations can very well be taken +care of by the PCI subsystem, without the driver's participation. + +However, in some rare case it is convenient to carry out these operations in +a PCI driver. Then, pci_save_state(), pci_prepare_to_sleep(), and +pci_set_power_state() should be used to save the device's standard configuration +registers, to prepare it for system wakeup (if necessary), and to put it into a +low-power state, respectively. Moreover, if the driver calls pci_save_state(), +the PCI subsystem will not execute either pci_prepare_to_sleep(), or +pci_set_power_state() for its device, so the driver is then responsible for +handling the device as appropriate. + +While the suspend() callback is being executed, the driver's interrupt handler +can be invoked to handle an interrupt from the device, so all suspend-related +operations relying on the driver's ability to handle interrupts should be +carried out in this callback. + +3.1.3. suspend_noirq() +^^^^^^^^^^^^^^^^^^^^^^ + +The suspend_noirq() callback is only executed during system suspend, after +suspend() callbacks have been executed for all devices in the system and +after device interrupts have been disabled by the PM core. + +The difference between suspend_noirq() and suspend() is that the driver's +interrupt handler will not be invoked while suspend_noirq() is running. Thus +suspend_noirq() can carry out operations that would cause race conditions to +arise if they were performed in suspend(). + +3.1.4. freeze() +^^^^^^^^^^^^^^^ + +The freeze() callback is hibernation-specific and is executed in two situations, +during hibernation, after prepare() callbacks have been executed for all devices +in preparation for the creation of a system image, and during restore, +after a system image has been loaded into memory from persistent storage and the +prepare() callbacks have been executed for all devices. + +The role of this callback is analogous to the role of the suspend() callback +described above. In fact, they only need to be different in the rare cases when +the driver takes the responsibility for putting the device into a low-power +state. + +In that cases the freeze() callback should not prepare the device system wakeup +or put it into a low-power state. Still, either it or freeze_noirq() should +save the device's standard configuration registers using pci_save_state(). + +3.1.5. freeze_noirq() +^^^^^^^^^^^^^^^^^^^^^ + +The freeze_noirq() callback is hibernation-specific. It is executed during +hibernation, after prepare() and freeze() callbacks have been executed for all +devices in preparation for the creation of a system image, and during restore, +after a system image has been loaded into memory and after prepare() and +freeze() callbacks have been executed for all devices. It is always executed +after device interrupts have been disabled by the PM core. + +The role of this callback is analogous to the role of the suspend_noirq() +callback described above and it very rarely is necessary to define +freeze_noirq(). + +The difference between freeze_noirq() and freeze() is analogous to the +difference between suspend_noirq() and suspend(). + +3.1.6. poweroff() +^^^^^^^^^^^^^^^^^ + +The poweroff() callback is hibernation-specific. It is executed when the system +is about to be powered off after saving a hibernation image to a persistent +storage. prepare() callbacks are executed for all devices before poweroff() is +called. + +The role of this callback is analogous to the role of the suspend() and freeze() +callbacks described above, although it does not need to save the contents of +the device's registers. In particular, if the driver wants to put the device +into a low-power state itself instead of allowing the PCI subsystem to do that, +the poweroff() callback should use pci_prepare_to_sleep() and +pci_set_power_state() to prepare the device for system wakeup and to put it +into a low-power state, respectively, but it need not save the device's standard +configuration registers. + +3.1.7. poweroff_noirq() +^^^^^^^^^^^^^^^^^^^^^^^ + +The poweroff_noirq() callback is hibernation-specific. It is executed after +poweroff() callbacks have been executed for all devices in the system. + +The role of this callback is analogous to the role of the suspend_noirq() and +freeze_noirq() callbacks described above, but it does not need to save the +contents of the device's registers. + +The difference between poweroff_noirq() and poweroff() is analogous to the +difference between suspend_noirq() and suspend(). + +3.1.8. resume_noirq() +^^^^^^^^^^^^^^^^^^^^^ + +The resume_noirq() callback is only executed during system resume, after the +PM core has enabled the non-boot CPUs. The driver's interrupt handler will not +be invoked while resume_noirq() is running, so this callback can carry out +operations that might race with the interrupt handler. + +Since the PCI subsystem unconditionally puts all devices into the full power +state in the resume_noirq phase of system resume and restores their standard +configuration registers, resume_noirq() is usually not necessary. In general +it should only be used for performing operations that would lead to race +conditions if carried out by resume(). + +3.1.9. resume() +^^^^^^^^^^^^^^^ + +The resume() callback is only executed during system resume, after +resume_noirq() callbacks have been executed for all devices in the system and +device interrupts have been enabled by the PM core. + +This callback is responsible for restoring the pre-suspend configuration of the +device and bringing it back to the fully functional state. The device should be +able to process I/O in a usual way after resume() has returned. + +3.1.10. thaw_noirq() +^^^^^^^^^^^^^^^^^^^^ + +The thaw_noirq() callback is hibernation-specific. It is executed after a +system image has been created and the non-boot CPUs have been enabled by the PM +core, in the thaw_noirq phase of hibernation. It also may be executed if the +loading of a hibernation image fails during system restore (it is then executed +after enabling the non-boot CPUs). The driver's interrupt handler will not be +invoked while thaw_noirq() is running. + +The role of this callback is analogous to the role of resume_noirq(). The +difference between these two callbacks is that thaw_noirq() is executed after +freeze() and freeze_noirq(), so in general it does not need to modify the +contents of the device's registers. + +3.1.11. thaw() +^^^^^^^^^^^^^^ + +The thaw() callback is hibernation-specific. It is executed after thaw_noirq() +callbacks have been executed for all devices in the system and after device +interrupts have been enabled by the PM core. + +This callback is responsible for restoring the pre-freeze configuration of +the device, so that it will work in a usual way after thaw() has returned. + +3.1.12. restore_noirq() +^^^^^^^^^^^^^^^^^^^^^^^ + +The restore_noirq() callback is hibernation-specific. It is executed in the +restore_noirq phase of hibernation, when the boot kernel has passed control to +the image kernel and the non-boot CPUs have been enabled by the image kernel's +PM core. + +This callback is analogous to resume_noirq() with the exception that it cannot +make any assumption on the previous state of the device, even if the BIOS (or +generally the platform firmware) is known to preserve that state over a +suspend-resume cycle. + +For the vast majority of PCI device drivers there is no difference between +resume_noirq() and restore_noirq(). + +3.1.13. restore() +^^^^^^^^^^^^^^^^^ + +The restore() callback is hibernation-specific. It is executed after +restore_noirq() callbacks have been executed for all devices in the system and +after the PM core has enabled device drivers' interrupt handlers to be invoked. + +This callback is analogous to resume(), just like restore_noirq() is analogous +to resume_noirq(). Consequently, the difference between restore_noirq() and +restore() is analogous to the difference between resume_noirq() and resume(). + +For the vast majority of PCI device drivers there is no difference between +resume() and restore(). + +3.1.14. complete() +^^^^^^^^^^^^^^^^^^ + +The complete() callback is executed in the following situations: + + - during system resume, after resume() callbacks have been executed for all + devices, + - during hibernation, before saving the system image, after thaw() callbacks + have been executed for all devices, + - during system restore, when the system is going back to its pre-hibernation + state, after restore() callbacks have been executed for all devices. + +It also may be executed if the loading of a hibernation image into memory fails +(in that case it is run after thaw() callbacks have been executed for all +devices that have drivers in the boot kernel). + +This callback is entirely optional, although it may be necessary if the +prepare() callback performs operations that need to be reversed. + +3.1.15. runtime_suspend() +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The runtime_suspend() callback is specific to device runtime power management +(runtime PM). It is executed by the PM core's runtime PM framework when the +device is about to be suspended (i.e. quiesced and put into a low-power state) +at run time. + +This callback is responsible for freezing the device and preparing it to be +put into a low-power state, but it must allow the PCI subsystem to perform all +of the PCI-specific actions necessary for suspending the device. + +3.1.16. runtime_resume() +^^^^^^^^^^^^^^^^^^^^^^^^ + +The runtime_resume() callback is specific to device runtime PM. It is executed +by the PM core's runtime PM framework when the device is about to be resumed +(i.e. put into the full-power state and programmed to process I/O normally) at +run time. + +This callback is responsible for restoring the normal functionality of the +device after it has been put into the full-power state by the PCI subsystem. +The device is expected to be able to process I/O in the usual way after +runtime_resume() has returned. + +3.1.17. runtime_idle() +^^^^^^^^^^^^^^^^^^^^^^ + +The runtime_idle() callback is specific to device runtime PM. It is executed +by the PM core's runtime PM framework whenever it may be desirable to suspend +the device according to the PM core's information. In particular, it is +automatically executed right after runtime_resume() has returned in case the +resume of the device has happened as a result of a spurious event. + +This callback is optional, but if it is not implemented or if it returns 0, the +PCI subsystem will call pm_runtime_suspend() for the device, which in turn will +cause the driver's runtime_suspend() callback to be executed. + +3.1.18. Pointing Multiple Callback Pointers to One Routine +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Although in principle each of the callbacks described in the previous +subsections can be defined as a separate function, it often is convenient to +point two or more members of struct dev_pm_ops to the same routine. There are +a few convenience macros that can be used for this purpose. + +The SIMPLE_DEV_PM_OPS macro declares a struct dev_pm_ops object with one +suspend routine pointed to by the .suspend(), .freeze(), and .poweroff() +members and one resume routine pointed to by the .resume(), .thaw(), and +.restore() members. The other function pointers in this struct dev_pm_ops are +unset. + +The UNIVERSAL_DEV_PM_OPS macro is similar to SIMPLE_DEV_PM_OPS, but it +additionally sets the .runtime_resume() pointer to the same value as +.resume() (and .thaw(), and .restore()) and the .runtime_suspend() pointer to +the same value as .suspend() (and .freeze() and .poweroff()). + +The SET_SYSTEM_SLEEP_PM_OPS can be used inside of a declaration of struct +dev_pm_ops to indicate that one suspend routine is to be pointed to by the +.suspend(), .freeze(), and .poweroff() members and one resume routine is to +be pointed to by the .resume(), .thaw(), and .restore() members. + +3.1.19. Driver Flags for Power Management +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The PM core allows device drivers to set flags that influence the handling of +power management for the devices by the core itself and by middle layer code +including the PCI bus type. The flags should be set once at the driver probe +time with the help of the dev_pm_set_driver_flags() function and they should not +be updated directly afterwards. + +The DPM_FLAG_NEVER_SKIP flag prevents the PM core from using the direct-complete +mechanism allowing device suspend/resume callbacks to be skipped if the device +is in runtime suspend when the system suspend starts. That also affects all of +the ancestors of the device, so this flag should only be used if absolutely +necessary. + +The DPM_FLAG_SMART_PREPARE flag instructs the PCI bus type to only return a +positive value from pci_pm_prepare() if the ->prepare callback provided by the +driver of the device returns a positive value. That allows the driver to opt +out from using the direct-complete mechanism dynamically. + +The DPM_FLAG_SMART_SUSPEND flag tells the PCI bus type that from the driver's +perspective the device can be safely left in runtime suspend during system +suspend. That causes pci_pm_suspend(), pci_pm_freeze() and pci_pm_poweroff() +to skip resuming the device from runtime suspend unless there are PCI-specific +reasons for doing that. Also, it causes pci_pm_suspend_late/noirq(), +pci_pm_freeze_late/noirq() and pci_pm_poweroff_late/noirq() to return early +if the device remains in runtime suspend in the beginning of the "late" phase +of the system-wide transition under way. Moreover, if the device is in +runtime suspend in pci_pm_resume_noirq() or pci_pm_restore_noirq(), its runtime +power management status will be changed to "active" (as it is going to be put +into D0 going forward), but if it is in runtime suspend in pci_pm_thaw_noirq(), +the function will set the power.direct_complete flag for it (to make the PM core +skip the subsequent "thaw" callbacks for it) and return. + +Setting the DPM_FLAG_LEAVE_SUSPENDED flag means that the driver prefers the +device to be left in suspend after system-wide transitions to the working state. +This flag is checked by the PM core, but the PCI bus type informs the PM core +which devices may be left in suspend from its perspective (that happens during +the "noirq" phase of system-wide suspend and analogous transitions) and next it +uses the dev_pm_may_skip_resume() helper to decide whether or not to return from +pci_pm_resume_noirq() early, as the PM core will skip the remaining resume +callbacks for the device during the transition under way and will set its +runtime PM status to "suspended" if dev_pm_may_skip_resume() returns "true" for +it. + +3.2. Device Runtime Power Management +------------------------------------ + +In addition to providing device power management callbacks PCI device drivers +are responsible for controlling the runtime power management (runtime PM) of +their devices. + +The PCI device runtime PM is optional, but it is recommended that PCI device +drivers implement it at least in the cases where there is a reliable way of +verifying that the device is not used (like when the network cable is detached +from an Ethernet adapter or there are no devices attached to a USB controller). + +To support the PCI runtime PM the driver first needs to implement the +runtime_suspend() and runtime_resume() callbacks. It also may need to implement +the runtime_idle() callback to prevent the device from being suspended again +every time right after the runtime_resume() callback has returned +(alternatively, the runtime_suspend() callback will have to check if the +device should really be suspended and return -EAGAIN if that is not the case). + +The runtime PM of PCI devices is enabled by default by the PCI core. PCI +device drivers do not need to enable it and should not attempt to do so. +However, it is blocked by pci_pm_init() that runs the pm_runtime_forbid() +helper function. In addition to that, the runtime PM usage counter of +each PCI device is incremented by local_pci_probe() before executing the +probe callback provided by the device's driver. + +If a PCI driver implements the runtime PM callbacks and intends to use the +runtime PM framework provided by the PM core and the PCI subsystem, it needs +to decrement the device's runtime PM usage counter in its probe callback +function. If it doesn't do that, the counter will always be different from +zero for the device and it will never be runtime-suspended. The simplest +way to do that is by calling pm_runtime_put_noidle(), but if the driver +wants to schedule an autosuspend right away, for example, it may call +pm_runtime_put_autosuspend() instead for this purpose. Generally, it +just needs to call a function that decrements the devices usage counter +from its probe routine to make runtime PM work for the device. + +It is important to remember that the driver's runtime_suspend() callback +may be executed right after the usage counter has been decremented, because +user space may already have caused the pm_runtime_allow() helper function +unblocking the runtime PM of the device to run via sysfs, so the driver must +be prepared to cope with that. + +The driver itself should not call pm_runtime_allow(), though. Instead, it +should let user space or some platform-specific code do that (user space can +do it via sysfs as stated above), but it must be prepared to handle the +runtime PM of the device correctly as soon as pm_runtime_allow() is called +(which may happen at any time, even before the driver is loaded). + +When the driver's remove callback runs, it has to balance the decrementation +of the device's runtime PM usage counter at the probe time. For this reason, +if it has decremented the counter in its probe callback, it must run +pm_runtime_get_noresume() in its remove callback. [Since the core carries +out a runtime resume of the device and bumps up the device's usage counter +before running the driver's remove callback, the runtime PM of the device +is effectively disabled for the duration of the remove execution and all +runtime PM helper functions incrementing the device's usage counter are +then effectively equivalent to pm_runtime_get_noresume().] + +The runtime PM framework works by processing requests to suspend or resume +devices, or to check if they are idle (in which cases it is reasonable to +subsequently request that they be suspended). These requests are represented +by work items put into the power management workqueue, pm_wq. Although there +are a few situations in which power management requests are automatically +queued by the PM core (for example, after processing a request to resume a +device the PM core automatically queues a request to check if the device is +idle), device drivers are generally responsible for queuing power management +requests for their devices. For this purpose they should use the runtime PM +helper functions provided by the PM core, discussed in +Documentation/power/runtime_pm.rst. + +Devices can also be suspended and resumed synchronously, without placing a +request into pm_wq. In the majority of cases this also is done by their +drivers that use helper functions provided by the PM core for this purpose. + +For more information on the runtime PM of devices refer to +Documentation/power/runtime_pm.rst. + + +4. Resources +============ + +PCI Local Bus Specification, Rev. 3.0 + +PCI Bus Power Management Interface Specification, Rev. 1.2 + +Advanced Configuration and Power Interface (ACPI) Specification, Rev. 3.0b + +PCI Express Base Specification, Rev. 2.0 + +Documentation/driver-api/pm/devices.rst + +Documentation/power/runtime_pm.rst diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt deleted file mode 100644 index 8eaf9ee24d43..000000000000 --- a/Documentation/power/pci.txt +++ /dev/null @@ -1,1094 +0,0 @@ -PCI Power Management - -Copyright (c) 2010 Rafael J. Wysocki , Novell Inc. - -An overview of concepts and the Linux kernel's interfaces related to PCI power -management. Based on previous work by Patrick Mochel -(and others). - -This document only covers the aspects of power management specific to PCI -devices. For general description of the kernel's interfaces related to device -power management refer to Documentation/driver-api/pm/devices.rst and -Documentation/power/runtime_pm.txt. - ---------------------------------------------------------------------------- - -1. Hardware and Platform Support for PCI Power Management -2. PCI Subsystem and Device Power Management -3. PCI Device Drivers and Power Management -4. Resources - - -1. Hardware and Platform Support for PCI Power Management -========================================================= - -1.1. Native and Platform-Based Power Management ------------------------------------------------ -In general, power management is a feature allowing one to save energy by putting -devices into states in which they draw less power (low-power states) at the -price of reduced functionality or performance. - -Usually, a device is put into a low-power state when it is underutilized or -completely inactive. However, when it is necessary to use the device once -again, it has to be put back into the "fully functional" state (full-power -state). This may happen when there are some data for the device to handle or -as a result of an external event requiring the device to be active, which may -be signaled by the device itself. - -PCI devices may be put into low-power states in two ways, by using the device -capabilities introduced by the PCI Bus Power Management Interface Specification, -or with the help of platform firmware, such as an ACPI BIOS. In the first -approach, that is referred to as the native PCI power management (native PCI PM) -in what follows, the device power state is changed as a result of writing a -specific value into one of its standard configuration registers. The second -approach requires the platform firmware to provide special methods that may be -used by the kernel to change the device's power state. - -Devices supporting the native PCI PM usually can generate wakeup signals called -Power Management Events (PMEs) to let the kernel know about external events -requiring the device to be active. After receiving a PME the kernel is supposed -to put the device that sent it into the full-power state. However, the PCI Bus -Power Management Interface Specification doesn't define any standard method of -delivering the PME from the device to the CPU and the operating system kernel. -It is assumed that the platform firmware will perform this task and therefore, -even though a PCI device is set up to generate PMEs, it also may be necessary to -prepare the platform firmware for notifying the CPU of the PMEs coming from the -device (e.g. by generating interrupts). - -In turn, if the methods provided by the platform firmware are used for changing -the power state of a device, usually the platform also provides a method for -preparing the device to generate wakeup signals. In that case, however, it -often also is necessary to prepare the device for generating PMEs using the -native PCI PM mechanism, because the method provided by the platform depends on -that. - -Thus in many situations both the native and the platform-based power management -mechanisms have to be used simultaneously to obtain the desired result. - -1.2. Native PCI Power Management --------------------------------- -The PCI Bus Power Management Interface Specification (PCI PM Spec) was -introduced between the PCI 2.1 and PCI 2.2 Specifications. It defined a -standard interface for performing various operations related to power -management. - -The implementation of the PCI PM Spec is optional for conventional PCI devices, -but it is mandatory for PCI Express devices. If a device supports the PCI PM -Spec, it has an 8 byte power management capability field in its PCI -configuration space. This field is used to describe and control the standard -features related to the native PCI power management. - -The PCI PM Spec defines 4 operating states for devices (D0-D3) and for buses -(B0-B3). The higher the number, the less power is drawn by the device or bus -in that state. However, the higher the number, the longer the latency for -the device or bus to return to the full-power state (D0 or B0, respectively). - -There are two variants of the D3 state defined by the specification. The first -one is D3hot, referred to as the software accessible D3, because devices can be -programmed to go into it. The second one, D3cold, is the state that PCI devices -are in when the supply voltage (Vcc) is removed from them. It is not possible -to program a PCI device to go into D3cold, although there may be a programmable -interface for putting the bus the device is on into a state in which Vcc is -removed from all devices on the bus. - -PCI bus power management, however, is not supported by the Linux kernel at the -time of this writing and therefore it is not covered by this document. - -Note that every PCI device can be in the full-power state (D0) or in D3cold, -regardless of whether or not it implements the PCI PM Spec. In addition to -that, if the PCI PM Spec is implemented by the device, it must support D3hot -as well as D0. The support for the D1 and D2 power states is optional. - -PCI devices supporting the PCI PM Spec can be programmed to go to any of the -supported low-power states (except for D3cold). While in D1-D3hot the -standard configuration registers of the device must be accessible to software -(i.e. the device is required to respond to PCI configuration accesses), although -its I/O and memory spaces are then disabled. This allows the device to be -programmatically put into D0. Thus the kernel can switch the device back and -forth between D0 and the supported low-power states (except for D3cold) and the -possible power state transitions the device can undergo are the following: - -+----------------------------+ -| Current State | New State | -+----------------------------+ -| D0 | D1, D2, D3 | -+----------------------------+ -| D1 | D2, D3 | -+----------------------------+ -| D2 | D3 | -+----------------------------+ -| D1, D2, D3 | D0 | -+----------------------------+ - -The transition from D3cold to D0 occurs when the supply voltage is provided to -the device (i.e. power is restored). In that case the device returns to D0 with -a full power-on reset sequence and the power-on defaults are restored to the -device by hardware just as at initial power up. - -PCI devices supporting the PCI PM Spec can be programmed to generate PMEs -while in a low-power state (D1-D3), but they are not required to be capable -of generating PMEs from all supported low-power states. In particular, the -capability of generating PMEs from D3cold is optional and depends on the -presence of additional voltage (3.3Vaux) allowing the device to remain -sufficiently active to generate a wakeup signal. - -1.3. ACPI Device Power Management ---------------------------------- -The platform firmware support for the power management of PCI devices is -system-specific. However, if the system in question is compliant with the -Advanced Configuration and Power Interface (ACPI) Specification, like the -majority of x86-based systems, it is supposed to implement device power -management interfaces defined by the ACPI standard. - -For this purpose the ACPI BIOS provides special functions called "control -methods" that may be executed by the kernel to perform specific tasks, such as -putting a device into a low-power state. These control methods are encoded -using special byte-code language called the ACPI Machine Language (AML) and -stored in the machine's BIOS. The kernel loads them from the BIOS and executes -them as needed using an AML interpreter that translates the AML byte code into -computations and memory or I/O space accesses. This way, in theory, a BIOS -writer can provide the kernel with a means to perform actions depending -on the system design in a system-specific fashion. - -ACPI control methods may be divided into global control methods, that are not -associated with any particular devices, and device control methods, that have -to be defined separately for each device supposed to be handled with the help of -the platform. This means, in particular, that ACPI device control methods can -only be used to handle devices that the BIOS writer knew about in advance. The -ACPI methods used for device power management fall into that category. - -The ACPI specification assumes that devices can be in one of four power states -labeled as D0, D1, D2, and D3 that roughly correspond to the native PCI PM -D0-D3 states (although the difference between D3hot and D3cold is not taken -into account by ACPI). Moreover, for each power state of a device there is a -set of power resources that have to be enabled for the device to be put into -that state. These power resources are controlled (i.e. enabled or disabled) -with the help of their own control methods, _ON and _OFF, that have to be -defined individually for each of them. - -To put a device into the ACPI power state Dx (where x is a number between 0 and -3 inclusive) the kernel is supposed to (1) enable the power resources required -by the device in this state using their _ON control methods and (2) execute the -_PSx control method defined for the device. In addition to that, if the device -is going to be put into a low-power state (D1-D3) and is supposed to generate -wakeup signals from that state, the _DSW (or _PSW, replaced with _DSW by ACPI -3.0) control method defined for it has to be executed before _PSx. Power -resources that are not required by the device in the target power state and are -not required any more by any other device should be disabled (by executing their -_OFF control methods). If the current power state of the device is D3, it can -only be put into D0 this way. - -However, quite often the power states of devices are changed during a -system-wide transition into a sleep state or back into the working state. ACPI -defines four system sleep states, S1, S2, S3, and S4, and denotes the system -working state as S0. In general, the target system sleep (or working) state -determines the highest power (lowest number) state the device can be put -into and the kernel is supposed to obtain this information by executing the -device's _SxD control method (where x is a number between 0 and 4 inclusive). -If the device is required to wake up the system from the target sleep state, the -lowest power (highest number) state it can be put into is also determined by the -target state of the system. The kernel is then supposed to use the device's -_SxW control method to obtain the number of that state. It also is supposed to -use the device's _PRW control method to learn which power resources need to be -enabled for the device to be able to generate wakeup signals. - -1.4. Wakeup Signaling ---------------------- -Wakeup signals generated by PCI devices, either as native PCI PMEs, or as -a result of the execution of the _DSW (or _PSW) ACPI control method before -putting the device into a low-power state, have to be caught and handled as -appropriate. If they are sent while the system is in the working state -(ACPI S0), they should be translated into interrupts so that the kernel can -put the devices generating them into the full-power state and take care of the -events that triggered them. In turn, if they are sent while the system is -sleeping, they should cause the system's core logic to trigger wakeup. - -On ACPI-based systems wakeup signals sent by conventional PCI devices are -converted into ACPI General-Purpose Events (GPEs) which are hardware signals -from the system core logic generated in response to various events that need to -be acted upon. Every GPE is associated with one or more sources of potentially -interesting events. In particular, a GPE may be associated with a PCI device -capable of signaling wakeup. The information on the connections between GPEs -and event sources is recorded in the system's ACPI BIOS from where it can be -read by the kernel. - -If a PCI device known to the system's ACPI BIOS signals wakeup, the GPE -associated with it (if there is one) is triggered. The GPEs associated with PCI -bridges may also be triggered in response to a wakeup signal from one of the -devices below the bridge (this also is the case for root bridges) and, for -example, native PCI PMEs from devices unknown to the system's ACPI BIOS may be -handled this way. - -A GPE may be triggered when the system is sleeping (i.e. when it is in one of -the ACPI S1-S4 states), in which case system wakeup is started by its core logic -(the device that was the source of the signal causing the system wakeup to occur -may be identified later). The GPEs used in such situations are referred to as -wakeup GPEs. - -Usually, however, GPEs are also triggered when the system is in the working -state (ACPI S0) and in that case the system's core logic generates a System -Control Interrupt (SCI) to notify the kernel of the event. Then, the SCI -handler identifies the GPE that caused the interrupt to be generated which, -in turn, allows the kernel to identify the source of the event (that may be -a PCI device signaling wakeup). The GPEs used for notifying the kernel of -events occurring while the system is in the working state are referred to as -runtime GPEs. - -Unfortunately, there is no standard way of handling wakeup signals sent by -conventional PCI devices on systems that are not ACPI-based, but there is one -for PCI Express devices. Namely, the PCI Express Base Specification introduced -a native mechanism for converting native PCI PMEs into interrupts generated by -root ports. For conventional PCI devices native PMEs are out-of-band, so they -are routed separately and they need not pass through bridges (in principle they -may be routed directly to the system's core logic), but for PCI Express devices -they are in-band messages that have to pass through the PCI Express hierarchy, -including the root port on the path from the device to the Root Complex. Thus -it was possible to introduce a mechanism by which a root port generates an -interrupt whenever it receives a PME message from one of the devices below it. -The PCI Express Requester ID of the device that sent the PME message is then -recorded in one of the root port's configuration registers from where it may be -read by the interrupt handler allowing the device to be identified. [PME -messages sent by PCI Express endpoints integrated with the Root Complex don't -pass through root ports, but instead they cause a Root Complex Event Collector -(if there is one) to generate interrupts.] - -In principle the native PCI Express PME signaling may also be used on ACPI-based -systems along with the GPEs, but to use it the kernel has to ask the system's -ACPI BIOS to release control of root port configuration registers. The ACPI -BIOS, however, is not required to allow the kernel to control these registers -and if it doesn't do that, the kernel must not modify their contents. Of course -the native PCI Express PME signaling cannot be used by the kernel in that case. - - -2. PCI Subsystem and Device Power Management -============================================ - -2.1. Device Power Management Callbacks --------------------------------------- -The PCI Subsystem participates in the power management of PCI devices in a -number of ways. First of all, it provides an intermediate code layer between -the device power management core (PM core) and PCI device drivers. -Specifically, the pm field of the PCI subsystem's struct bus_type object, -pci_bus_type, points to a struct dev_pm_ops object, pci_dev_pm_ops, containing -pointers to several device power management callbacks: - -const struct dev_pm_ops pci_dev_pm_ops = { - .prepare = pci_pm_prepare, - .complete = pci_pm_complete, - .suspend = pci_pm_suspend, - .resume = pci_pm_resume, - .freeze = pci_pm_freeze, - .thaw = pci_pm_thaw, - .poweroff = pci_pm_poweroff, - .restore = pci_pm_restore, - .suspend_noirq = pci_pm_suspend_noirq, - .resume_noirq = pci_pm_resume_noirq, - .freeze_noirq = pci_pm_freeze_noirq, - .thaw_noirq = pci_pm_thaw_noirq, - .poweroff_noirq = pci_pm_poweroff_noirq, - .restore_noirq = pci_pm_restore_noirq, - .runtime_suspend = pci_pm_runtime_suspend, - .runtime_resume = pci_pm_runtime_resume, - .runtime_idle = pci_pm_runtime_idle, -}; - -These callbacks are executed by the PM core in various situations related to -device power management and they, in turn, execute power management callbacks -provided by PCI device drivers. They also perform power management operations -involving some standard configuration registers of PCI devices that device -drivers need not know or care about. - -The structure representing a PCI device, struct pci_dev, contains several fields -that these callbacks operate on: - -struct pci_dev { - ... - pci_power_t current_state; /* Current operating state. */ - int pm_cap; /* PM capability offset in the - configuration space */ - unsigned int pme_support:5; /* Bitmask of states from which PME# - can be generated */ - unsigned int pme_interrupt:1;/* Is native PCIe PME signaling used? */ - unsigned int d1_support:1; /* Low power state D1 is supported */ - unsigned int d2_support:1; /* Low power state D2 is supported */ - unsigned int no_d1d2:1; /* D1 and D2 are forbidden */ - unsigned int wakeup_prepared:1; /* Device prepared for wake up */ - unsigned int d3_delay; /* D3->D0 transition time in ms */ - ... -}; - -They also indirectly use some fields of the struct device that is embedded in -struct pci_dev. - -2.2. Device Initialization --------------------------- -The PCI subsystem's first task related to device power management is to -prepare the device for power management and initialize the fields of struct -pci_dev used for this purpose. This happens in two functions defined in -drivers/pci/pci.c, pci_pm_init() and platform_pci_wakeup_init(). - -The first of these functions checks if the device supports native PCI PM -and if that's the case the offset of its power management capability structure -in the configuration space is stored in the pm_cap field of the device's struct -pci_dev object. Next, the function checks which PCI low-power states are -supported by the device and from which low-power states the device can generate -native PCI PMEs. The power management fields of the device's struct pci_dev and -the struct device embedded in it are updated accordingly and the generation of -PMEs by the device is disabled. - -The second function checks if the device can be prepared to signal wakeup with -the help of the platform firmware, such as the ACPI BIOS. If that is the case, -the function updates the wakeup fields in struct device embedded in the -device's struct pci_dev and uses the firmware-provided method to prevent the -device from signaling wakeup. - -At this point the device is ready for power management. For driverless devices, -however, this functionality is limited to a few basic operations carried out -during system-wide transitions to a sleep state and back to the working state. - -2.3. Runtime Device Power Management ------------------------------------- -The PCI subsystem plays a vital role in the runtime power management of PCI -devices. For this purpose it uses the general runtime power management -(runtime PM) framework described in Documentation/power/runtime_pm.txt. -Namely, it provides subsystem-level callbacks: - - pci_pm_runtime_suspend() - pci_pm_runtime_resume() - pci_pm_runtime_idle() - -that are executed by the core runtime PM routines. It also implements the -entire mechanics necessary for handling runtime wakeup signals from PCI devices -in low-power states, which at the time of this writing works for both the native -PCI Express PME signaling and the ACPI GPE-based wakeup signaling described in -Section 1. - -First, a PCI device is put into a low-power state, or suspended, with the help -of pm_schedule_suspend() or pm_runtime_suspend() which for PCI devices call -pci_pm_runtime_suspend() to do the actual job. For this to work, the device's -driver has to provide a pm->runtime_suspend() callback (see below), which is -run by pci_pm_runtime_suspend() as the first action. If the driver's callback -returns successfully, the device's standard configuration registers are saved, -the device is prepared to generate wakeup signals and, finally, it is put into -the target low-power state. - -The low-power state to put the device into is the lowest-power (highest number) -state from which it can signal wakeup. The exact method of signaling wakeup is -system-dependent and is determined by the PCI subsystem on the basis of the -reported capabilities of the device and the platform firmware. To prepare the -device for signaling wakeup and put it into the selected low-power state, the -PCI subsystem can use the platform firmware as well as the device's native PCI -PM capabilities, if supported. - -It is expected that the device driver's pm->runtime_suspend() callback will -not attempt to prepare the device for signaling wakeup or to put it into a -low-power state. The driver ought to leave these tasks to the PCI subsystem -that has all of the information necessary to perform them. - -A suspended device is brought back into the "active" state, or resumed, -with the help of pm_request_resume() or pm_runtime_resume() which both call -pci_pm_runtime_resume() for PCI devices. Again, this only works if the device's -driver provides a pm->runtime_resume() callback (see below). However, before -the driver's callback is executed, pci_pm_runtime_resume() brings the device -back into the full-power state, prevents it from signaling wakeup while in that -state and restores its standard configuration registers. Thus the driver's -callback need not worry about the PCI-specific aspects of the device resume. - -Note that generally pci_pm_runtime_resume() may be called in two different -situations. First, it may be called at the request of the device's driver, for -example if there are some data for it to process. Second, it may be called -as a result of a wakeup signal from the device itself (this sometimes is -referred to as "remote wakeup"). Of course, for this purpose the wakeup signal -is handled in one of the ways described in Section 1 and finally converted into -a notification for the PCI subsystem after the source device has been -identified. - -The pci_pm_runtime_idle() function, called for PCI devices by pm_runtime_idle() -and pm_request_idle(), executes the device driver's pm->runtime_idle() -callback, if defined, and if that callback doesn't return error code (or is not -present at all), suspends the device with the help of pm_runtime_suspend(). -Sometimes pci_pm_runtime_idle() is called automatically by the PM core (for -example, it is called right after the device has just been resumed), in which -cases it is expected to suspend the device if that makes sense. Usually, -however, the PCI subsystem doesn't really know if the device really can be -suspended, so it lets the device's driver decide by running its -pm->runtime_idle() callback. - -2.4. System-Wide Power Transitions ----------------------------------- -There are a few different types of system-wide power transitions, described in -Documentation/driver-api/pm/devices.rst. Each of them requires devices to be handled -in a specific way and the PM core executes subsystem-level power management -callbacks for this purpose. They are executed in phases such that each phase -involves executing the same subsystem-level callback for every device belonging -to the given subsystem before the next phase begins. These phases always run -after tasks have been frozen. - -2.4.1. System Suspend - -When the system is going into a sleep state in which the contents of memory will -be preserved, such as one of the ACPI sleep states S1-S3, the phases are: - - prepare, suspend, suspend_noirq. - -The following PCI bus type's callbacks, respectively, are used in these phases: - - pci_pm_prepare() - pci_pm_suspend() - pci_pm_suspend_noirq() - -The pci_pm_prepare() routine first puts the device into the "fully functional" -state with the help of pm_runtime_resume(). Then, it executes the device -driver's pm->prepare() callback if defined (i.e. if the driver's struct -dev_pm_ops object is present and the prepare pointer in that object is valid). - -The pci_pm_suspend() routine first checks if the device's driver implements -legacy PCI suspend routines (see Section 3), in which case the driver's legacy -suspend callback is executed, if present, and its result is returned. Next, if -the device's driver doesn't provide a struct dev_pm_ops object (containing -pointers to the driver's callbacks), pci_pm_default_suspend() is called, which -simply turns off the device's bus master capability and runs -pcibios_disable_device() to disable it, unless the device is a bridge (PCI -bridges are ignored by this routine). Next, the device driver's pm->suspend() -callback is executed, if defined, and its result is returned if it fails. -Finally, pci_fixup_device() is called to apply hardware suspend quirks related -to the device if necessary. - -Note that the suspend phase is carried out asynchronously for PCI devices, so -the pci_pm_suspend() callback may be executed in parallel for any pair of PCI -devices that don't depend on each other in a known way (i.e. none of the paths -in the device tree from the root bridge to a leaf device contains both of them). - -The pci_pm_suspend_noirq() routine is executed after suspend_device_irqs() has -been called, which means that the device driver's interrupt handler won't be -invoked while this routine is running. It first checks if the device's driver -implements legacy PCI suspends routines (Section 3), in which case the legacy -late suspend routine is called and its result is returned (the standard -configuration registers of the device are saved if the driver's callback hasn't -done that). Second, if the device driver's struct dev_pm_ops object is not -present, the device's standard configuration registers are saved and the routine -returns success. Otherwise the device driver's pm->suspend_noirq() callback is -executed, if present, and its result is returned if it fails. Next, if the -device's standard configuration registers haven't been saved yet (one of the -device driver's callbacks executed before might do that), pci_pm_suspend_noirq() -saves them, prepares the device to signal wakeup (if necessary) and puts it into -a low-power state. - -The low-power state to put the device into is the lowest-power (highest number) -state from which it can signal wakeup while the system is in the target sleep -state. Just like in the runtime PM case described above, the mechanism of -signaling wakeup is system-dependent and determined by the PCI subsystem, which -is also responsible for preparing the device to signal wakeup from the system's -target sleep state as appropriate. - -PCI device drivers (that don't implement legacy power management callbacks) are -generally not expected to prepare devices for signaling wakeup or to put them -into low-power states. However, if one of the driver's suspend callbacks -(pm->suspend() or pm->suspend_noirq()) saves the device's standard configuration -registers, pci_pm_suspend_noirq() will assume that the device has been prepared -to signal wakeup and put into a low-power state by the driver (the driver is -then assumed to have used the helper functions provided by the PCI subsystem for -this purpose). PCI device drivers are not encouraged to do that, but in some -rare cases doing that in the driver may be the optimum approach. - -2.4.2. System Resume - -When the system is undergoing a transition from a sleep state in which the -contents of memory have been preserved, such as one of the ACPI sleep states -S1-S3, into the working state (ACPI S0), the phases are: - - resume_noirq, resume, complete. - -The following PCI bus type's callbacks, respectively, are executed in these -phases: - - pci_pm_resume_noirq() - pci_pm_resume() - pci_pm_complete() - -The pci_pm_resume_noirq() routine first puts the device into the full-power -state, restores its standard configuration registers and applies early resume -hardware quirks related to the device, if necessary. This is done -unconditionally, regardless of whether or not the device's driver implements -legacy PCI power management callbacks (this way all PCI devices are in the -full-power state and their standard configuration registers have been restored -when their interrupt handlers are invoked for the first time during resume, -which allows the kernel to avoid problems with the handling of shared interrupts -by drivers whose devices are still suspended). If legacy PCI power management -callbacks (see Section 3) are implemented by the device's driver, the legacy -early resume callback is executed and its result is returned. Otherwise, the -device driver's pm->resume_noirq() callback is executed, if defined, and its -result is returned. - -The pci_pm_resume() routine first checks if the device's standard configuration -registers have been restored and restores them if that's not the case (this -only is necessary in the error path during a failing suspend). Next, resume -hardware quirks related to the device are applied, if necessary, and if the -device's driver implements legacy PCI power management callbacks (see -Section 3), the driver's legacy resume callback is executed and its result is -returned. Otherwise, the device's wakeup signaling mechanisms are blocked and -its driver's pm->resume() callback is executed, if defined (the callback's -result is then returned). - -The resume phase is carried out asynchronously for PCI devices, like the -suspend phase described above, which means that if two PCI devices don't depend -on each other in a known way, the pci_pm_resume() routine may be executed for -the both of them in parallel. - -The pci_pm_complete() routine only executes the device driver's pm->complete() -callback, if defined. - -2.4.3. System Hibernation - -System hibernation is more complicated than system suspend, because it requires -a system image to be created and written into a persistent storage medium. The -image is created atomically and all devices are quiesced, or frozen, before that -happens. - -The freezing of devices is carried out after enough memory has been freed (at -the time of this writing the image creation requires at least 50% of system RAM -to be free) in the following three phases: - - prepare, freeze, freeze_noirq - -that correspond to the PCI bus type's callbacks: - - pci_pm_prepare() - pci_pm_freeze() - pci_pm_freeze_noirq() - -This means that the prepare phase is exactly the same as for system suspend. -The other two phases, however, are different. - -The pci_pm_freeze() routine is quite similar to pci_pm_suspend(), but it runs -the device driver's pm->freeze() callback, if defined, instead of pm->suspend(), -and it doesn't apply the suspend-related hardware quirks. It is executed -asynchronously for different PCI devices that don't depend on each other in a -known way. - -The pci_pm_freeze_noirq() routine, in turn, is similar to -pci_pm_suspend_noirq(), but it calls the device driver's pm->freeze_noirq() -routine instead of pm->suspend_noirq(). It also doesn't attempt to prepare the -device for signaling wakeup and put it into a low-power state. Still, it saves -the device's standard configuration registers if they haven't been saved by one -of the driver's callbacks. - -Once the image has been created, it has to be saved. However, at this point all -devices are frozen and they cannot handle I/O, while their ability to handle -I/O is obviously necessary for the image saving. Thus they have to be brought -back to the fully functional state and this is done in the following phases: - - thaw_noirq, thaw, complete - -using the following PCI bus type's callbacks: - - pci_pm_thaw_noirq() - pci_pm_thaw() - pci_pm_complete() - -respectively. - -The first of them, pci_pm_thaw_noirq(), is analogous to pci_pm_resume_noirq(), -but it doesn't put the device into the full power state and doesn't attempt to -restore its standard configuration registers. It also executes the device -driver's pm->thaw_noirq() callback, if defined, instead of pm->resume_noirq(). - -The pci_pm_thaw() routine is similar to pci_pm_resume(), but it runs the device -driver's pm->thaw() callback instead of pm->resume(). It is executed -asynchronously for different PCI devices that don't depend on each other in a -known way. - -The complete phase it the same as for system resume. - -After saving the image, devices need to be powered down before the system can -enter the target sleep state (ACPI S4 for ACPI-based systems). This is done in -three phases: - - prepare, poweroff, poweroff_noirq - -where the prepare phase is exactly the same as for system suspend. The other -two phases are analogous to the suspend and suspend_noirq phases, respectively. -The PCI subsystem-level callbacks they correspond to - - pci_pm_poweroff() - pci_pm_poweroff_noirq() - -work in analogy with pci_pm_suspend() and pci_pm_poweroff_noirq(), respectively, -although they don't attempt to save the device's standard configuration -registers. - -2.4.4. System Restore - -System restore requires a hibernation image to be loaded into memory and the -pre-hibernation memory contents to be restored before the pre-hibernation system -activity can be resumed. - -As described in Documentation/driver-api/pm/devices.rst, the hibernation image is loaded -into memory by a fresh instance of the kernel, called the boot kernel, which in -turn is loaded and run by a boot loader in the usual way. After the boot kernel -has loaded the image, it needs to replace its own code and data with the code -and data of the "hibernated" kernel stored within the image, called the image -kernel. For this purpose all devices are frozen just like before creating -the image during hibernation, in the - - prepare, freeze, freeze_noirq - -phases described above. However, the devices affected by these phases are only -those having drivers in the boot kernel; other devices will still be in whatever -state the boot loader left them. - -Should the restoration of the pre-hibernation memory contents fail, the boot -kernel would go through the "thawing" procedure described above, using the -thaw_noirq, thaw, and complete phases (that will only affect the devices having -drivers in the boot kernel), and then continue running normally. - -If the pre-hibernation memory contents are restored successfully, which is the -usual situation, control is passed to the image kernel, which then becomes -responsible for bringing the system back to the working state. To achieve this, -it must restore the devices' pre-hibernation functionality, which is done much -like waking up from the memory sleep state, although it involves different -phases: - - restore_noirq, restore, complete - -The first two of these are analogous to the resume_noirq and resume phases -described above, respectively, and correspond to the following PCI subsystem -callbacks: - - pci_pm_restore_noirq() - pci_pm_restore() - -These callbacks work in analogy with pci_pm_resume_noirq() and pci_pm_resume(), -respectively, but they execute the device driver's pm->restore_noirq() and -pm->restore() callbacks, if available. - -The complete phase is carried out in exactly the same way as during system -resume. - - -3. PCI Device Drivers and Power Management -========================================== - -3.1. Power Management Callbacks -------------------------------- -PCI device drivers participate in power management by providing callbacks to be -executed by the PCI subsystem's power management routines described above and by -controlling the runtime power management of their devices. - -At the time of this writing there are two ways to define power management -callbacks for a PCI device driver, the recommended one, based on using a -dev_pm_ops structure described in Documentation/driver-api/pm/devices.rst, and the -"legacy" one, in which the .suspend(), .suspend_late(), .resume_early(), and -.resume() callbacks from struct pci_driver are used. The legacy approach, -however, doesn't allow one to define runtime power management callbacks and is -not really suitable for any new drivers. Therefore it is not covered by this -document (refer to the source code to learn more about it). - -It is recommended that all PCI device drivers define a struct dev_pm_ops object -containing pointers to power management (PM) callbacks that will be executed by -the PCI subsystem's PM routines in various circumstances. A pointer to the -driver's struct dev_pm_ops object has to be assigned to the driver.pm field in -its struct pci_driver object. Once that has happened, the "legacy" PM callbacks -in struct pci_driver are ignored (even if they are not NULL). - -The PM callbacks in struct dev_pm_ops are not mandatory and if they are not -defined (i.e. the respective fields of struct dev_pm_ops are unset) the PCI -subsystem will handle the device in a simplified default manner. If they are -defined, though, they are expected to behave as described in the following -subsections. - -3.1.1. prepare() - -The prepare() callback is executed during system suspend, during hibernation -(when a hibernation image is about to be created), during power-off after -saving a hibernation image and during system restore, when a hibernation image -has just been loaded into memory. - -This callback is only necessary if the driver's device has children that in -general may be registered at any time. In that case the role of the prepare() -callback is to prevent new children of the device from being registered until -one of the resume_noirq(), thaw_noirq(), or restore_noirq() callbacks is run. - -In addition to that the prepare() callback may carry out some operations -preparing the device to be suspended, although it should not allocate memory -(if additional memory is required to suspend the device, it has to be -preallocated earlier, for example in a suspend/hibernate notifier as described -in Documentation/driver-api/pm/notifiers.rst). - -3.1.2. suspend() - -The suspend() callback is only executed during system suspend, after prepare() -callbacks have been executed for all devices in the system. - -This callback is expected to quiesce the device and prepare it to be put into a -low-power state by the PCI subsystem. It is not required (in fact it even is -not recommended) that a PCI driver's suspend() callback save the standard -configuration registers of the device, prepare it for waking up the system, or -put it into a low-power state. All of these operations can very well be taken -care of by the PCI subsystem, without the driver's participation. - -However, in some rare case it is convenient to carry out these operations in -a PCI driver. Then, pci_save_state(), pci_prepare_to_sleep(), and -pci_set_power_state() should be used to save the device's standard configuration -registers, to prepare it for system wakeup (if necessary), and to put it into a -low-power state, respectively. Moreover, if the driver calls pci_save_state(), -the PCI subsystem will not execute either pci_prepare_to_sleep(), or -pci_set_power_state() for its device, so the driver is then responsible for -handling the device as appropriate. - -While the suspend() callback is being executed, the driver's interrupt handler -can be invoked to handle an interrupt from the device, so all suspend-related -operations relying on the driver's ability to handle interrupts should be -carried out in this callback. - -3.1.3. suspend_noirq() - -The suspend_noirq() callback is only executed during system suspend, after -suspend() callbacks have been executed for all devices in the system and -after device interrupts have been disabled by the PM core. - -The difference between suspend_noirq() and suspend() is that the driver's -interrupt handler will not be invoked while suspend_noirq() is running. Thus -suspend_noirq() can carry out operations that would cause race conditions to -arise if they were performed in suspend(). - -3.1.4. freeze() - -The freeze() callback is hibernation-specific and is executed in two situations, -during hibernation, after prepare() callbacks have been executed for all devices -in preparation for the creation of a system image, and during restore, -after a system image has been loaded into memory from persistent storage and the -prepare() callbacks have been executed for all devices. - -The role of this callback is analogous to the role of the suspend() callback -described above. In fact, they only need to be different in the rare cases when -the driver takes the responsibility for putting the device into a low-power -state. - -In that cases the freeze() callback should not prepare the device system wakeup -or put it into a low-power state. Still, either it or freeze_noirq() should -save the device's standard configuration registers using pci_save_state(). - -3.1.5. freeze_noirq() - -The freeze_noirq() callback is hibernation-specific. It is executed during -hibernation, after prepare() and freeze() callbacks have been executed for all -devices in preparation for the creation of a system image, and during restore, -after a system image has been loaded into memory and after prepare() and -freeze() callbacks have been executed for all devices. It is always executed -after device interrupts have been disabled by the PM core. - -The role of this callback is analogous to the role of the suspend_noirq() -callback described above and it very rarely is necessary to define -freeze_noirq(). - -The difference between freeze_noirq() and freeze() is analogous to the -difference between suspend_noirq() and suspend(). - -3.1.6. poweroff() - -The poweroff() callback is hibernation-specific. It is executed when the system -is about to be powered off after saving a hibernation image to a persistent -storage. prepare() callbacks are executed for all devices before poweroff() is -called. - -The role of this callback is analogous to the role of the suspend() and freeze() -callbacks described above, although it does not need to save the contents of -the device's registers. In particular, if the driver wants to put the device -into a low-power state itself instead of allowing the PCI subsystem to do that, -the poweroff() callback should use pci_prepare_to_sleep() and -pci_set_power_state() to prepare the device for system wakeup and to put it -into a low-power state, respectively, but it need not save the device's standard -configuration registers. - -3.1.7. poweroff_noirq() - -The poweroff_noirq() callback is hibernation-specific. It is executed after -poweroff() callbacks have been executed for all devices in the system. - -The role of this callback is analogous to the role of the suspend_noirq() and -freeze_noirq() callbacks described above, but it does not need to save the -contents of the device's registers. - -The difference between poweroff_noirq() and poweroff() is analogous to the -difference between suspend_noirq() and suspend(). - -3.1.8. resume_noirq() - -The resume_noirq() callback is only executed during system resume, after the -PM core has enabled the non-boot CPUs. The driver's interrupt handler will not -be invoked while resume_noirq() is running, so this callback can carry out -operations that might race with the interrupt handler. - -Since the PCI subsystem unconditionally puts all devices into the full power -state in the resume_noirq phase of system resume and restores their standard -configuration registers, resume_noirq() is usually not necessary. In general -it should only be used for performing operations that would lead to race -conditions if carried out by resume(). - -3.1.9. resume() - -The resume() callback is only executed during system resume, after -resume_noirq() callbacks have been executed for all devices in the system and -device interrupts have been enabled by the PM core. - -This callback is responsible for restoring the pre-suspend configuration of the -device and bringing it back to the fully functional state. The device should be -able to process I/O in a usual way after resume() has returned. - -3.1.10. thaw_noirq() - -The thaw_noirq() callback is hibernation-specific. It is executed after a -system image has been created and the non-boot CPUs have been enabled by the PM -core, in the thaw_noirq phase of hibernation. It also may be executed if the -loading of a hibernation image fails during system restore (it is then executed -after enabling the non-boot CPUs). The driver's interrupt handler will not be -invoked while thaw_noirq() is running. - -The role of this callback is analogous to the role of resume_noirq(). The -difference between these two callbacks is that thaw_noirq() is executed after -freeze() and freeze_noirq(), so in general it does not need to modify the -contents of the device's registers. - -3.1.11. thaw() - -The thaw() callback is hibernation-specific. It is executed after thaw_noirq() -callbacks have been executed for all devices in the system and after device -interrupts have been enabled by the PM core. - -This callback is responsible for restoring the pre-freeze configuration of -the device, so that it will work in a usual way after thaw() has returned. - -3.1.12. restore_noirq() - -The restore_noirq() callback is hibernation-specific. It is executed in the -restore_noirq phase of hibernation, when the boot kernel has passed control to -the image kernel and the non-boot CPUs have been enabled by the image kernel's -PM core. - -This callback is analogous to resume_noirq() with the exception that it cannot -make any assumption on the previous state of the device, even if the BIOS (or -generally the platform firmware) is known to preserve that state over a -suspend-resume cycle. - -For the vast majority of PCI device drivers there is no difference between -resume_noirq() and restore_noirq(). - -3.1.13. restore() - -The restore() callback is hibernation-specific. It is executed after -restore_noirq() callbacks have been executed for all devices in the system and -after the PM core has enabled device drivers' interrupt handlers to be invoked. - -This callback is analogous to resume(), just like restore_noirq() is analogous -to resume_noirq(). Consequently, the difference between restore_noirq() and -restore() is analogous to the difference between resume_noirq() and resume(). - -For the vast majority of PCI device drivers there is no difference between -resume() and restore(). - -3.1.14. complete() - -The complete() callback is executed in the following situations: - - during system resume, after resume() callbacks have been executed for all - devices, - - during hibernation, before saving the system image, after thaw() callbacks - have been executed for all devices, - - during system restore, when the system is going back to its pre-hibernation - state, after restore() callbacks have been executed for all devices. -It also may be executed if the loading of a hibernation image into memory fails -(in that case it is run after thaw() callbacks have been executed for all -devices that have drivers in the boot kernel). - -This callback is entirely optional, although it may be necessary if the -prepare() callback performs operations that need to be reversed. - -3.1.15. runtime_suspend() - -The runtime_suspend() callback is specific to device runtime power management -(runtime PM). It is executed by the PM core's runtime PM framework when the -device is about to be suspended (i.e. quiesced and put into a low-power state) -at run time. - -This callback is responsible for freezing the device and preparing it to be -put into a low-power state, but it must allow the PCI subsystem to perform all -of the PCI-specific actions necessary for suspending the device. - -3.1.16. runtime_resume() - -The runtime_resume() callback is specific to device runtime PM. It is executed -by the PM core's runtime PM framework when the device is about to be resumed -(i.e. put into the full-power state and programmed to process I/O normally) at -run time. - -This callback is responsible for restoring the normal functionality of the -device after it has been put into the full-power state by the PCI subsystem. -The device is expected to be able to process I/O in the usual way after -runtime_resume() has returned. - -3.1.17. runtime_idle() - -The runtime_idle() callback is specific to device runtime PM. It is executed -by the PM core's runtime PM framework whenever it may be desirable to suspend -the device according to the PM core's information. In particular, it is -automatically executed right after runtime_resume() has returned in case the -resume of the device has happened as a result of a spurious event. - -This callback is optional, but if it is not implemented or if it returns 0, the -PCI subsystem will call pm_runtime_suspend() for the device, which in turn will -cause the driver's runtime_suspend() callback to be executed. - -3.1.18. Pointing Multiple Callback Pointers to One Routine - -Although in principle each of the callbacks described in the previous -subsections can be defined as a separate function, it often is convenient to -point two or more members of struct dev_pm_ops to the same routine. There are -a few convenience macros that can be used for this purpose. - -The SIMPLE_DEV_PM_OPS macro declares a struct dev_pm_ops object with one -suspend routine pointed to by the .suspend(), .freeze(), and .poweroff() -members and one resume routine pointed to by the .resume(), .thaw(), and -.restore() members. The other function pointers in this struct dev_pm_ops are -unset. - -The UNIVERSAL_DEV_PM_OPS macro is similar to SIMPLE_DEV_PM_OPS, but it -additionally sets the .runtime_resume() pointer to the same value as -.resume() (and .thaw(), and .restore()) and the .runtime_suspend() pointer to -the same value as .suspend() (and .freeze() and .poweroff()). - -The SET_SYSTEM_SLEEP_PM_OPS can be used inside of a declaration of struct -dev_pm_ops to indicate that one suspend routine is to be pointed to by the -.suspend(), .freeze(), and .poweroff() members and one resume routine is to -be pointed to by the .resume(), .thaw(), and .restore() members. - -3.1.19. Driver Flags for Power Management - -The PM core allows device drivers to set flags that influence the handling of -power management for the devices by the core itself and by middle layer code -including the PCI bus type. The flags should be set once at the driver probe -time with the help of the dev_pm_set_driver_flags() function and they should not -be updated directly afterwards. - -The DPM_FLAG_NEVER_SKIP flag prevents the PM core from using the direct-complete -mechanism allowing device suspend/resume callbacks to be skipped if the device -is in runtime suspend when the system suspend starts. That also affects all of -the ancestors of the device, so this flag should only be used if absolutely -necessary. - -The DPM_FLAG_SMART_PREPARE flag instructs the PCI bus type to only return a -positive value from pci_pm_prepare() if the ->prepare callback provided by the -driver of the device returns a positive value. That allows the driver to opt -out from using the direct-complete mechanism dynamically. - -The DPM_FLAG_SMART_SUSPEND flag tells the PCI bus type that from the driver's -perspective the device can be safely left in runtime suspend during system -suspend. That causes pci_pm_suspend(), pci_pm_freeze() and pci_pm_poweroff() -to skip resuming the device from runtime suspend unless there are PCI-specific -reasons for doing that. Also, it causes pci_pm_suspend_late/noirq(), -pci_pm_freeze_late/noirq() and pci_pm_poweroff_late/noirq() to return early -if the device remains in runtime suspend in the beginning of the "late" phase -of the system-wide transition under way. Moreover, if the device is in -runtime suspend in pci_pm_resume_noirq() or pci_pm_restore_noirq(), its runtime -power management status will be changed to "active" (as it is going to be put -into D0 going forward), but if it is in runtime suspend in pci_pm_thaw_noirq(), -the function will set the power.direct_complete flag for it (to make the PM core -skip the subsequent "thaw" callbacks for it) and return. - -Setting the DPM_FLAG_LEAVE_SUSPENDED flag means that the driver prefers the -device to be left in suspend after system-wide transitions to the working state. -This flag is checked by the PM core, but the PCI bus type informs the PM core -which devices may be left in suspend from its perspective (that happens during -the "noirq" phase of system-wide suspend and analogous transitions) and next it -uses the dev_pm_may_skip_resume() helper to decide whether or not to return from -pci_pm_resume_noirq() early, as the PM core will skip the remaining resume -callbacks for the device during the transition under way and will set its -runtime PM status to "suspended" if dev_pm_may_skip_resume() returns "true" for -it. - -3.2. Device Runtime Power Management ------------------------------------- -In addition to providing device power management callbacks PCI device drivers -are responsible for controlling the runtime power management (runtime PM) of -their devices. - -The PCI device runtime PM is optional, but it is recommended that PCI device -drivers implement it at least in the cases where there is a reliable way of -verifying that the device is not used (like when the network cable is detached -from an Ethernet adapter or there are no devices attached to a USB controller). - -To support the PCI runtime PM the driver first needs to implement the -runtime_suspend() and runtime_resume() callbacks. It also may need to implement -the runtime_idle() callback to prevent the device from being suspended again -every time right after the runtime_resume() callback has returned -(alternatively, the runtime_suspend() callback will have to check if the -device should really be suspended and return -EAGAIN if that is not the case). - -The runtime PM of PCI devices is enabled by default by the PCI core. PCI -device drivers do not need to enable it and should not attempt to do so. -However, it is blocked by pci_pm_init() that runs the pm_runtime_forbid() -helper function. In addition to that, the runtime PM usage counter of -each PCI device is incremented by local_pci_probe() before executing the -probe callback provided by the device's driver. - -If a PCI driver implements the runtime PM callbacks and intends to use the -runtime PM framework provided by the PM core and the PCI subsystem, it needs -to decrement the device's runtime PM usage counter in its probe callback -function. If it doesn't do that, the counter will always be different from -zero for the device and it will never be runtime-suspended. The simplest -way to do that is by calling pm_runtime_put_noidle(), but if the driver -wants to schedule an autosuspend right away, for example, it may call -pm_runtime_put_autosuspend() instead for this purpose. Generally, it -just needs to call a function that decrements the devices usage counter -from its probe routine to make runtime PM work for the device. - -It is important to remember that the driver's runtime_suspend() callback -may be executed right after the usage counter has been decremented, because -user space may already have caused the pm_runtime_allow() helper function -unblocking the runtime PM of the device to run via sysfs, so the driver must -be prepared to cope with that. - -The driver itself should not call pm_runtime_allow(), though. Instead, it -should let user space or some platform-specific code do that (user space can -do it via sysfs as stated above), but it must be prepared to handle the -runtime PM of the device correctly as soon as pm_runtime_allow() is called -(which may happen at any time, even before the driver is loaded). - -When the driver's remove callback runs, it has to balance the decrementation -of the device's runtime PM usage counter at the probe time. For this reason, -if it has decremented the counter in its probe callback, it must run -pm_runtime_get_noresume() in its remove callback. [Since the core carries -out a runtime resume of the device and bumps up the device's usage counter -before running the driver's remove callback, the runtime PM of the device -is effectively disabled for the duration of the remove execution and all -runtime PM helper functions incrementing the device's usage counter are -then effectively equivalent to pm_runtime_get_noresume().] - -The runtime PM framework works by processing requests to suspend or resume -devices, or to check if they are idle (in which cases it is reasonable to -subsequently request that they be suspended). These requests are represented -by work items put into the power management workqueue, pm_wq. Although there -are a few situations in which power management requests are automatically -queued by the PM core (for example, after processing a request to resume a -device the PM core automatically queues a request to check if the device is -idle), device drivers are generally responsible for queuing power management -requests for their devices. For this purpose they should use the runtime PM -helper functions provided by the PM core, discussed in -Documentation/power/runtime_pm.txt. - -Devices can also be suspended and resumed synchronously, without placing a -request into pm_wq. In the majority of cases this also is done by their -drivers that use helper functions provided by the PM core for this purpose. - -For more information on the runtime PM of devices refer to -Documentation/power/runtime_pm.txt. - - -4. Resources -============ - -PCI Local Bus Specification, Rev. 3.0 -PCI Bus Power Management Interface Specification, Rev. 1.2 -Advanced Configuration and Power Interface (ACPI) Specification, Rev. 3.0b -PCI Express Base Specification, Rev. 2.0 -Documentation/driver-api/pm/devices.rst -Documentation/power/runtime_pm.txt diff --git a/Documentation/power/pm_qos_interface.rst b/Documentation/power/pm_qos_interface.rst new file mode 100644 index 000000000000..945fc6d760c9 --- /dev/null +++ b/Documentation/power/pm_qos_interface.rst @@ -0,0 +1,225 @@ +=============================== +PM Quality Of Service Interface +=============================== + +This interface provides a kernel and user mode interface for registering +performance expectations by drivers, subsystems and user space applications on +one of the parameters. + +Two different PM QoS frameworks are available: +1. PM QoS classes for cpu_dma_latency, network_latency, network_throughput, +memory_bandwidth. +2. the per-device PM QoS framework provides the API to manage the per-device latency +constraints and PM QoS flags. + +Each parameters have defined units: + + * latency: usec + * timeout: usec + * throughput: kbs (kilo bit / sec) + * memory bandwidth: mbs (mega bit / sec) + + +1. PM QoS framework +=================== + +The infrastructure exposes multiple misc device nodes one per implemented +parameter. The set of parameters implement is defined by pm_qos_power_init() +and pm_qos_params.h. This is done because having the available parameters +being runtime configurable or changeable from a driver was seen as too easy to +abuse. + +For each parameter a list of performance requests is maintained along with +an aggregated target value. The aggregated target value is updated with +changes to the request list or elements of the list. Typically the +aggregated target value is simply the max or min of the request values held +in the parameter list elements. +Note: the aggregated target value is implemented as an atomic variable so that +reading the aggregated value does not require any locking mechanism. + + +From kernel mode the use of this interface is simple: + +void pm_qos_add_request(handle, param_class, target_value): + Will insert an element into the list for that identified PM QoS class with the + target value. Upon change to this list the new target is recomputed and any + registered notifiers are called only if the target value is now different. + Clients of pm_qos need to save the returned handle for future use in other + pm_qos API functions. + +void pm_qos_update_request(handle, new_target_value): + Will update the list element pointed to by the handle with the new target value + and recompute the new aggregated target, calling the notification tree if the + target is changed. + +void pm_qos_remove_request(handle): + Will remove the element. After removal it will update the aggregate target and + call the notification tree if the target was changed as a result of removing + the request. + +int pm_qos_request(param_class): + Returns the aggregated value for a given PM QoS class. + +int pm_qos_request_active(handle): + Returns if the request is still active, i.e. it has not been removed from a + PM QoS class constraints list. + +int pm_qos_add_notifier(param_class, notifier): + Adds a notification callback function to the PM QoS class. The callback is + called when the aggregated value for the PM QoS class is changed. + +int pm_qos_remove_notifier(int param_class, notifier): + Removes the notification callback function for the PM QoS class. + + +From user mode: + +Only processes can register a pm_qos request. To provide for automatic +cleanup of a process, the interface requires the process to register its +parameter requests in the following way: + +To register the default pm_qos target for the specific parameter, the process +must open one of /dev/[cpu_dma_latency, network_latency, network_throughput] + +As long as the device node is held open that process has a registered +request on the parameter. + +To change the requested target value the process needs to write an s32 value to +the open device node. Alternatively the user mode program could write a hex +string for the value using 10 char long format e.g. "0x12345678". This +translates to a pm_qos_update_request call. + +To remove the user mode request for a target value simply close the device +node. + + +2. PM QoS per-device latency and flags framework +================================================ + +For each device, there are three lists of PM QoS requests. Two of them are +maintained along with the aggregated targets of resume latency and active +state latency tolerance (in microseconds) and the third one is for PM QoS flags. +Values are updated in response to changes of the request list. + +The target values of resume latency and active state latency tolerance are +simply the minimum of the request values held in the parameter list elements. +The PM QoS flags aggregate value is a gather (bitwise OR) of all list elements' +values. One device PM QoS flag is defined currently: PM_QOS_FLAG_NO_POWER_OFF. + +Note: The aggregated target values are implemented in such a way that reading +the aggregated value does not require any locking mechanism. + + +From kernel mode the use of this interface is the following: + +int dev_pm_qos_add_request(device, handle, type, value): + Will insert an element into the list for that identified device with the + target value. Upon change to this list the new target is recomputed and any + registered notifiers are called only if the target value is now different. + Clients of dev_pm_qos need to save the handle for future use in other + dev_pm_qos API functions. + +int dev_pm_qos_update_request(handle, new_value): + Will update the list element pointed to by the handle with the new target + value and recompute the new aggregated target, calling the notification + trees if the target is changed. + +int dev_pm_qos_remove_request(handle): + Will remove the element. After removal it will update the aggregate target + and call the notification trees if the target was changed as a result of + removing the request. + +s32 dev_pm_qos_read_value(device): + Returns the aggregated value for a given device's constraints list. + +enum pm_qos_flags_status dev_pm_qos_flags(device, mask) + Check PM QoS flags of the given device against the given mask of flags. + The meaning of the return values is as follows: + + PM_QOS_FLAGS_ALL: + All flags from the mask are set + PM_QOS_FLAGS_SOME: + Some flags from the mask are set + PM_QOS_FLAGS_NONE: + No flags from the mask are set + PM_QOS_FLAGS_UNDEFINED: + The device's PM QoS structure has not been initialized + or the list of requests is empty. + +int dev_pm_qos_add_ancestor_request(dev, handle, type, value) + Add a PM QoS request for the first direct ancestor of the given device whose + power.ignore_children flag is unset (for DEV_PM_QOS_RESUME_LATENCY requests) + or whose power.set_latency_tolerance callback pointer is not NULL (for + DEV_PM_QOS_LATENCY_TOLERANCE requests). + +int dev_pm_qos_expose_latency_limit(device, value) + Add a request to the device's PM QoS list of resume latency constraints and + create a sysfs attribute pm_qos_resume_latency_us under the device's power + directory allowing user space to manipulate that request. + +void dev_pm_qos_hide_latency_limit(device) + Drop the request added by dev_pm_qos_expose_latency_limit() from the device's + PM QoS list of resume latency constraints and remove sysfs attribute + pm_qos_resume_latency_us from the device's power directory. + +int dev_pm_qos_expose_flags(device, value) + Add a request to the device's PM QoS list of flags and create sysfs attribute + pm_qos_no_power_off under the device's power directory allowing user space to + change the value of the PM_QOS_FLAG_NO_POWER_OFF flag. + +void dev_pm_qos_hide_flags(device) + Drop the request added by dev_pm_qos_expose_flags() from the device's PM QoS list + of flags and remove sysfs attribute pm_qos_no_power_off from the device's power + directory. + +Notification mechanisms: + +The per-device PM QoS framework has a per-device notification tree. + +int dev_pm_qos_add_notifier(device, notifier): + Adds a notification callback function for the device. + The callback is called when the aggregated value of the device constraints list + is changed (for resume latency device PM QoS only). + +int dev_pm_qos_remove_notifier(device, notifier): + Removes the notification callback function for the device. + + +Active state latency tolerance +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This device PM QoS type is used to support systems in which hardware may switch +to energy-saving operation modes on the fly. In those systems, if the operation +mode chosen by the hardware attempts to save energy in an overly aggressive way, +it may cause excess latencies to be visible to software, causing it to miss +certain protocol requirements or target frame or sample rates etc. + +If there is a latency tolerance control mechanism for a given device available +to software, the .set_latency_tolerance callback in that device's dev_pm_info +structure should be populated. The routine pointed to by it is should implement +whatever is necessary to transfer the effective requirement value to the +hardware. + +Whenever the effective latency tolerance changes for the device, its +.set_latency_tolerance() callback will be executed and the effective value will +be passed to it. If that value is negative, which means that the list of +latency tolerance requirements for the device is empty, the callback is expected +to switch the underlying hardware latency tolerance control mechanism to an +autonomous mode if available. If that value is PM_QOS_LATENCY_ANY, in turn, and +the hardware supports a special "no requirement" setting, the callback is +expected to use it. That allows software to prevent the hardware from +automatically updating the device's latency tolerance in response to its power +state changes (e.g. during transitions from D3cold to D0), which generally may +be done in the autonomous latency tolerance control mode. + +If .set_latency_tolerance() is present for the device, sysfs attribute +pm_qos_latency_tolerance_us will be present in the devivce's power directory. +Then, user space can use that attribute to specify its latency tolerance +requirement for the device, if any. Writing "any" to it means "no requirement, +but do not let the hardware control latency tolerance" and writing "auto" to it +allows the hardware to be switched to the autonomous mode if there are no other +requirements from the kernel side in the device's list. + +Kernel code can use the functions described above along with the +DEV_PM_QOS_LATENCY_TOLERANCE device PM QoS type to add, remove and update +latency tolerance requirements for devices. diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt deleted file mode 100644 index 19c5f7b1a7ba..000000000000 --- a/Documentation/power/pm_qos_interface.txt +++ /dev/null @@ -1,212 +0,0 @@ -PM Quality Of Service Interface. - -This interface provides a kernel and user mode interface for registering -performance expectations by drivers, subsystems and user space applications on -one of the parameters. - -Two different PM QoS frameworks are available: -1. PM QoS classes for cpu_dma_latency, network_latency, network_throughput, -memory_bandwidth. -2. the per-device PM QoS framework provides the API to manage the per-device latency -constraints and PM QoS flags. - -Each parameters have defined units: - * latency: usec - * timeout: usec - * throughput: kbs (kilo bit / sec) - * memory bandwidth: mbs (mega bit / sec) - - -1. PM QoS framework - -The infrastructure exposes multiple misc device nodes one per implemented -parameter. The set of parameters implement is defined by pm_qos_power_init() -and pm_qos_params.h. This is done because having the available parameters -being runtime configurable or changeable from a driver was seen as too easy to -abuse. - -For each parameter a list of performance requests is maintained along with -an aggregated target value. The aggregated target value is updated with -changes to the request list or elements of the list. Typically the -aggregated target value is simply the max or min of the request values held -in the parameter list elements. -Note: the aggregated target value is implemented as an atomic variable so that -reading the aggregated value does not require any locking mechanism. - - -From kernel mode the use of this interface is simple: - -void pm_qos_add_request(handle, param_class, target_value): -Will insert an element into the list for that identified PM QoS class with the -target value. Upon change to this list the new target is recomputed and any -registered notifiers are called only if the target value is now different. -Clients of pm_qos need to save the returned handle for future use in other -pm_qos API functions. - -void pm_qos_update_request(handle, new_target_value): -Will update the list element pointed to by the handle with the new target value -and recompute the new aggregated target, calling the notification tree if the -target is changed. - -void pm_qos_remove_request(handle): -Will remove the element. After removal it will update the aggregate target and -call the notification tree if the target was changed as a result of removing -the request. - -int pm_qos_request(param_class): -Returns the aggregated value for a given PM QoS class. - -int pm_qos_request_active(handle): -Returns if the request is still active, i.e. it has not been removed from a -PM QoS class constraints list. - -int pm_qos_add_notifier(param_class, notifier): -Adds a notification callback function to the PM QoS class. The callback is -called when the aggregated value for the PM QoS class is changed. - -int pm_qos_remove_notifier(int param_class, notifier): -Removes the notification callback function for the PM QoS class. - - -From user mode: -Only processes can register a pm_qos request. To provide for automatic -cleanup of a process, the interface requires the process to register its -parameter requests in the following way: - -To register the default pm_qos target for the specific parameter, the process -must open one of /dev/[cpu_dma_latency, network_latency, network_throughput] - -As long as the device node is held open that process has a registered -request on the parameter. - -To change the requested target value the process needs to write an s32 value to -the open device node. Alternatively the user mode program could write a hex -string for the value using 10 char long format e.g. "0x12345678". This -translates to a pm_qos_update_request call. - -To remove the user mode request for a target value simply close the device -node. - - -2. PM QoS per-device latency and flags framework - -For each device, there are three lists of PM QoS requests. Two of them are -maintained along with the aggregated targets of resume latency and active -state latency tolerance (in microseconds) and the third one is for PM QoS flags. -Values are updated in response to changes of the request list. - -The target values of resume latency and active state latency tolerance are -simply the minimum of the request values held in the parameter list elements. -The PM QoS flags aggregate value is a gather (bitwise OR) of all list elements' -values. One device PM QoS flag is defined currently: PM_QOS_FLAG_NO_POWER_OFF. - -Note: The aggregated target values are implemented in such a way that reading -the aggregated value does not require any locking mechanism. - - -From kernel mode the use of this interface is the following: - -int dev_pm_qos_add_request(device, handle, type, value): -Will insert an element into the list for that identified device with the -target value. Upon change to this list the new target is recomputed and any -registered notifiers are called only if the target value is now different. -Clients of dev_pm_qos need to save the handle for future use in other -dev_pm_qos API functions. - -int dev_pm_qos_update_request(handle, new_value): -Will update the list element pointed to by the handle with the new target value -and recompute the new aggregated target, calling the notification trees if the -target is changed. - -int dev_pm_qos_remove_request(handle): -Will remove the element. After removal it will update the aggregate target and -call the notification trees if the target was changed as a result of removing -the request. - -s32 dev_pm_qos_read_value(device): -Returns the aggregated value for a given device's constraints list. - -enum pm_qos_flags_status dev_pm_qos_flags(device, mask) -Check PM QoS flags of the given device against the given mask of flags. -The meaning of the return values is as follows: - PM_QOS_FLAGS_ALL: All flags from the mask are set - PM_QOS_FLAGS_SOME: Some flags from the mask are set - PM_QOS_FLAGS_NONE: No flags from the mask are set - PM_QOS_FLAGS_UNDEFINED: The device's PM QoS structure has not been - initialized or the list of requests is empty. - -int dev_pm_qos_add_ancestor_request(dev, handle, type, value) -Add a PM QoS request for the first direct ancestor of the given device whose -power.ignore_children flag is unset (for DEV_PM_QOS_RESUME_LATENCY requests) -or whose power.set_latency_tolerance callback pointer is not NULL (for -DEV_PM_QOS_LATENCY_TOLERANCE requests). - -int dev_pm_qos_expose_latency_limit(device, value) -Add a request to the device's PM QoS list of resume latency constraints and -create a sysfs attribute pm_qos_resume_latency_us under the device's power -directory allowing user space to manipulate that request. - -void dev_pm_qos_hide_latency_limit(device) -Drop the request added by dev_pm_qos_expose_latency_limit() from the device's -PM QoS list of resume latency constraints and remove sysfs attribute -pm_qos_resume_latency_us from the device's power directory. - -int dev_pm_qos_expose_flags(device, value) -Add a request to the device's PM QoS list of flags and create sysfs attribute -pm_qos_no_power_off under the device's power directory allowing user space to -change the value of the PM_QOS_FLAG_NO_POWER_OFF flag. - -void dev_pm_qos_hide_flags(device) -Drop the request added by dev_pm_qos_expose_flags() from the device's PM QoS list -of flags and remove sysfs attribute pm_qos_no_power_off from the device's power -directory. - -Notification mechanisms: -The per-device PM QoS framework has a per-device notification tree. - -int dev_pm_qos_add_notifier(device, notifier): -Adds a notification callback function for the device. -The callback is called when the aggregated value of the device constraints list -is changed (for resume latency device PM QoS only). - -int dev_pm_qos_remove_notifier(device, notifier): -Removes the notification callback function for the device. - - -Active state latency tolerance - -This device PM QoS type is used to support systems in which hardware may switch -to energy-saving operation modes on the fly. In those systems, if the operation -mode chosen by the hardware attempts to save energy in an overly aggressive way, -it may cause excess latencies to be visible to software, causing it to miss -certain protocol requirements or target frame or sample rates etc. - -If there is a latency tolerance control mechanism for a given device available -to software, the .set_latency_tolerance callback in that device's dev_pm_info -structure should be populated. The routine pointed to by it is should implement -whatever is necessary to transfer the effective requirement value to the -hardware. - -Whenever the effective latency tolerance changes for the device, its -.set_latency_tolerance() callback will be executed and the effective value will -be passed to it. If that value is negative, which means that the list of -latency tolerance requirements for the device is empty, the callback is expected -to switch the underlying hardware latency tolerance control mechanism to an -autonomous mode if available. If that value is PM_QOS_LATENCY_ANY, in turn, and -the hardware supports a special "no requirement" setting, the callback is -expected to use it. That allows software to prevent the hardware from -automatically updating the device's latency tolerance in response to its power -state changes (e.g. during transitions from D3cold to D0), which generally may -be done in the autonomous latency tolerance control mode. - -If .set_latency_tolerance() is present for the device, sysfs attribute -pm_qos_latency_tolerance_us will be present in the devivce's power directory. -Then, user space can use that attribute to specify its latency tolerance -requirement for the device, if any. Writing "any" to it means "no requirement, -but do not let the hardware control latency tolerance" and writing "auto" to it -allows the hardware to be switched to the autonomous mode if there are no other -requirements from the kernel side in the device's list. - -Kernel code can use the functions described above along with the -DEV_PM_QOS_LATENCY_TOLERANCE device PM QoS type to add, remove and update -latency tolerance requirements for devices. diff --git a/Documentation/power/power_supply_class.rst b/Documentation/power/power_supply_class.rst new file mode 100644 index 000000000000..3f2c3fe38a61 --- /dev/null +++ b/Documentation/power/power_supply_class.rst @@ -0,0 +1,282 @@ +======================== +Linux power supply class +======================== + +Synopsis +~~~~~~~~ +Power supply class used to represent battery, UPS, AC or DC power supply +properties to user-space. + +It defines core set of attributes, which should be applicable to (almost) +every power supply out there. Attributes are available via sysfs and uevent +interfaces. + +Each attribute has well defined meaning, up to unit of measure used. While +the attributes provided are believed to be universally applicable to any +power supply, specific monitoring hardware may not be able to provide them +all, so any of them may be skipped. + +Power supply class is extensible, and allows to define drivers own attributes. +The core attribute set is subject to the standard Linux evolution (i.e. +if it will be found that some attribute is applicable to many power supply +types or their drivers, it can be added to the core set). + +It also integrates with LED framework, for the purpose of providing +typically expected feedback of battery charging/fully charged status and +AC/USB power supply online status. (Note that specific details of the +indication (including whether to use it at all) are fully controllable by +user and/or specific machine defaults, per design principles of LED +framework). + + +Attributes/properties +~~~~~~~~~~~~~~~~~~~~~ +Power supply class has predefined set of attributes, this eliminates code +duplication across drivers. Power supply class insist on reusing its +predefined attributes *and* their units. + +So, userspace gets predictable set of attributes and their units for any +kind of power supply, and can process/present them to a user in consistent +manner. Results for different power supplies and machines are also directly +comparable. + +See drivers/power/supply/ds2760_battery.c and drivers/power/supply/pda_power.c +for the example how to declare and handle attributes. + + +Units +~~~~~ +Quoting include/linux/power_supply.h: + + All voltages, currents, charges, energies, time and temperatures in µV, + µA, µAh, µWh, seconds and tenths of degree Celsius unless otherwise + stated. It's driver's job to convert its raw values to units in which + this class operates. + + +Attributes/properties detailed +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ++--------------------------------------------------------------------------+ +| **Charge/Energy/Capacity - how to not confuse** | ++--------------------------------------------------------------------------+ +| **Because both "charge" (µAh) and "energy" (µWh) represents "capacity" | +| of battery, this class distinguish these terms. Don't mix them!** | +| | +| - `CHARGE_*` | +| attributes represents capacity in µAh only. | +| - `ENERGY_*` | +| attributes represents capacity in µWh only. | +| - `CAPACITY` | +| attribute represents capacity in *percents*, from 0 to 100. | ++--------------------------------------------------------------------------+ + +Postfixes: + +_AVG + *hardware* averaged value, use it if your hardware is really able to + report averaged values. +_NOW + momentary/instantaneous values. + +STATUS + this attribute represents operating status (charging, full, + discharging (i.e. powering a load), etc.). This corresponds to + `BATTERY_STATUS_*` values, as defined in battery.h. + +CHARGE_TYPE + batteries can typically charge at different rates. + This defines trickle and fast charges. For batteries that + are already charged or discharging, 'n/a' can be displayed (or + 'unknown', if the status is not known). + +AUTHENTIC + indicates the power supply (battery or charger) connected + to the platform is authentic(1) or non authentic(0). + +HEALTH + represents health of the battery, values corresponds to + POWER_SUPPLY_HEALTH_*, defined in battery.h. + +VOLTAGE_OCV + open circuit voltage of the battery. + +VOLTAGE_MAX_DESIGN, VOLTAGE_MIN_DESIGN + design values for maximal and minimal power supply voltages. + Maximal/minimal means values of voltages when battery considered + "full"/"empty" at normal conditions. Yes, there is no direct relation + between voltage and battery capacity, but some dumb + batteries use voltage for very approximated calculation of capacity. + Battery driver also can use this attribute just to inform userspace + about maximal and minimal voltage thresholds of a given battery. + +VOLTAGE_MAX, VOLTAGE_MIN + same as _DESIGN voltage values except that these ones should be used + if hardware could only guess (measure and retain) the thresholds of a + given power supply. + +VOLTAGE_BOOT + Reports the voltage measured during boot + +CURRENT_BOOT + Reports the current measured during boot + +CHARGE_FULL_DESIGN, CHARGE_EMPTY_DESIGN + design charge values, when battery considered full/empty. + +ENERGY_FULL_DESIGN, ENERGY_EMPTY_DESIGN + same as above but for energy. + +CHARGE_FULL, CHARGE_EMPTY + These attributes means "last remembered value of charge when battery + became full/empty". It also could mean "value of charge when battery + considered full/empty at given conditions (temperature, age)". + I.e. these attributes represents real thresholds, not design values. + +ENERGY_FULL, ENERGY_EMPTY + same as above but for energy. + +CHARGE_COUNTER + the current charge counter (in µAh). This could easily + be negative; there is no empty or full value. It is only useful for + relative, time-based measurements. + +PRECHARGE_CURRENT + the maximum charge current during precharge phase of charge cycle + (typically 20% of battery capacity). + +CHARGE_TERM_CURRENT + Charge termination current. The charge cycle terminates when battery + voltage is above recharge threshold, and charge current is below + this setting (typically 10% of battery capacity). + +CONSTANT_CHARGE_CURRENT + constant charge current programmed by charger. + + +CONSTANT_CHARGE_CURRENT_MAX + maximum charge current supported by the power supply object. + +CONSTANT_CHARGE_VOLTAGE + constant charge voltage programmed by charger. +CONSTANT_CHARGE_VOLTAGE_MAX + maximum charge voltage supported by the power supply object. + +INPUT_CURRENT_LIMIT + input current limit programmed by charger. Indicates + the current drawn from a charging source. + +CHARGE_CONTROL_LIMIT + current charge control limit setting +CHARGE_CONTROL_LIMIT_MAX + maximum charge control limit setting + +CALIBRATE + battery or coulomb counter calibration status + +CAPACITY + capacity in percents. +CAPACITY_ALERT_MIN + minimum capacity alert value in percents. +CAPACITY_ALERT_MAX + maximum capacity alert value in percents. +CAPACITY_LEVEL + capacity level. This corresponds to POWER_SUPPLY_CAPACITY_LEVEL_*. + +TEMP + temperature of the power supply. +TEMP_ALERT_MIN + minimum battery temperature alert. +TEMP_ALERT_MAX + maximum battery temperature alert. +TEMP_AMBIENT + ambient temperature. +TEMP_AMBIENT_ALERT_MIN + minimum ambient temperature alert. +TEMP_AMBIENT_ALERT_MAX + maximum ambient temperature alert. +TEMP_MIN + minimum operatable temperature +TEMP_MAX + maximum operatable temperature + +TIME_TO_EMPTY + seconds left for battery to be considered empty + (i.e. while battery powers a load) +TIME_TO_FULL + seconds left for battery to be considered full + (i.e. while battery is charging) + + +Battery <-> external power supply interaction +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Often power supplies are acting as supplies and supplicants at the same +time. Batteries are good example. So, batteries usually care if they're +externally powered or not. + +For that case, power supply class implements notification mechanism for +batteries. + +External power supply (AC) lists supplicants (batteries) names in +"supplied_to" struct member, and each power_supply_changed() call +issued by external power supply will notify supplicants via +external_power_changed callback. + + +Devicetree battery characteristics +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Drivers should call power_supply_get_battery_info() to obtain battery +characteristics from a devicetree battery node, defined in +Documentation/devicetree/bindings/power/supply/battery.txt. This is +implemented in drivers/power/supply/bq27xxx_battery.c. + +Properties in struct power_supply_battery_info and their counterparts in the +battery node have names corresponding to elements in enum power_supply_property, +for naming consistency between sysfs attributes and battery node properties. + + +QA +~~ + +Q: + Where is POWER_SUPPLY_PROP_XYZ attribute? +A: + If you cannot find attribute suitable for your driver needs, feel free + to add it and send patch along with your driver. + + The attributes available currently are the ones currently provided by the + drivers written. + + Good candidates to add in future: model/part#, cycle_time, manufacturer, + etc. + + +Q: + I have some very specific attribute (e.g. battery color), should I add + this attribute to standard ones? +A: + Most likely, no. Such attribute can be placed in the driver itself, if + it is useful. Of course, if the attribute in question applicable to + large set of batteries, provided by many drivers, and/or comes from + some general battery specification/standard, it may be a candidate to + be added to the core attribute set. + + +Q: + Suppose, my battery monitoring chip/firmware does not provides capacity + in percents, but provides charge_{now,full,empty}. Should I calculate + percentage capacity manually, inside the driver, and register CAPACITY + attribute? The same question about time_to_empty/time_to_full. +A: + Most likely, no. This class is designed to export properties which are + directly measurable by the specific hardware available. + + Inferring not available properties using some heuristics or mathematical + model is not subject of work for a battery driver. Such functionality + should be factored out, and in fact, apm_power, the driver to serve + legacy APM API on top of power supply class, uses a simple heuristic of + approximating remaining battery capacity based on its charge, current, + voltage and so on. But full-fledged battery model is likely not subject + for kernel at all, as it would require floating point calculation to deal + with things like differential equations and Kalman filters. This is + better be handled by batteryd/libbattery, yet to be written. diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt deleted file mode 100644 index 300d37896e51..000000000000 --- a/Documentation/power/power_supply_class.txt +++ /dev/null @@ -1,231 +0,0 @@ -Linux power supply class -======================== - -Synopsis -~~~~~~~~ -Power supply class used to represent battery, UPS, AC or DC power supply -properties to user-space. - -It defines core set of attributes, which should be applicable to (almost) -every power supply out there. Attributes are available via sysfs and uevent -interfaces. - -Each attribute has well defined meaning, up to unit of measure used. While -the attributes provided are believed to be universally applicable to any -power supply, specific monitoring hardware may not be able to provide them -all, so any of them may be skipped. - -Power supply class is extensible, and allows to define drivers own attributes. -The core attribute set is subject to the standard Linux evolution (i.e. -if it will be found that some attribute is applicable to many power supply -types or their drivers, it can be added to the core set). - -It also integrates with LED framework, for the purpose of providing -typically expected feedback of battery charging/fully charged status and -AC/USB power supply online status. (Note that specific details of the -indication (including whether to use it at all) are fully controllable by -user and/or specific machine defaults, per design principles of LED -framework). - - -Attributes/properties -~~~~~~~~~~~~~~~~~~~~~ -Power supply class has predefined set of attributes, this eliminates code -duplication across drivers. Power supply class insist on reusing its -predefined attributes *and* their units. - -So, userspace gets predictable set of attributes and their units for any -kind of power supply, and can process/present them to a user in consistent -manner. Results for different power supplies and machines are also directly -comparable. - -See drivers/power/supply/ds2760_battery.c and drivers/power/supply/pda_power.c -for the example how to declare and handle attributes. - - -Units -~~~~~ -Quoting include/linux/power_supply.h: - - All voltages, currents, charges, energies, time and temperatures in µV, - µA, µAh, µWh, seconds and tenths of degree Celsius unless otherwise - stated. It's driver's job to convert its raw values to units in which - this class operates. - - -Attributes/properties detailed -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -~ ~ ~ ~ ~ ~ ~ Charge/Energy/Capacity - how to not confuse ~ ~ ~ ~ ~ ~ ~ -~ ~ -~ Because both "charge" (µAh) and "energy" (µWh) represents "capacity" ~ -~ of battery, this class distinguish these terms. Don't mix them! ~ -~ ~ -~ CHARGE_* attributes represents capacity in µAh only. ~ -~ ENERGY_* attributes represents capacity in µWh only. ~ -~ CAPACITY attribute represents capacity in *percents*, from 0 to 100. ~ -~ ~ -~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ - -Postfixes: -_AVG - *hardware* averaged value, use it if your hardware is really able to -report averaged values. -_NOW - momentary/instantaneous values. - -STATUS - this attribute represents operating status (charging, full, -discharging (i.e. powering a load), etc.). This corresponds to -BATTERY_STATUS_* values, as defined in battery.h. - -CHARGE_TYPE - batteries can typically charge at different rates. -This defines trickle and fast charges. For batteries that -are already charged or discharging, 'n/a' can be displayed (or -'unknown', if the status is not known). - -AUTHENTIC - indicates the power supply (battery or charger) connected -to the platform is authentic(1) or non authentic(0). - -HEALTH - represents health of the battery, values corresponds to -POWER_SUPPLY_HEALTH_*, defined in battery.h. - -VOLTAGE_OCV - open circuit voltage of the battery. - -VOLTAGE_MAX_DESIGN, VOLTAGE_MIN_DESIGN - design values for maximal and -minimal power supply voltages. Maximal/minimal means values of voltages -when battery considered "full"/"empty" at normal conditions. Yes, there is -no direct relation between voltage and battery capacity, but some dumb -batteries use voltage for very approximated calculation of capacity. -Battery driver also can use this attribute just to inform userspace -about maximal and minimal voltage thresholds of a given battery. - -VOLTAGE_MAX, VOLTAGE_MIN - same as _DESIGN voltage values except that -these ones should be used if hardware could only guess (measure and -retain) the thresholds of a given power supply. - -VOLTAGE_BOOT - Reports the voltage measured during boot - -CURRENT_BOOT - Reports the current measured during boot - -CHARGE_FULL_DESIGN, CHARGE_EMPTY_DESIGN - design charge values, when -battery considered full/empty. - -ENERGY_FULL_DESIGN, ENERGY_EMPTY_DESIGN - same as above but for energy. - -CHARGE_FULL, CHARGE_EMPTY - These attributes means "last remembered value -of charge when battery became full/empty". It also could mean "value of -charge when battery considered full/empty at given conditions (temperature, -age)". I.e. these attributes represents real thresholds, not design values. - -ENERGY_FULL, ENERGY_EMPTY - same as above but for energy. - -CHARGE_COUNTER - the current charge counter (in µAh). This could easily -be negative; there is no empty or full value. It is only useful for -relative, time-based measurements. - -PRECHARGE_CURRENT - the maximum charge current during precharge phase -of charge cycle (typically 20% of battery capacity). -CHARGE_TERM_CURRENT - Charge termination current. The charge cycle -terminates when battery voltage is above recharge threshold, and charge -current is below this setting (typically 10% of battery capacity). - -CONSTANT_CHARGE_CURRENT - constant charge current programmed by charger. -CONSTANT_CHARGE_CURRENT_MAX - maximum charge current supported by the -power supply object. - -CONSTANT_CHARGE_VOLTAGE - constant charge voltage programmed by charger. -CONSTANT_CHARGE_VOLTAGE_MAX - maximum charge voltage supported by the -power supply object. - -INPUT_CURRENT_LIMIT - input current limit programmed by charger. Indicates -the current drawn from a charging source. - -CHARGE_CONTROL_LIMIT - current charge control limit setting -CHARGE_CONTROL_LIMIT_MAX - maximum charge control limit setting - -CALIBRATE - battery or coulomb counter calibration status - -CAPACITY - capacity in percents. -CAPACITY_ALERT_MIN - minimum capacity alert value in percents. -CAPACITY_ALERT_MAX - maximum capacity alert value in percents. -CAPACITY_LEVEL - capacity level. This corresponds to -POWER_SUPPLY_CAPACITY_LEVEL_*. - -TEMP - temperature of the power supply. -TEMP_ALERT_MIN - minimum battery temperature alert. -TEMP_ALERT_MAX - maximum battery temperature alert. -TEMP_AMBIENT - ambient temperature. -TEMP_AMBIENT_ALERT_MIN - minimum ambient temperature alert. -TEMP_AMBIENT_ALERT_MAX - maximum ambient temperature alert. -TEMP_MIN - minimum operatable temperature -TEMP_MAX - maximum operatable temperature - -TIME_TO_EMPTY - seconds left for battery to be considered empty (i.e. -while battery powers a load) -TIME_TO_FULL - seconds left for battery to be considered full (i.e. -while battery is charging) - - -Battery <-> external power supply interaction -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Often power supplies are acting as supplies and supplicants at the same -time. Batteries are good example. So, batteries usually care if they're -externally powered or not. - -For that case, power supply class implements notification mechanism for -batteries. - -External power supply (AC) lists supplicants (batteries) names in -"supplied_to" struct member, and each power_supply_changed() call -issued by external power supply will notify supplicants via -external_power_changed callback. - - -Devicetree battery characteristics -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Drivers should call power_supply_get_battery_info() to obtain battery -characteristics from a devicetree battery node, defined in -Documentation/devicetree/bindings/power/supply/battery.txt. This is -implemented in drivers/power/supply/bq27xxx_battery.c. - -Properties in struct power_supply_battery_info and their counterparts in the -battery node have names corresponding to elements in enum power_supply_property, -for naming consistency between sysfs attributes and battery node properties. - - -QA -~~ -Q: Where is POWER_SUPPLY_PROP_XYZ attribute? -A: If you cannot find attribute suitable for your driver needs, feel free - to add it and send patch along with your driver. - - The attributes available currently are the ones currently provided by the - drivers written. - - Good candidates to add in future: model/part#, cycle_time, manufacturer, - etc. - - -Q: I have some very specific attribute (e.g. battery color), should I add - this attribute to standard ones? -A: Most likely, no. Such attribute can be placed in the driver itself, if - it is useful. Of course, if the attribute in question applicable to - large set of batteries, provided by many drivers, and/or comes from - some general battery specification/standard, it may be a candidate to - be added to the core attribute set. - - -Q: Suppose, my battery monitoring chip/firmware does not provides capacity - in percents, but provides charge_{now,full,empty}. Should I calculate - percentage capacity manually, inside the driver, and register CAPACITY - attribute? The same question about time_to_empty/time_to_full. -A: Most likely, no. This class is designed to export properties which are - directly measurable by the specific hardware available. - - Inferring not available properties using some heuristics or mathematical - model is not subject of work for a battery driver. Such functionality - should be factored out, and in fact, apm_power, the driver to serve - legacy APM API on top of power supply class, uses a simple heuristic of - approximating remaining battery capacity based on its charge, current, - voltage and so on. But full-fledged battery model is likely not subject - for kernel at all, as it would require floating point calculation to deal - with things like differential equations and Kalman filters. This is - better be handled by batteryd/libbattery, yet to be written. diff --git a/Documentation/power/powercap/powercap.rst b/Documentation/power/powercap/powercap.rst new file mode 100644 index 000000000000..7ae3b44c7624 --- /dev/null +++ b/Documentation/power/powercap/powercap.rst @@ -0,0 +1,257 @@ +======================= +Power Capping Framework +======================= + +The power capping framework provides a consistent interface between the kernel +and the user space that allows power capping drivers to expose the settings to +user space in a uniform way. + +Terminology +=========== + +The framework exposes power capping devices to user space via sysfs in the +form of a tree of objects. The objects at the root level of the tree represent +'control types', which correspond to different methods of power capping. For +example, the intel-rapl control type represents the Intel "Running Average +Power Limit" (RAPL) technology, whereas the 'idle-injection' control type +corresponds to the use of idle injection for controlling power. + +Power zones represent different parts of the system, which can be controlled and +monitored using the power capping method determined by the control type the +given zone belongs to. They each contain attributes for monitoring power, as +well as controls represented in the form of power constraints. If the parts of +the system represented by different power zones are hierarchical (that is, one +bigger part consists of multiple smaller parts that each have their own power +controls), those power zones may also be organized in a hierarchy with one +parent power zone containing multiple subzones and so on to reflect the power +control topology of the system. In that case, it is possible to apply power +capping to a set of devices together using the parent power zone and if more +fine grained control is required, it can be applied through the subzones. + + +Example sysfs interface tree:: + + /sys/devices/virtual/powercap + └──intel-rapl + ├──intel-rapl:0 + │   ├──constraint_0_name + │   ├──constraint_0_power_limit_uw + │   ├──constraint_0_time_window_us + │   ├──constraint_1_name + │   ├──constraint_1_power_limit_uw + │   ├──constraint_1_time_window_us + │   ├──device -> ../../intel-rapl + │   ├──energy_uj + │   ├──intel-rapl:0:0 + │   │   ├──constraint_0_name + │   │   ├──constraint_0_power_limit_uw + │   │   ├──constraint_0_time_window_us + │   │   ├──constraint_1_name + │   │   ├──constraint_1_power_limit_uw + │   │   ├──constraint_1_time_window_us + │   │   ├──device -> ../../intel-rapl:0 + │   │   ├──energy_uj + │   │   ├──max_energy_range_uj + │   │   ├──name + │   │   ├──enabled + │   │   ├──power + │   │   │   ├──async + │   │   │   [] + │   │   ├──subsystem -> ../../../../../../class/power_cap + │   │   └──uevent + │   ├──intel-rapl:0:1 + │   │   ├──constraint_0_name + │   │   ├──constraint_0_power_limit_uw + │   │   ├──constraint_0_time_window_us + │   │   ├──constraint_1_name + │   │   ├──constraint_1_power_limit_uw + │   │   ├──constraint_1_time_window_us + │   │   ├──device -> ../../intel-rapl:0 + │   │   ├──energy_uj + │   │   ├──max_energy_range_uj + │   │   ├──name + │   │   ├──enabled + │   │   ├──power + │   │   │   ├──async + │   │   │   [] + │   │   ├──subsystem -> ../../../../../../class/power_cap + │   │   └──uevent + │   ├──max_energy_range_uj + │   ├──max_power_range_uw + │   ├──name + │   ├──enabled + │   ├──power + │   │   ├──async + │   │   [] + │   ├──subsystem -> ../../../../../class/power_cap + │   ├──enabled + │   ├──uevent + ├──intel-rapl:1 + │   ├──constraint_0_name + │   ├──constraint_0_power_limit_uw + │   ├──constraint_0_time_window_us + │   ├──constraint_1_name + │   ├──constraint_1_power_limit_uw + │   ├──constraint_1_time_window_us + │   ├──device -> ../../intel-rapl + │   ├──energy_uj + │   ├──intel-rapl:1:0 + │   │   ├──constraint_0_name + │   │   ├──constraint_0_power_limit_uw + │   │   ├──constraint_0_time_window_us + │   │   ├──constraint_1_name + │   │   ├──constraint_1_power_limit_uw + │   │   ├──constraint_1_time_window_us + │   │   ├──device -> ../../intel-rapl:1 + │   │   ├──energy_uj + │   │   ├──max_energy_range_uj + │   │   ├──name + │   │   ├──enabled + │   │   ├──power + │   │   │   ├──async + │   │   │   [] + │   │   ├──subsystem -> ../../../../../../class/power_cap + │   │   └──uevent + │   ├──intel-rapl:1:1 + │   │   ├──constraint_0_name + │   │   ├──constraint_0_power_limit_uw + │   │   ├──constraint_0_time_window_us + │   │   ├──constraint_1_name + │   │   ├──constraint_1_power_limit_uw + │   │   ├──constraint_1_time_window_us + │   │   ├──device -> ../../intel-rapl:1 + │   │   ├──energy_uj + │   │   ├──max_energy_range_uj + │   │   ├──name + │   │   ├──enabled + │   │   ├──power + │   │   │   ├──async + │   │   │   [] + │   │   ├──subsystem -> ../../../../../../class/power_cap + │   │   └──uevent + │   ├──max_energy_range_uj + │   ├──max_power_range_uw + │   ├──name + │   ├──enabled + │   ├──power + │   │   ├──async + │   │   [] + │   ├──subsystem -> ../../../../../class/power_cap + │   ├──uevent + ├──power + │   ├──async + │   [] + ├──subsystem -> ../../../../class/power_cap + ├──enabled + └──uevent + +The above example illustrates a case in which the Intel RAPL technology, +available in Intel® IA-64 and IA-32 Processor Architectures, is used. There is one +control type called intel-rapl which contains two power zones, intel-rapl:0 and +intel-rapl:1, representing CPU packages. Each of these power zones contains +two subzones, intel-rapl:j:0 and intel-rapl:j:1 (j = 0, 1), representing the +"core" and the "uncore" parts of the given CPU package, respectively. All of +the zones and subzones contain energy monitoring attributes (energy_uj, +max_energy_range_uj) and constraint attributes (constraint_*) allowing controls +to be applied (the constraints in the 'package' power zones apply to the whole +CPU packages and the subzone constraints only apply to the respective parts of +the given package individually). Since Intel RAPL doesn't provide instantaneous +power value, there is no power_uw attribute. + +In addition to that, each power zone contains a name attribute, allowing the +part of the system represented by that zone to be identified. +For example:: + + cat /sys/class/power_cap/intel-rapl/intel-rapl:0/name + +package-0 +--------- + +The Intel RAPL technology allows two constraints, short term and long term, +with two different time windows to be applied to each power zone. Thus for +each zone there are 2 attributes representing the constraint names, 2 power +limits and 2 attributes representing the sizes of the time windows. Such that, +constraint_j_* attributes correspond to the jth constraint (j = 0,1). + +For example:: + + constraint_0_name + constraint_0_power_limit_uw + constraint_0_time_window_us + constraint_1_name + constraint_1_power_limit_uw + constraint_1_time_window_us + +Power Zone Attributes +===================== + +Monitoring attributes +--------------------- + +energy_uj (rw) + Current energy counter in micro joules. Write "0" to reset. + If the counter can not be reset, then this attribute is read only. + +max_energy_range_uj (ro) + Range of the above energy counter in micro-joules. + +power_uw (ro) + Current power in micro watts. + +max_power_range_uw (ro) + Range of the above power value in micro-watts. + +name (ro) + Name of this power zone. + +It is possible that some domains have both power ranges and energy counter ranges; +however, only one is mandatory. + +Constraints +----------- + +constraint_X_power_limit_uw (rw) + Power limit in micro watts, which should be applicable for the + time window specified by "constraint_X_time_window_us". + +constraint_X_time_window_us (rw) + Time window in micro seconds. + +constraint_X_name (ro) + An optional name of the constraint + +constraint_X_max_power_uw(ro) + Maximum allowed power in micro watts. + +constraint_X_min_power_uw(ro) + Minimum allowed power in micro watts. + +constraint_X_max_time_window_us(ro) + Maximum allowed time window in micro seconds. + +constraint_X_min_time_window_us(ro) + Minimum allowed time window in micro seconds. + +Except power_limit_uw and time_window_us other fields are optional. + +Common zone and control type attributes +--------------------------------------- + +enabled (rw): Enable/Disable controls at zone level or for all zones using +a control type. + +Power Cap Client Driver Interface +================================= + +The API summary: + +Call powercap_register_control_type() to register control type object. +Call powercap_register_zone() to register a power zone (under a given +control type), either as a top-level power zone or as a subzone of another +power zone registered earlier. +The number of constraints in a power zone and the corresponding callbacks have +to be defined prior to calling powercap_register_zone() to register that zone. + +To Free a power zone call powercap_unregister_zone(). +To free a control type object call powercap_unregister_control_type(). +Detailed API can be generated using kernel-doc on include/linux/powercap.h. diff --git a/Documentation/power/powercap/powercap.txt b/Documentation/power/powercap/powercap.txt deleted file mode 100644 index 1e6ef164e07a..000000000000 --- a/Documentation/power/powercap/powercap.txt +++ /dev/null @@ -1,236 +0,0 @@ -Power Capping Framework -================================== - -The power capping framework provides a consistent interface between the kernel -and the user space that allows power capping drivers to expose the settings to -user space in a uniform way. - -Terminology -========================= -The framework exposes power capping devices to user space via sysfs in the -form of a tree of objects. The objects at the root level of the tree represent -'control types', which correspond to different methods of power capping. For -example, the intel-rapl control type represents the Intel "Running Average -Power Limit" (RAPL) technology, whereas the 'idle-injection' control type -corresponds to the use of idle injection for controlling power. - -Power zones represent different parts of the system, which can be controlled and -monitored using the power capping method determined by the control type the -given zone belongs to. They each contain attributes for monitoring power, as -well as controls represented in the form of power constraints. If the parts of -the system represented by different power zones are hierarchical (that is, one -bigger part consists of multiple smaller parts that each have their own power -controls), those power zones may also be organized in a hierarchy with one -parent power zone containing multiple subzones and so on to reflect the power -control topology of the system. In that case, it is possible to apply power -capping to a set of devices together using the parent power zone and if more -fine grained control is required, it can be applied through the subzones. - - -Example sysfs interface tree: - -/sys/devices/virtual/powercap -??? intel-rapl - ??? intel-rapl:0 - ?   ??? constraint_0_name - ?   ??? constraint_0_power_limit_uw - ?   ??? constraint_0_time_window_us - ?   ??? constraint_1_name - ?   ??? constraint_1_power_limit_uw - ?   ??? constraint_1_time_window_us - ?   ??? device -> ../../intel-rapl - ?   ??? energy_uj - ?   ??? intel-rapl:0:0 - ?   ?   ??? constraint_0_name - ?   ?   ??? constraint_0_power_limit_uw - ?   ?   ??? constraint_0_time_window_us - ?   ?   ??? constraint_1_name - ?   ?   ??? constraint_1_power_limit_uw - ?   ?   ??? constraint_1_time_window_us - ?   ?   ??? device -> ../../intel-rapl:0 - ?   ?   ??? energy_uj - ?   ?   ??? max_energy_range_uj - ?   ?   ??? name - ?   ?   ??? enabled - ?   ?   ??? power - ?   ?   ?   ??? async - ?   ?   ?   [] - ?   ?   ??? subsystem -> ../../../../../../class/power_cap - ?   ?   ??? uevent - ?   ??? intel-rapl:0:1 - ?   ?   ??? constraint_0_name - ?   ?   ??? constraint_0_power_limit_uw - ?   ?   ??? constraint_0_time_window_us - ?   ?   ??? constraint_1_name - ?   ?   ??? constraint_1_power_limit_uw - ?   ?   ??? constraint_1_time_window_us - ?   ?   ??? device -> ../../intel-rapl:0 - ?   ?   ??? energy_uj - ?   ?   ??? max_energy_range_uj - ?   ?   ??? name - ?   ?   ??? enabled - ?   ?   ??? power - ?   ?   ?   ??? async - ?   ?   ?   [] - ?   ?   ??? subsystem -> ../../../../../../class/power_cap - ?   ?   ??? uevent - ?   ??? max_energy_range_uj - ?   ??? max_power_range_uw - ?   ??? name - ?   ??? enabled - ?   ??? power - ?   ?   ??? async - ?   ?   [] - ?   ??? subsystem -> ../../../../../class/power_cap - ?   ??? enabled - ?   ??? uevent - ??? intel-rapl:1 - ?   ??? constraint_0_name - ?   ??? constraint_0_power_limit_uw - ?   ??? constraint_0_time_window_us - ?   ??? constraint_1_name - ?   ??? constraint_1_power_limit_uw - ?   ??? constraint_1_time_window_us - ?   ??? device -> ../../intel-rapl - ?   ??? energy_uj - ?   ??? intel-rapl:1:0 - ?   ?   ??? constraint_0_name - ?   ?   ??? constraint_0_power_limit_uw - ?   ?   ??? constraint_0_time_window_us - ?   ?   ??? constraint_1_name - ?   ?   ??? constraint_1_power_limit_uw - ?   ?   ??? constraint_1_time_window_us - ?   ?   ??? device -> ../../intel-rapl:1 - ?   ?   ??? energy_uj - ?   ?   ??? max_energy_range_uj - ?   ?   ??? name - ?   ?   ??? enabled - ?   ?   ??? power - ?   ?   ?   ??? async - ?   ?   ?   [] - ?   ?   ??? subsystem -> ../../../../../../class/power_cap - ?   ?   ??? uevent - ?   ??? intel-rapl:1:1 - ?   ?   ??? constraint_0_name - ?   ?   ??? constraint_0_power_limit_uw - ?   ?   ??? constraint_0_time_window_us - ?   ?   ??? constraint_1_name - ?   ?   ??? constraint_1_power_limit_uw - ?   ?   ??? constraint_1_time_window_us - ?   ?   ??? device -> ../../intel-rapl:1 - ?   ?   ??? energy_uj - ?   ?   ??? max_energy_range_uj - ?   ?   ??? name - ?   ?   ??? enabled - ?   ?   ??? power - ?   ?   ?   ??? async - ?   ?   ?   [] - ?   ?   ??? subsystem -> ../../../../../../class/power_cap - ?   ?   ??? uevent - ?   ??? max_energy_range_uj - ?   ??? max_power_range_uw - ?   ??? name - ?   ??? enabled - ?   ??? power - ?   ?   ??? async - ?   ?   [] - ?   ??? subsystem -> ../../../../../class/power_cap - ?   ??? uevent - ??? power - ?   ??? async - ?   [] - ??? subsystem -> ../../../../class/power_cap - ??? enabled - ??? uevent - -The above example illustrates a case in which the Intel RAPL technology, -available in Intel® IA-64 and IA-32 Processor Architectures, is used. There is one -control type called intel-rapl which contains two power zones, intel-rapl:0 and -intel-rapl:1, representing CPU packages. Each of these power zones contains -two subzones, intel-rapl:j:0 and intel-rapl:j:1 (j = 0, 1), representing the -"core" and the "uncore" parts of the given CPU package, respectively. All of -the zones and subzones contain energy monitoring attributes (energy_uj, -max_energy_range_uj) and constraint attributes (constraint_*) allowing controls -to be applied (the constraints in the 'package' power zones apply to the whole -CPU packages and the subzone constraints only apply to the respective parts of -the given package individually). Since Intel RAPL doesn't provide instantaneous -power value, there is no power_uw attribute. - -In addition to that, each power zone contains a name attribute, allowing the -part of the system represented by that zone to be identified. -For example: - -cat /sys/class/power_cap/intel-rapl/intel-rapl:0/name -package-0 - -The Intel RAPL technology allows two constraints, short term and long term, -with two different time windows to be applied to each power zone. Thus for -each zone there are 2 attributes representing the constraint names, 2 power -limits and 2 attributes representing the sizes of the time windows. Such that, -constraint_j_* attributes correspond to the jth constraint (j = 0,1). - -For example: - constraint_0_name - constraint_0_power_limit_uw - constraint_0_time_window_us - constraint_1_name - constraint_1_power_limit_uw - constraint_1_time_window_us - -Power Zone Attributes -================================= -Monitoring attributes ----------------------- - -energy_uj (rw): Current energy counter in micro joules. Write "0" to reset. -If the counter can not be reset, then this attribute is read only. - -max_energy_range_uj (ro): Range of the above energy counter in micro-joules. - -power_uw (ro): Current power in micro watts. - -max_power_range_uw (ro): Range of the above power value in micro-watts. - -name (ro): Name of this power zone. - -It is possible that some domains have both power ranges and energy counter ranges; -however, only one is mandatory. - -Constraints ----------------- -constraint_X_power_limit_uw (rw): Power limit in micro watts, which should be -applicable for the time window specified by "constraint_X_time_window_us". - -constraint_X_time_window_us (rw): Time window in micro seconds. - -constraint_X_name (ro): An optional name of the constraint - -constraint_X_max_power_uw(ro): Maximum allowed power in micro watts. - -constraint_X_min_power_uw(ro): Minimum allowed power in micro watts. - -constraint_X_max_time_window_us(ro): Maximum allowed time window in micro seconds. - -constraint_X_min_time_window_us(ro): Minimum allowed time window in micro seconds. - -Except power_limit_uw and time_window_us other fields are optional. - -Common zone and control type attributes ----------------------------------------- -enabled (rw): Enable/Disable controls at zone level or for all zones using -a control type. - -Power Cap Client Driver Interface -================================== -The API summary: - -Call powercap_register_control_type() to register control type object. -Call powercap_register_zone() to register a power zone (under a given -control type), either as a top-level power zone or as a subzone of another -power zone registered earlier. -The number of constraints in a power zone and the corresponding callbacks have -to be defined prior to calling powercap_register_zone() to register that zone. - -To Free a power zone call powercap_unregister_zone(). -To free a control type object call powercap_unregister_control_type(). -Detailed API can be generated using kernel-doc on include/linux/powercap.h. diff --git a/Documentation/power/regulator/consumer.rst b/Documentation/power/regulator/consumer.rst new file mode 100644 index 000000000000..0cd8cc1275a7 --- /dev/null +++ b/Documentation/power/regulator/consumer.rst @@ -0,0 +1,229 @@ +=================================== +Regulator Consumer Driver Interface +=================================== + +This text describes the regulator interface for consumer device drivers. +Please see overview.txt for a description of the terms used in this text. + + +1. Consumer Regulator Access (static & dynamic drivers) +======================================================= + +A consumer driver can get access to its supply regulator by calling :: + + regulator = regulator_get(dev, "Vcc"); + +The consumer passes in its struct device pointer and power supply ID. The core +then finds the correct regulator by consulting a machine specific lookup table. +If the lookup is successful then this call will return a pointer to the struct +regulator that supplies this consumer. + +To release the regulator the consumer driver should call :: + + regulator_put(regulator); + +Consumers can be supplied by more than one regulator e.g. codec consumer with +analog and digital supplies :: + + digital = regulator_get(dev, "Vcc"); /* digital core */ + analog = regulator_get(dev, "Avdd"); /* analog */ + +The regulator access functions regulator_get() and regulator_put() will +usually be called in your device drivers probe() and remove() respectively. + + +2. Regulator Output Enable & Disable (static & dynamic drivers) +=============================================================== + + +A consumer can enable its power supply by calling:: + + int regulator_enable(regulator); + +NOTE: + The supply may already be enabled before regulator_enabled() is called. + This may happen if the consumer shares the regulator or the regulator has been + previously enabled by bootloader or kernel board initialization code. + +A consumer can determine if a regulator is enabled by calling:: + + int regulator_is_enabled(regulator); + +This will return > zero when the regulator is enabled. + + +A consumer can disable its supply when no longer needed by calling:: + + int regulator_disable(regulator); + +NOTE: + This may not disable the supply if it's shared with other consumers. The + regulator will only be disabled when the enabled reference count is zero. + +Finally, a regulator can be forcefully disabled in the case of an emergency:: + + int regulator_force_disable(regulator); + +NOTE: + this will immediately and forcefully shutdown the regulator output. All + consumers will be powered off. + + +3. Regulator Voltage Control & Status (dynamic drivers) +======================================================= + +Some consumer drivers need to be able to dynamically change their supply +voltage to match system operating points. e.g. CPUfreq drivers can scale +voltage along with frequency to save power, SD drivers may need to select the +correct card voltage, etc. + +Consumers can control their supply voltage by calling:: + + int regulator_set_voltage(regulator, min_uV, max_uV); + +Where min_uV and max_uV are the minimum and maximum acceptable voltages in +microvolts. + +NOTE: this can be called when the regulator is enabled or disabled. If called +when enabled, then the voltage changes instantly, otherwise the voltage +configuration changes and the voltage is physically set when the regulator is +next enabled. + +The regulators configured voltage output can be found by calling:: + + int regulator_get_voltage(regulator); + +NOTE: + get_voltage() will return the configured output voltage whether the + regulator is enabled or disabled and should NOT be used to determine regulator + output state. However this can be used in conjunction with is_enabled() to + determine the regulator physical output voltage. + + +4. Regulator Current Limit Control & Status (dynamic drivers) +============================================================= + +Some consumer drivers need to be able to dynamically change their supply +current limit to match system operating points. e.g. LCD backlight driver can +change the current limit to vary the backlight brightness, USB drivers may want +to set the limit to 500mA when supplying power. + +Consumers can control their supply current limit by calling:: + + int regulator_set_current_limit(regulator, min_uA, max_uA); + +Where min_uA and max_uA are the minimum and maximum acceptable current limit in +microamps. + +NOTE: + this can be called when the regulator is enabled or disabled. If called + when enabled, then the current limit changes instantly, otherwise the current + limit configuration changes and the current limit is physically set when the + regulator is next enabled. + +A regulators current limit can be found by calling:: + + int regulator_get_current_limit(regulator); + +NOTE: + get_current_limit() will return the current limit whether the regulator + is enabled or disabled and should not be used to determine regulator current + load. + + +5. Regulator Operating Mode Control & Status (dynamic drivers) +============================================================== + +Some consumers can further save system power by changing the operating mode of +their supply regulator to be more efficient when the consumers operating state +changes. e.g. consumer driver is idle and subsequently draws less current + +Regulator operating mode can be changed indirectly or directly. + +Indirect operating mode control. +-------------------------------- +Consumer drivers can request a change in their supply regulator operating mode +by calling:: + + int regulator_set_load(struct regulator *regulator, int load_uA); + +This will cause the core to recalculate the total load on the regulator (based +on all its consumers) and change operating mode (if necessary and permitted) +to best match the current operating load. + +The load_uA value can be determined from the consumer's datasheet. e.g. most +datasheets have tables showing the maximum current consumed in certain +situations. + +Most consumers will use indirect operating mode control since they have no +knowledge of the regulator or whether the regulator is shared with other +consumers. + +Direct operating mode control. +------------------------------ + +Bespoke or tightly coupled drivers may want to directly control regulator +operating mode depending on their operating point. This can be achieved by +calling:: + + int regulator_set_mode(struct regulator *regulator, unsigned int mode); + unsigned int regulator_get_mode(struct regulator *regulator); + +Direct mode will only be used by consumers that *know* about the regulator and +are not sharing the regulator with other consumers. + + +6. Regulator Events +=================== + +Regulators can notify consumers of external events. Events could be received by +consumers under regulator stress or failure conditions. + +Consumers can register interest in regulator events by calling:: + + int regulator_register_notifier(struct regulator *regulator, + struct notifier_block *nb); + +Consumers can unregister interest by calling:: + + int regulator_unregister_notifier(struct regulator *regulator, + struct notifier_block *nb); + +Regulators use the kernel notifier framework to send event to their interested +consumers. + +7. Regulator Direct Register Access +=================================== + +Some kinds of power management hardware or firmware are designed such that +they need to do low-level hardware access to regulators, with no involvement +from the kernel. Examples of such devices are: + +- clocksource with a voltage-controlled oscillator and control logic to change + the supply voltage over I2C to achieve a desired output clock rate +- thermal management firmware that can issue an arbitrary I2C transaction to + perform system poweroff during overtemperature conditions + +To set up such a device/firmware, various parameters like I2C address of the +regulator, addresses of various regulator registers etc. need to be configured +to it. The regulator framework provides the following helpers for querying +these details. + +Bus-specific details, like I2C addresses or transfer rates are handled by the +regmap framework. To get the regulator's regmap (if supported), use:: + + struct regmap *regulator_get_regmap(struct regulator *regulator); + +To obtain the hardware register offset and bitmask for the regulator's voltage +selector register, use:: + + int regulator_get_hardware_vsel_register(struct regulator *regulator, + unsigned *vsel_reg, + unsigned *vsel_mask); + +To convert a regulator framework voltage selector code (used by +regulator_list_voltage) to a hardware-specific voltage selector that can be +directly written to the voltage selector register, use:: + + int regulator_list_hardware_vsel(struct regulator *regulator, + unsigned selector); diff --git a/Documentation/power/regulator/consumer.txt b/Documentation/power/regulator/consumer.txt deleted file mode 100644 index e51564c1a140..000000000000 --- a/Documentation/power/regulator/consumer.txt +++ /dev/null @@ -1,218 +0,0 @@ -Regulator Consumer Driver Interface -=================================== - -This text describes the regulator interface for consumer device drivers. -Please see overview.txt for a description of the terms used in this text. - - -1. Consumer Regulator Access (static & dynamic drivers) -======================================================= - -A consumer driver can get access to its supply regulator by calling :- - -regulator = regulator_get(dev, "Vcc"); - -The consumer passes in its struct device pointer and power supply ID. The core -then finds the correct regulator by consulting a machine specific lookup table. -If the lookup is successful then this call will return a pointer to the struct -regulator that supplies this consumer. - -To release the regulator the consumer driver should call :- - -regulator_put(regulator); - -Consumers can be supplied by more than one regulator e.g. codec consumer with -analog and digital supplies :- - -digital = regulator_get(dev, "Vcc"); /* digital core */ -analog = regulator_get(dev, "Avdd"); /* analog */ - -The regulator access functions regulator_get() and regulator_put() will -usually be called in your device drivers probe() and remove() respectively. - - -2. Regulator Output Enable & Disable (static & dynamic drivers) -==================================================================== - -A consumer can enable its power supply by calling:- - -int regulator_enable(regulator); - -NOTE: The supply may already be enabled before regulator_enabled() is called. -This may happen if the consumer shares the regulator or the regulator has been -previously enabled by bootloader or kernel board initialization code. - -A consumer can determine if a regulator is enabled by calling :- - -int regulator_is_enabled(regulator); - -This will return > zero when the regulator is enabled. - - -A consumer can disable its supply when no longer needed by calling :- - -int regulator_disable(regulator); - -NOTE: This may not disable the supply if it's shared with other consumers. The -regulator will only be disabled when the enabled reference count is zero. - -Finally, a regulator can be forcefully disabled in the case of an emergency :- - -int regulator_force_disable(regulator); - -NOTE: this will immediately and forcefully shutdown the regulator output. All -consumers will be powered off. - - -3. Regulator Voltage Control & Status (dynamic drivers) -====================================================== - -Some consumer drivers need to be able to dynamically change their supply -voltage to match system operating points. e.g. CPUfreq drivers can scale -voltage along with frequency to save power, SD drivers may need to select the -correct card voltage, etc. - -Consumers can control their supply voltage by calling :- - -int regulator_set_voltage(regulator, min_uV, max_uV); - -Where min_uV and max_uV are the minimum and maximum acceptable voltages in -microvolts. - -NOTE: this can be called when the regulator is enabled or disabled. If called -when enabled, then the voltage changes instantly, otherwise the voltage -configuration changes and the voltage is physically set when the regulator is -next enabled. - -The regulators configured voltage output can be found by calling :- - -int regulator_get_voltage(regulator); - -NOTE: get_voltage() will return the configured output voltage whether the -regulator is enabled or disabled and should NOT be used to determine regulator -output state. However this can be used in conjunction with is_enabled() to -determine the regulator physical output voltage. - - -4. Regulator Current Limit Control & Status (dynamic drivers) -=========================================================== - -Some consumer drivers need to be able to dynamically change their supply -current limit to match system operating points. e.g. LCD backlight driver can -change the current limit to vary the backlight brightness, USB drivers may want -to set the limit to 500mA when supplying power. - -Consumers can control their supply current limit by calling :- - -int regulator_set_current_limit(regulator, min_uA, max_uA); - -Where min_uA and max_uA are the minimum and maximum acceptable current limit in -microamps. - -NOTE: this can be called when the regulator is enabled or disabled. If called -when enabled, then the current limit changes instantly, otherwise the current -limit configuration changes and the current limit is physically set when the -regulator is next enabled. - -A regulators current limit can be found by calling :- - -int regulator_get_current_limit(regulator); - -NOTE: get_current_limit() will return the current limit whether the regulator -is enabled or disabled and should not be used to determine regulator current -load. - - -5. Regulator Operating Mode Control & Status (dynamic drivers) -============================================================= - -Some consumers can further save system power by changing the operating mode of -their supply regulator to be more efficient when the consumers operating state -changes. e.g. consumer driver is idle and subsequently draws less current - -Regulator operating mode can be changed indirectly or directly. - -Indirect operating mode control. --------------------------------- -Consumer drivers can request a change in their supply regulator operating mode -by calling :- - -int regulator_set_load(struct regulator *regulator, int load_uA); - -This will cause the core to recalculate the total load on the regulator (based -on all its consumers) and change operating mode (if necessary and permitted) -to best match the current operating load. - -The load_uA value can be determined from the consumer's datasheet. e.g. most -datasheets have tables showing the maximum current consumed in certain -situations. - -Most consumers will use indirect operating mode control since they have no -knowledge of the regulator or whether the regulator is shared with other -consumers. - -Direct operating mode control. ------------------------------- -Bespoke or tightly coupled drivers may want to directly control regulator -operating mode depending on their operating point. This can be achieved by -calling :- - -int regulator_set_mode(struct regulator *regulator, unsigned int mode); -unsigned int regulator_get_mode(struct regulator *regulator); - -Direct mode will only be used by consumers that *know* about the regulator and -are not sharing the regulator with other consumers. - - -6. Regulator Events -=================== -Regulators can notify consumers of external events. Events could be received by -consumers under regulator stress or failure conditions. - -Consumers can register interest in regulator events by calling :- - -int regulator_register_notifier(struct regulator *regulator, - struct notifier_block *nb); - -Consumers can unregister interest by calling :- - -int regulator_unregister_notifier(struct regulator *regulator, - struct notifier_block *nb); - -Regulators use the kernel notifier framework to send event to their interested -consumers. - -7. Regulator Direct Register Access -=================================== -Some kinds of power management hardware or firmware are designed such that -they need to do low-level hardware access to regulators, with no involvement -from the kernel. Examples of such devices are: - -- clocksource with a voltage-controlled oscillator and control logic to change - the supply voltage over I2C to achieve a desired output clock rate -- thermal management firmware that can issue an arbitrary I2C transaction to - perform system poweroff during overtemperature conditions - -To set up such a device/firmware, various parameters like I2C address of the -regulator, addresses of various regulator registers etc. need to be configured -to it. The regulator framework provides the following helpers for querying -these details. - -Bus-specific details, like I2C addresses or transfer rates are handled by the -regmap framework. To get the regulator's regmap (if supported), use :- - -struct regmap *regulator_get_regmap(struct regulator *regulator); - -To obtain the hardware register offset and bitmask for the regulator's voltage -selector register, use :- - -int regulator_get_hardware_vsel_register(struct regulator *regulator, - unsigned *vsel_reg, - unsigned *vsel_mask); - -To convert a regulator framework voltage selector code (used by -regulator_list_voltage) to a hardware-specific voltage selector that can be -directly written to the voltage selector register, use :- - -int regulator_list_hardware_vsel(struct regulator *regulator, - unsigned selector); diff --git a/Documentation/power/regulator/design.rst b/Documentation/power/regulator/design.rst new file mode 100644 index 000000000000..3b09c6841dc4 --- /dev/null +++ b/Documentation/power/regulator/design.rst @@ -0,0 +1,38 @@ +========================== +Regulator API design notes +========================== + +This document provides a brief, partially structured, overview of some +of the design considerations which impact the regulator API design. + +Safety +------ + + - Errors in regulator configuration can have very serious consequences + for the system, potentially including lasting hardware damage. + - It is not possible to automatically determine the power configuration + of the system - software-equivalent variants of the same chip may + have different power requirements, and not all components with power + requirements are visible to software. + +.. note:: + + The API should make no changes to the hardware state unless it has + specific knowledge that these changes are safe to perform on this + particular system. + +Consumer use cases +------------------ + + - The overwhelming majority of devices in a system will have no + requirement to do any runtime configuration of their power beyond + being able to turn it on or off. + + - Many of the power supplies in the system will be shared between many + different consumers. + +.. note:: + + The consumer API should be structured so that these use cases are + very easy to handle and so that consumers will work with shared + supplies without any additional effort. diff --git a/Documentation/power/regulator/design.txt b/Documentation/power/regulator/design.txt deleted file mode 100644 index fdd919b96830..000000000000 --- a/Documentation/power/regulator/design.txt +++ /dev/null @@ -1,33 +0,0 @@ -Regulator API design notes -========================== - -This document provides a brief, partially structured, overview of some -of the design considerations which impact the regulator API design. - -Safety ------- - - - Errors in regulator configuration can have very serious consequences - for the system, potentially including lasting hardware damage. - - It is not possible to automatically determine the power configuration - of the system - software-equivalent variants of the same chip may - have different power requirements, and not all components with power - requirements are visible to software. - - => The API should make no changes to the hardware state unless it has - specific knowledge that these changes are safe to perform on this - particular system. - -Consumer use cases ------------------- - - - The overwhelming majority of devices in a system will have no - requirement to do any runtime configuration of their power beyond - being able to turn it on or off. - - - Many of the power supplies in the system will be shared between many - different consumers. - - => The consumer API should be structured so that these use cases are - very easy to handle and so that consumers will work with shared - supplies without any additional effort. diff --git a/Documentation/power/regulator/machine.rst b/Documentation/power/regulator/machine.rst new file mode 100644 index 000000000000..22fffefaa3ad --- /dev/null +++ b/Documentation/power/regulator/machine.rst @@ -0,0 +1,97 @@ +================================== +Regulator Machine Driver Interface +================================== + +The regulator machine driver interface is intended for board/machine specific +initialisation code to configure the regulator subsystem. + +Consider the following machine:: + + Regulator-1 -+-> Regulator-2 --> [Consumer A @ 1.8 - 2.0V] + | + +-> [Consumer B @ 3.3V] + +The drivers for consumers A & B must be mapped to the correct regulator in +order to control their power supplies. This mapping can be achieved in machine +initialisation code by creating a struct regulator_consumer_supply for +each regulator:: + + struct regulator_consumer_supply { + const char *dev_name; /* consumer dev_name() */ + const char *supply; /* consumer supply - e.g. "vcc" */ + }; + +e.g. for the machine above:: + + static struct regulator_consumer_supply regulator1_consumers[] = { + REGULATOR_SUPPLY("Vcc", "consumer B"), + }; + + static struct regulator_consumer_supply regulator2_consumers[] = { + REGULATOR_SUPPLY("Vcc", "consumer A"), + }; + +This maps Regulator-1 to the 'Vcc' supply for Consumer B and maps Regulator-2 +to the 'Vcc' supply for Consumer A. + +Constraints can now be registered by defining a struct regulator_init_data +for each regulator power domain. This structure also maps the consumers +to their supply regulators:: + + static struct regulator_init_data regulator1_data = { + .constraints = { + .name = "Regulator-1", + .min_uV = 3300000, + .max_uV = 3300000, + .valid_modes_mask = REGULATOR_MODE_NORMAL, + }, + .num_consumer_supplies = ARRAY_SIZE(regulator1_consumers), + .consumer_supplies = regulator1_consumers, + }; + +The name field should be set to something that is usefully descriptive +for the board for configuration of supplies for other regulators and +for use in logging and other diagnostic output. Normally the name +used for the supply rail in the schematic is a good choice. If no +name is provided then the subsystem will choose one. + +Regulator-1 supplies power to Regulator-2. This relationship must be registered +with the core so that Regulator-1 is also enabled when Consumer A enables its +supply (Regulator-2). The supply regulator is set by the supply_regulator +field below and co:: + + static struct regulator_init_data regulator2_data = { + .supply_regulator = "Regulator-1", + .constraints = { + .min_uV = 1800000, + .max_uV = 2000000, + .valid_ops_mask = REGULATOR_CHANGE_VOLTAGE, + .valid_modes_mask = REGULATOR_MODE_NORMAL, + }, + .num_consumer_supplies = ARRAY_SIZE(regulator2_consumers), + .consumer_supplies = regulator2_consumers, + }; + +Finally the regulator devices must be registered in the usual manner:: + + static struct platform_device regulator_devices[] = { + { + .name = "regulator", + .id = DCDC_1, + .dev = { + .platform_data = ®ulator1_data, + }, + }, + { + .name = "regulator", + .id = DCDC_2, + .dev = { + .platform_data = ®ulator2_data, + }, + }, + }; + /* register regulator 1 device */ + platform_device_register(®ulator_devices[0]); + + /* register regulator 2 device */ + platform_device_register(®ulator_devices[1]); diff --git a/Documentation/power/regulator/machine.txt b/Documentation/power/regulator/machine.txt deleted file mode 100644 index eff4dcaaa252..000000000000 --- a/Documentation/power/regulator/machine.txt +++ /dev/null @@ -1,96 +0,0 @@ -Regulator Machine Driver Interface -=================================== - -The regulator machine driver interface is intended for board/machine specific -initialisation code to configure the regulator subsystem. - -Consider the following machine :- - - Regulator-1 -+-> Regulator-2 --> [Consumer A @ 1.8 - 2.0V] - | - +-> [Consumer B @ 3.3V] - -The drivers for consumers A & B must be mapped to the correct regulator in -order to control their power supplies. This mapping can be achieved in machine -initialisation code by creating a struct regulator_consumer_supply for -each regulator. - -struct regulator_consumer_supply { - const char *dev_name; /* consumer dev_name() */ - const char *supply; /* consumer supply - e.g. "vcc" */ -}; - -e.g. for the machine above - -static struct regulator_consumer_supply regulator1_consumers[] = { - REGULATOR_SUPPLY("Vcc", "consumer B"), -}; - -static struct regulator_consumer_supply regulator2_consumers[] = { - REGULATOR_SUPPLY("Vcc", "consumer A"), -}; - -This maps Regulator-1 to the 'Vcc' supply for Consumer B and maps Regulator-2 -to the 'Vcc' supply for Consumer A. - -Constraints can now be registered by defining a struct regulator_init_data -for each regulator power domain. This structure also maps the consumers -to their supply regulators :- - -static struct regulator_init_data regulator1_data = { - .constraints = { - .name = "Regulator-1", - .min_uV = 3300000, - .max_uV = 3300000, - .valid_modes_mask = REGULATOR_MODE_NORMAL, - }, - .num_consumer_supplies = ARRAY_SIZE(regulator1_consumers), - .consumer_supplies = regulator1_consumers, -}; - -The name field should be set to something that is usefully descriptive -for the board for configuration of supplies for other regulators and -for use in logging and other diagnostic output. Normally the name -used for the supply rail in the schematic is a good choice. If no -name is provided then the subsystem will choose one. - -Regulator-1 supplies power to Regulator-2. This relationship must be registered -with the core so that Regulator-1 is also enabled when Consumer A enables its -supply (Regulator-2). The supply regulator is set by the supply_regulator -field below and co:- - -static struct regulator_init_data regulator2_data = { - .supply_regulator = "Regulator-1", - .constraints = { - .min_uV = 1800000, - .max_uV = 2000000, - .valid_ops_mask = REGULATOR_CHANGE_VOLTAGE, - .valid_modes_mask = REGULATOR_MODE_NORMAL, - }, - .num_consumer_supplies = ARRAY_SIZE(regulator2_consumers), - .consumer_supplies = regulator2_consumers, -}; - -Finally the regulator devices must be registered in the usual manner. - -static struct platform_device regulator_devices[] = { - { - .name = "regulator", - .id = DCDC_1, - .dev = { - .platform_data = ®ulator1_data, - }, - }, - { - .name = "regulator", - .id = DCDC_2, - .dev = { - .platform_data = ®ulator2_data, - }, - }, -}; -/* register regulator 1 device */ -platform_device_register(®ulator_devices[0]); - -/* register regulator 2 device */ -platform_device_register(®ulator_devices[1]); diff --git a/Documentation/power/regulator/overview.rst b/Documentation/power/regulator/overview.rst new file mode 100644 index 000000000000..ee494c70a7c4 --- /dev/null +++ b/Documentation/power/regulator/overview.rst @@ -0,0 +1,178 @@ +============================================= +Linux voltage and current regulator framework +============================================= + +About +===== + +This framework is designed to provide a standard kernel interface to control +voltage and current regulators. + +The intention is to allow systems to dynamically control regulator power output +in order to save power and prolong battery life. This applies to both voltage +regulators (where voltage output is controllable) and current sinks (where +current limit is controllable). + +(C) 2008 Wolfson Microelectronics PLC. + +Author: Liam Girdwood + + +Nomenclature +============ + +Some terms used in this document: + + - Regulator + - Electronic device that supplies power to other devices. + Most regulators can enable and disable their output while + some can control their output voltage and or current. + + Input Voltage -> Regulator -> Output Voltage + + + - PMIC + - Power Management IC. An IC that contains numerous + regulators and often contains other subsystems. + + + - Consumer + - Electronic device that is supplied power by a regulator. + Consumers can be classified into two types:- + + Static: consumer does not change its supply voltage or + current limit. It only needs to enable or disable its + power supply. Its supply voltage is set by the hardware, + bootloader, firmware or kernel board initialisation code. + + Dynamic: consumer needs to change its supply voltage or + current limit to meet operation demands. + + + - Power Domain + - Electronic circuit that is supplied its input power by the + output power of a regulator, switch or by another power + domain. + + The supply regulator may be behind a switch(s). i.e.:: + + Regulator -+-> Switch-1 -+-> Switch-2 --> [Consumer A] + | | + | +-> [Consumer B], [Consumer C] + | + +-> [Consumer D], [Consumer E] + + That is one regulator and three power domains: + + - Domain 1: Switch-1, Consumers D & E. + - Domain 2: Switch-2, Consumers B & C. + - Domain 3: Consumer A. + + and this represents a "supplies" relationship: + + Domain-1 --> Domain-2 --> Domain-3. + + A power domain may have regulators that are supplied power + by other regulators. i.e.:: + + Regulator-1 -+-> Regulator-2 -+-> [Consumer A] + | + +-> [Consumer B] + + This gives us two regulators and two power domains: + + - Domain 1: Regulator-2, Consumer B. + - Domain 2: Consumer A. + + and a "supplies" relationship: + + Domain-1 --> Domain-2 + + + - Constraints + - Constraints are used to define power levels for performance + and hardware protection. Constraints exist at three levels: + + Regulator Level: This is defined by the regulator hardware + operating parameters and is specified in the regulator + datasheet. i.e. + + - voltage output is in the range 800mV -> 3500mV. + - regulator current output limit is 20mA @ 5V but is + 10mA @ 10V. + + Power Domain Level: This is defined in software by kernel + level board initialisation code. It is used to constrain a + power domain to a particular power range. i.e. + + - Domain-1 voltage is 3300mV + - Domain-2 voltage is 1400mV -> 1600mV + - Domain-3 current limit is 0mA -> 20mA. + + Consumer Level: This is defined by consumer drivers + dynamically setting voltage or current limit levels. + + e.g. a consumer backlight driver asks for a current increase + from 5mA to 10mA to increase LCD illumination. This passes + to through the levels as follows :- + + Consumer: need to increase LCD brightness. Lookup and + request next current mA value in brightness table (the + consumer driver could be used on several different + personalities based upon the same reference device). + + Power Domain: is the new current limit within the domain + operating limits for this domain and system state (e.g. + battery power, USB power) + + Regulator Domains: is the new current limit within the + regulator operating parameters for input/output voltage. + + If the regulator request passes all the constraint tests + then the new regulator value is applied. + + +Design +====== + +The framework is designed and targeted at SoC based devices but may also be +relevant to non SoC devices and is split into the following four interfaces:- + + + 1. Consumer driver interface. + + This uses a similar API to the kernel clock interface in that consumer + drivers can get and put a regulator (like they can with clocks atm) and + get/set voltage, current limit, mode, enable and disable. This should + allow consumers complete control over their supply voltage and current + limit. This also compiles out if not in use so drivers can be reused in + systems with no regulator based power control. + + See Documentation/power/regulator/consumer.rst + + 2. Regulator driver interface. + + This allows regulator drivers to register their regulators and provide + operations to the core. It also has a notifier call chain for propagating + regulator events to clients. + + See Documentation/power/regulator/regulator.rst + + 3. Machine interface. + + This interface is for machine specific code and allows the creation of + voltage/current domains (with constraints) for each regulator. It can + provide regulator constraints that will prevent device damage through + overvoltage or overcurrent caused by buggy client drivers. It also + allows the creation of a regulator tree whereby some regulators are + supplied by others (similar to a clock tree). + + See Documentation/power/regulator/machine.rst + + 4. Userspace ABI. + + The framework also exports a lot of useful voltage/current/opmode data to + userspace via sysfs. This could be used to help monitor device power + consumption and status. + + See Documentation/ABI/testing/sysfs-class-regulator diff --git a/Documentation/power/regulator/overview.txt b/Documentation/power/regulator/overview.txt deleted file mode 100644 index 721b4739ec32..000000000000 --- a/Documentation/power/regulator/overview.txt +++ /dev/null @@ -1,171 +0,0 @@ -Linux voltage and current regulator framework -============================================= - -About -===== - -This framework is designed to provide a standard kernel interface to control -voltage and current regulators. - -The intention is to allow systems to dynamically control regulator power output -in order to save power and prolong battery life. This applies to both voltage -regulators (where voltage output is controllable) and current sinks (where -current limit is controllable). - -(C) 2008 Wolfson Microelectronics PLC. -Author: Liam Girdwood - - -Nomenclature -============ - -Some terms used in this document:- - - o Regulator - Electronic device that supplies power to other devices. - Most regulators can enable and disable their output while - some can control their output voltage and or current. - - Input Voltage -> Regulator -> Output Voltage - - - o PMIC - Power Management IC. An IC that contains numerous regulators - and often contains other subsystems. - - - o Consumer - Electronic device that is supplied power by a regulator. - Consumers can be classified into two types:- - - Static: consumer does not change its supply voltage or - current limit. It only needs to enable or disable its - power supply. Its supply voltage is set by the hardware, - bootloader, firmware or kernel board initialisation code. - - Dynamic: consumer needs to change its supply voltage or - current limit to meet operation demands. - - - o Power Domain - Electronic circuit that is supplied its input power by the - output power of a regulator, switch or by another power - domain. - - The supply regulator may be behind a switch(s). i.e. - - Regulator -+-> Switch-1 -+-> Switch-2 --> [Consumer A] - | | - | +-> [Consumer B], [Consumer C] - | - +-> [Consumer D], [Consumer E] - - That is one regulator and three power domains: - - Domain 1: Switch-1, Consumers D & E. - Domain 2: Switch-2, Consumers B & C. - Domain 3: Consumer A. - - and this represents a "supplies" relationship: - - Domain-1 --> Domain-2 --> Domain-3. - - A power domain may have regulators that are supplied power - by other regulators. i.e. - - Regulator-1 -+-> Regulator-2 -+-> [Consumer A] - | - +-> [Consumer B] - - This gives us two regulators and two power domains: - - Domain 1: Regulator-2, Consumer B. - Domain 2: Consumer A. - - and a "supplies" relationship: - - Domain-1 --> Domain-2 - - - o Constraints - Constraints are used to define power levels for performance - and hardware protection. Constraints exist at three levels: - - Regulator Level: This is defined by the regulator hardware - operating parameters and is specified in the regulator - datasheet. i.e. - - - voltage output is in the range 800mV -> 3500mV. - - regulator current output limit is 20mA @ 5V but is - 10mA @ 10V. - - Power Domain Level: This is defined in software by kernel - level board initialisation code. It is used to constrain a - power domain to a particular power range. i.e. - - - Domain-1 voltage is 3300mV - - Domain-2 voltage is 1400mV -> 1600mV - - Domain-3 current limit is 0mA -> 20mA. - - Consumer Level: This is defined by consumer drivers - dynamically setting voltage or current limit levels. - - e.g. a consumer backlight driver asks for a current increase - from 5mA to 10mA to increase LCD illumination. This passes - to through the levels as follows :- - - Consumer: need to increase LCD brightness. Lookup and - request next current mA value in brightness table (the - consumer driver could be used on several different - personalities based upon the same reference device). - - Power Domain: is the new current limit within the domain - operating limits for this domain and system state (e.g. - battery power, USB power) - - Regulator Domains: is the new current limit within the - regulator operating parameters for input/output voltage. - - If the regulator request passes all the constraint tests - then the new regulator value is applied. - - -Design -====== - -The framework is designed and targeted at SoC based devices but may also be -relevant to non SoC devices and is split into the following four interfaces:- - - - 1. Consumer driver interface. - - This uses a similar API to the kernel clock interface in that consumer - drivers can get and put a regulator (like they can with clocks atm) and - get/set voltage, current limit, mode, enable and disable. This should - allow consumers complete control over their supply voltage and current - limit. This also compiles out if not in use so drivers can be reused in - systems with no regulator based power control. - - See Documentation/power/regulator/consumer.txt - - 2. Regulator driver interface. - - This allows regulator drivers to register their regulators and provide - operations to the core. It also has a notifier call chain for propagating - regulator events to clients. - - See Documentation/power/regulator/regulator.txt - - 3. Machine interface. - - This interface is for machine specific code and allows the creation of - voltage/current domains (with constraints) for each regulator. It can - provide regulator constraints that will prevent device damage through - overvoltage or overcurrent caused by buggy client drivers. It also - allows the creation of a regulator tree whereby some regulators are - supplied by others (similar to a clock tree). - - See Documentation/power/regulator/machine.txt - - 4. Userspace ABI. - - The framework also exports a lot of useful voltage/current/opmode data to - userspace via sysfs. This could be used to help monitor device power - consumption and status. - - See Documentation/ABI/testing/sysfs-class-regulator diff --git a/Documentation/power/regulator/regulator.rst b/Documentation/power/regulator/regulator.rst new file mode 100644 index 000000000000..794b3256fbb9 --- /dev/null +++ b/Documentation/power/regulator/regulator.rst @@ -0,0 +1,32 @@ +========================== +Regulator Driver Interface +========================== + +The regulator driver interface is relatively simple and designed to allow +regulator drivers to register their services with the core framework. + + +Registration +============ + +Drivers can register a regulator by calling:: + + struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc, + const struct regulator_config *config); + +This will register the regulator's capabilities and operations to the regulator +core. + +Regulators can be unregistered by calling:: + + void regulator_unregister(struct regulator_dev *rdev); + + +Regulator Events +================ + +Regulators can send events (e.g. overtemperature, undervoltage, etc) to +consumer drivers by calling:: + + int regulator_notifier_call_chain(struct regulator_dev *rdev, + unsigned long event, void *data); diff --git a/Documentation/power/regulator/regulator.txt b/Documentation/power/regulator/regulator.txt deleted file mode 100644 index b17e5833ce21..000000000000 --- a/Documentation/power/regulator/regulator.txt +++ /dev/null @@ -1,30 +0,0 @@ -Regulator Driver Interface -========================== - -The regulator driver interface is relatively simple and designed to allow -regulator drivers to register their services with the core framework. - - -Registration -============ - -Drivers can register a regulator by calling :- - -struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc, - const struct regulator_config *config); - -This will register the regulator's capabilities and operations to the regulator -core. - -Regulators can be unregistered by calling :- - -void regulator_unregister(struct regulator_dev *rdev); - - -Regulator Events -================ -Regulators can send events (e.g. overtemperature, undervoltage, etc) to -consumer drivers by calling :- - -int regulator_notifier_call_chain(struct regulator_dev *rdev, - unsigned long event, void *data); diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst new file mode 100644 index 000000000000..2c2ec99b5088 --- /dev/null +++ b/Documentation/power/runtime_pm.rst @@ -0,0 +1,940 @@ +================================================== +Runtime Power Management Framework for I/O Devices +================================================== + +(C) 2009-2011 Rafael J. Wysocki , Novell Inc. + +(C) 2010 Alan Stern + +(C) 2014 Intel Corp., Rafael J. Wysocki + +1. Introduction +=============== + +Support for runtime power management (runtime PM) of I/O devices is provided +at the power management core (PM core) level by means of: + +* The power management workqueue pm_wq in which bus types and device drivers can + put their PM-related work items. It is strongly recommended that pm_wq be + used for queuing all work items related to runtime PM, because this allows + them to be synchronized with system-wide power transitions (suspend to RAM, + hibernation and resume from system sleep states). pm_wq is declared in + include/linux/pm_runtime.h and defined in kernel/power/main.c. + +* A number of runtime PM fields in the 'power' member of 'struct device' (which + is of the type 'struct dev_pm_info', defined in include/linux/pm.h) that can + be used for synchronizing runtime PM operations with one another. + +* Three device runtime PM callbacks in 'struct dev_pm_ops' (defined in + include/linux/pm.h). + +* A set of helper functions defined in drivers/base/power/runtime.c that can be + used for carrying out runtime PM operations in such a way that the + synchronization between them is taken care of by the PM core. Bus types and + device drivers are encouraged to use these functions. + +The runtime PM callbacks present in 'struct dev_pm_ops', the device runtime PM +fields of 'struct dev_pm_info' and the core helper functions provided for +runtime PM are described below. + +2. Device Runtime PM Callbacks +============================== + +There are three device runtime PM callbacks defined in 'struct dev_pm_ops':: + + struct dev_pm_ops { + ... + int (*runtime_suspend)(struct device *dev); + int (*runtime_resume)(struct device *dev); + int (*runtime_idle)(struct device *dev); + ... + }; + +The ->runtime_suspend(), ->runtime_resume() and ->runtime_idle() callbacks +are executed by the PM core for the device's subsystem that may be either of +the following: + + 1. PM domain of the device, if the device's PM domain object, dev->pm_domain, + is present. + + 2. Device type of the device, if both dev->type and dev->type->pm are present. + + 3. Device class of the device, if both dev->class and dev->class->pm are + present. + + 4. Bus type of the device, if both dev->bus and dev->bus->pm are present. + +If the subsystem chosen by applying the above rules doesn't provide the relevant +callback, the PM core will invoke the corresponding driver callback stored in +dev->driver->pm directly (if present). + +The PM core always checks which callback to use in the order given above, so the +priority order of callbacks from high to low is: PM domain, device type, class +and bus type. Moreover, the high-priority one will always take precedence over +a low-priority one. The PM domain, bus type, device type and class callbacks +are referred to as subsystem-level callbacks in what follows. + +By default, the callbacks are always invoked in process context with interrupts +enabled. However, the pm_runtime_irq_safe() helper function can be used to tell +the PM core that it is safe to run the ->runtime_suspend(), ->runtime_resume() +and ->runtime_idle() callbacks for the given device in atomic context with +interrupts disabled. This implies that the callback routines in question must +not block or sleep, but it also means that the synchronous helper functions +listed at the end of Section 4 may be used for that device within an interrupt +handler or generally in an atomic context. + +The subsystem-level suspend callback, if present, is _entirely_ _responsible_ +for handling the suspend of the device as appropriate, which may, but need not +include executing the device driver's own ->runtime_suspend() callback (from the +PM core's point of view it is not necessary to implement a ->runtime_suspend() +callback in a device driver as long as the subsystem-level suspend callback +knows what to do to handle the device). + + * Once the subsystem-level suspend callback (or the driver suspend callback, + if invoked directly) has completed successfully for the given device, the PM + core regards the device as suspended, which need not mean that it has been + put into a low power state. It is supposed to mean, however, that the + device will not process data and will not communicate with the CPU(s) and + RAM until the appropriate resume callback is executed for it. The runtime + PM status of a device after successful execution of the suspend callback is + 'suspended'. + + * If the suspend callback returns -EBUSY or -EAGAIN, the device's runtime PM + status remains 'active', which means that the device _must_ be fully + operational afterwards. + + * If the suspend callback returns an error code different from -EBUSY and + -EAGAIN, the PM core regards this as a fatal error and will refuse to run + the helper functions described in Section 4 for the device until its status + is directly set to either 'active', or 'suspended' (the PM core provides + special helper functions for this purpose). + +In particular, if the driver requires remote wakeup capability (i.e. hardware +mechanism allowing the device to request a change of its power state, such as +PCI PME) for proper functioning and device_can_wakeup() returns 'false' for the +device, then ->runtime_suspend() should return -EBUSY. On the other hand, if +device_can_wakeup() returns 'true' for the device and the device is put into a +low-power state during the execution of the suspend callback, it is expected +that remote wakeup will be enabled for the device. Generally, remote wakeup +should be enabled for all input devices put into low-power states at run time. + +The subsystem-level resume callback, if present, is **entirely responsible** for +handling the resume of the device as appropriate, which may, but need not +include executing the device driver's own ->runtime_resume() callback (from the +PM core's point of view it is not necessary to implement a ->runtime_resume() +callback in a device driver as long as the subsystem-level resume callback knows +what to do to handle the device). + + * Once the subsystem-level resume callback (or the driver resume callback, if + invoked directly) has completed successfully, the PM core regards the device + as fully operational, which means that the device _must_ be able to complete + I/O operations as needed. The runtime PM status of the device is then + 'active'. + + * If the resume callback returns an error code, the PM core regards this as a + fatal error and will refuse to run the helper functions described in Section + 4 for the device, until its status is directly set to either 'active', or + 'suspended' (by means of special helper functions provided by the PM core + for this purpose). + +The idle callback (a subsystem-level one, if present, or the driver one) is +executed by the PM core whenever the device appears to be idle, which is +indicated to the PM core by two counters, the device's usage counter and the +counter of 'active' children of the device. + + * If any of these counters is decreased using a helper function provided by + the PM core and it turns out to be equal to zero, the other counter is + checked. If that counter also is equal to zero, the PM core executes the + idle callback with the device as its argument. + +The action performed by the idle callback is totally dependent on the subsystem +(or driver) in question, but the expected and recommended action is to check +if the device can be suspended (i.e. if all of the conditions necessary for +suspending the device are satisfied) and to queue up a suspend request for the +device in that case. If there is no idle callback, or if the callback returns +0, then the PM core will attempt to carry out a runtime suspend of the device, +also respecting devices configured for autosuspend. In essence this means a +call to pm_runtime_autosuspend() (do note that drivers needs to update the +device last busy mark, pm_runtime_mark_last_busy(), to control the delay under +this circumstance). To prevent this (for example, if the callback routine has +started a delayed suspend), the routine must return a non-zero value. Negative +error return codes are ignored by the PM core. + +The helper functions provided by the PM core, described in Section 4, guarantee +that the following constraints are met with respect to runtime PM callbacks for +one device: + +(1) The callbacks are mutually exclusive (e.g. it is forbidden to execute + ->runtime_suspend() in parallel with ->runtime_resume() or with another + instance of ->runtime_suspend() for the same device) with the exception that + ->runtime_suspend() or ->runtime_resume() can be executed in parallel with + ->runtime_idle() (although ->runtime_idle() will not be started while any + of the other callbacks is being executed for the same device). + +(2) ->runtime_idle() and ->runtime_suspend() can only be executed for 'active' + devices (i.e. the PM core will only execute ->runtime_idle() or + ->runtime_suspend() for the devices the runtime PM status of which is + 'active'). + +(3) ->runtime_idle() and ->runtime_suspend() can only be executed for a device + the usage counter of which is equal to zero _and_ either the counter of + 'active' children of which is equal to zero, or the 'power.ignore_children' + flag of which is set. + +(4) ->runtime_resume() can only be executed for 'suspended' devices (i.e. the + PM core will only execute ->runtime_resume() for the devices the runtime + PM status of which is 'suspended'). + +Additionally, the helper functions provided by the PM core obey the following +rules: + + * If ->runtime_suspend() is about to be executed or there's a pending request + to execute it, ->runtime_idle() will not be executed for the same device. + + * A request to execute or to schedule the execution of ->runtime_suspend() + will cancel any pending requests to execute ->runtime_idle() for the same + device. + + * If ->runtime_resume() is about to be executed or there's a pending request + to execute it, the other callbacks will not be executed for the same device. + + * A request to execute ->runtime_resume() will cancel any pending or + scheduled requests to execute the other callbacks for the same device, + except for scheduled autosuspends. + +3. Runtime PM Device Fields +=========================== + +The following device runtime PM fields are present in 'struct dev_pm_info', as +defined in include/linux/pm.h: + + `struct timer_list suspend_timer;` + - timer used for scheduling (delayed) suspend and autosuspend requests + + `unsigned long timer_expires;` + - timer expiration time, in jiffies (if this is different from zero, the + timer is running and will expire at that time, otherwise the timer is not + running) + + `struct work_struct work;` + - work structure used for queuing up requests (i.e. work items in pm_wq) + + `wait_queue_head_t wait_queue;` + - wait queue used if any of the helper functions needs to wait for another + one to complete + + `spinlock_t lock;` + - lock used for synchronization + + `atomic_t usage_count;` + - the usage counter of the device + + `atomic_t child_count;` + - the count of 'active' children of the device + + `unsigned int ignore_children;` + - if set, the value of child_count is ignored (but still updated) + + `unsigned int disable_depth;` + - used for disabling the helper functions (they work normally if this is + equal to zero); the initial value of it is 1 (i.e. runtime PM is + initially disabled for all devices) + + `int runtime_error;` + - if set, there was a fatal error (one of the callbacks returned error code + as described in Section 2), so the helper functions will not work until + this flag is cleared; this is the error code returned by the failing + callback + + `unsigned int idle_notification;` + - if set, ->runtime_idle() is being executed + + `unsigned int request_pending;` + - if set, there's a pending request (i.e. a work item queued up into pm_wq) + + `enum rpm_request request;` + - type of request that's pending (valid if request_pending is set) + + `unsigned int deferred_resume;` + - set if ->runtime_resume() is about to be run while ->runtime_suspend() is + being executed for that device and it is not practical to wait for the + suspend to complete; means "start a resume as soon as you've suspended" + + `enum rpm_status runtime_status;` + - the runtime PM status of the device; this field's initial value is + RPM_SUSPENDED, which means that each device is initially regarded by the + PM core as 'suspended', regardless of its real hardware status + + `unsigned int runtime_auto;` + - if set, indicates that the user space has allowed the device driver to + power manage the device at run time via the /sys/devices/.../power/control + `interface;` it may only be modified with the help of the pm_runtime_allow() + and pm_runtime_forbid() helper functions + + `unsigned int no_callbacks;` + - indicates that the device does not use the runtime PM callbacks (see + Section 8); it may be modified only by the pm_runtime_no_callbacks() + helper function + + `unsigned int irq_safe;` + - indicates that the ->runtime_suspend() and ->runtime_resume() callbacks + will be invoked with the spinlock held and interrupts disabled + + `unsigned int use_autosuspend;` + - indicates that the device's driver supports delayed autosuspend (see + Section 9); it may be modified only by the + pm_runtime{_dont}_use_autosuspend() helper functions + + `unsigned int timer_autosuspends;` + - indicates that the PM core should attempt to carry out an autosuspend + when the timer expires rather than a normal suspend + + `int autosuspend_delay;` + - the delay time (in milliseconds) to be used for autosuspend + + `unsigned long last_busy;` + - the time (in jiffies) when the pm_runtime_mark_last_busy() helper + function was last called for this device; used in calculating inactivity + periods for autosuspend + +All of the above fields are members of the 'power' member of 'struct device'. + +4. Runtime PM Device Helper Functions +===================================== + +The following runtime PM helper functions are defined in +drivers/base/power/runtime.c and include/linux/pm_runtime.h: + + `void pm_runtime_init(struct device *dev);` + - initialize the device runtime PM fields in 'struct dev_pm_info' + + `void pm_runtime_remove(struct device *dev);` + - make sure that the runtime PM of the device will be disabled after + removing the device from device hierarchy + + `int pm_runtime_idle(struct device *dev);` + - execute the subsystem-level idle callback for the device; returns an + error code on failure, where -EINPROGRESS means that ->runtime_idle() is + already being executed; if there is no callback or the callback returns 0 + then run pm_runtime_autosuspend(dev) and return its result + + `int pm_runtime_suspend(struct device *dev);` + - execute the subsystem-level suspend callback for the device; returns 0 on + success, 1 if the device's runtime PM status was already 'suspended', or + error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt + to suspend the device again in future and -EACCES means that + 'power.disable_depth' is different from 0 + + `int pm_runtime_autosuspend(struct device *dev);` + - same as pm_runtime_suspend() except that the autosuspend delay is taken + `into account;` if pm_runtime_autosuspend_expiration() says the delay has + not yet expired then an autosuspend is scheduled for the appropriate time + and 0 is returned + + `int pm_runtime_resume(struct device *dev);` + - execute the subsystem-level resume callback for the device; returns 0 on + success, 1 if the device's runtime PM status was already 'active' or + error code on failure, where -EAGAIN means it may be safe to attempt to + resume the device again in future, but 'power.runtime_error' should be + checked additionally, and -EACCES means that 'power.disable_depth' is + different from 0 + + `int pm_request_idle(struct device *dev);` + - submit a request to execute the subsystem-level idle callback for the + device (the request is represented by a work item in pm_wq); returns 0 on + success or error code if the request has not been queued up + + `int pm_request_autosuspend(struct device *dev);` + - schedule the execution of the subsystem-level suspend callback for the + device when the autosuspend delay has expired; if the delay has already + expired then the work item is queued up immediately + + `int pm_schedule_suspend(struct device *dev, unsigned int delay);` + - schedule the execution of the subsystem-level suspend callback for the + device in future, where 'delay' is the time to wait before queuing up a + suspend work item in pm_wq, in milliseconds (if 'delay' is zero, the work + item is queued up immediately); returns 0 on success, 1 if the device's PM + runtime status was already 'suspended', or error code if the request + hasn't been scheduled (or queued up if 'delay' is 0); if the execution of + ->runtime_suspend() is already scheduled and not yet expired, the new + value of 'delay' will be used as the time to wait + + `int pm_request_resume(struct device *dev);` + - submit a request to execute the subsystem-level resume callback for the + device (the request is represented by a work item in pm_wq); returns 0 on + success, 1 if the device's runtime PM status was already 'active', or + error code if the request hasn't been queued up + + `void pm_runtime_get_noresume(struct device *dev);` + - increment the device's usage counter + + `int pm_runtime_get(struct device *dev);` + - increment the device's usage counter, run pm_request_resume(dev) and + return its result + + `int pm_runtime_get_sync(struct device *dev);` + - increment the device's usage counter, run pm_runtime_resume(dev) and + return its result + + `int pm_runtime_get_if_in_use(struct device *dev);` + - return -EINVAL if 'power.disable_depth' is nonzero; otherwise, if the + runtime PM status is RPM_ACTIVE and the runtime PM usage counter is + nonzero, increment the counter and return 1; otherwise return 0 without + changing the counter + + `void pm_runtime_put_noidle(struct device *dev);` + - decrement the device's usage counter + + `int pm_runtime_put(struct device *dev);` + - decrement the device's usage counter; if the result is 0 then run + pm_request_idle(dev) and return its result + + `int pm_runtime_put_autosuspend(struct device *dev);` + - decrement the device's usage counter; if the result is 0 then run + pm_request_autosuspend(dev) and return its result + + `int pm_runtime_put_sync(struct device *dev);` + - decrement the device's usage counter; if the result is 0 then run + pm_runtime_idle(dev) and return its result + + `int pm_runtime_put_sync_suspend(struct device *dev);` + - decrement the device's usage counter; if the result is 0 then run + pm_runtime_suspend(dev) and return its result + + `int pm_runtime_put_sync_autosuspend(struct device *dev);` + - decrement the device's usage counter; if the result is 0 then run + pm_runtime_autosuspend(dev) and return its result + + `void pm_runtime_enable(struct device *dev);` + - decrement the device's 'power.disable_depth' field; if that field is equal + to zero, the runtime PM helper functions can execute subsystem-level + callbacks described in Section 2 for the device + + `int pm_runtime_disable(struct device *dev);` + - increment the device's 'power.disable_depth' field (if the value of that + field was previously zero, this prevents subsystem-level runtime PM + callbacks from being run for the device), make sure that all of the + pending runtime PM operations on the device are either completed or + canceled; returns 1 if there was a resume request pending and it was + necessary to execute the subsystem-level resume callback for the device + to satisfy that request, otherwise 0 is returned + + `int pm_runtime_barrier(struct device *dev);` + - check if there's a resume request pending for the device and resume it + (synchronously) in that case, cancel any other pending runtime PM requests + regarding it and wait for all runtime PM operations on it in progress to + complete; returns 1 if there was a resume request pending and it was + necessary to execute the subsystem-level resume callback for the device to + satisfy that request, otherwise 0 is returned + + `void pm_suspend_ignore_children(struct device *dev, bool enable);` + - set/unset the power.ignore_children flag of the device + + `int pm_runtime_set_active(struct device *dev);` + - clear the device's 'power.runtime_error' flag, set the device's runtime + PM status to 'active' and update its parent's counter of 'active' + children as appropriate (it is only valid to use this function if + 'power.runtime_error' is set or 'power.disable_depth' is greater than + zero); it will fail and return error code if the device has a parent + which is not active and the 'power.ignore_children' flag of which is unset + + `void pm_runtime_set_suspended(struct device *dev);` + - clear the device's 'power.runtime_error' flag, set the device's runtime + PM status to 'suspended' and update its parent's counter of 'active' + children as appropriate (it is only valid to use this function if + 'power.runtime_error' is set or 'power.disable_depth' is greater than + zero) + + `bool pm_runtime_active(struct device *dev);` + - return true if the device's runtime PM status is 'active' or its + 'power.disable_depth' field is not equal to zero, or false otherwise + + `bool pm_runtime_suspended(struct device *dev);` + - return true if the device's runtime PM status is 'suspended' and its + 'power.disable_depth' field is equal to zero, or false otherwise + + `bool pm_runtime_status_suspended(struct device *dev);` + - return true if the device's runtime PM status is 'suspended' + + `void pm_runtime_allow(struct device *dev);` + - set the power.runtime_auto flag for the device and decrease its usage + counter (used by the /sys/devices/.../power/control interface to + effectively allow the device to be power managed at run time) + + `void pm_runtime_forbid(struct device *dev);` + - unset the power.runtime_auto flag for the device and increase its usage + counter (used by the /sys/devices/.../power/control interface to + effectively prevent the device from being power managed at run time) + + `void pm_runtime_no_callbacks(struct device *dev);` + - set the power.no_callbacks flag for the device and remove the runtime + PM attributes from /sys/devices/.../power (or prevent them from being + added when the device is registered) + + `void pm_runtime_irq_safe(struct device *dev);` + - set the power.irq_safe flag for the device, causing the runtime-PM + callbacks to be invoked with interrupts off + + `bool pm_runtime_is_irq_safe(struct device *dev);` + - return true if power.irq_safe flag was set for the device, causing + the runtime-PM callbacks to be invoked with interrupts off + + `void pm_runtime_mark_last_busy(struct device *dev);` + - set the power.last_busy field to the current time + + `void pm_runtime_use_autosuspend(struct device *dev);` + - set the power.use_autosuspend flag, enabling autosuspend delays; call + pm_runtime_get_sync if the flag was previously cleared and + power.autosuspend_delay is negative + + `void pm_runtime_dont_use_autosuspend(struct device *dev);` + - clear the power.use_autosuspend flag, disabling autosuspend delays; + decrement the device's usage counter if the flag was previously set and + power.autosuspend_delay is negative; call pm_runtime_idle + + `void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);` + - set the power.autosuspend_delay value to 'delay' (expressed in + milliseconds); if 'delay' is negative then runtime suspends are + prevented; if power.use_autosuspend is set, pm_runtime_get_sync may be + called or the device's usage counter may be decremented and + pm_runtime_idle called depending on if power.autosuspend_delay is + changed to or from a negative value; if power.use_autosuspend is clear, + pm_runtime_idle is called + + `unsigned long pm_runtime_autosuspend_expiration(struct device *dev);` + - calculate the time when the current autosuspend delay period will expire, + based on power.last_busy and power.autosuspend_delay; if the delay time + is 1000 ms or larger then the expiration time is rounded up to the + nearest second; returns 0 if the delay period has already expired or + power.use_autosuspend isn't set, otherwise returns the expiration time + in jiffies + +It is safe to execute the following helper functions from interrupt context: + +- pm_request_idle() +- pm_request_autosuspend() +- pm_schedule_suspend() +- pm_request_resume() +- pm_runtime_get_noresume() +- pm_runtime_get() +- pm_runtime_put_noidle() +- pm_runtime_put() +- pm_runtime_put_autosuspend() +- pm_runtime_enable() +- pm_suspend_ignore_children() +- pm_runtime_set_active() +- pm_runtime_set_suspended() +- pm_runtime_suspended() +- pm_runtime_mark_last_busy() +- pm_runtime_autosuspend_expiration() + +If pm_runtime_irq_safe() has been called for a device then the following helper +functions may also be used in interrupt context: + +- pm_runtime_idle() +- pm_runtime_suspend() +- pm_runtime_autosuspend() +- pm_runtime_resume() +- pm_runtime_get_sync() +- pm_runtime_put_sync() +- pm_runtime_put_sync_suspend() +- pm_runtime_put_sync_autosuspend() + +5. Runtime PM Initialization, Device Probing and Removal +======================================================== + +Initially, the runtime PM is disabled for all devices, which means that the +majority of the runtime PM helper functions described in Section 4 will return +-EAGAIN until pm_runtime_enable() is called for the device. + +In addition to that, the initial runtime PM status of all devices is +'suspended', but it need not reflect the actual physical state of the device. +Thus, if the device is initially active (i.e. it is able to process I/O), its +runtime PM status must be changed to 'active', with the help of +pm_runtime_set_active(), before pm_runtime_enable() is called for the device. + +However, if the device has a parent and the parent's runtime PM is enabled, +calling pm_runtime_set_active() for the device will affect the parent, unless +the parent's 'power.ignore_children' flag is set. Namely, in that case the +parent won't be able to suspend at run time, using the PM core's helper +functions, as long as the child's status is 'active', even if the child's +runtime PM is still disabled (i.e. pm_runtime_enable() hasn't been called for +the child yet or pm_runtime_disable() has been called for it). For this reason, +once pm_runtime_set_active() has been called for the device, pm_runtime_enable() +should be called for it too as soon as reasonably possible or its runtime PM +status should be changed back to 'suspended' with the help of +pm_runtime_set_suspended(). + +If the default initial runtime PM status of the device (i.e. 'suspended') +reflects the actual state of the device, its bus type's or its driver's +->probe() callback will likely need to wake it up using one of the PM core's +helper functions described in Section 4. In that case, pm_runtime_resume() +should be used. Of course, for this purpose the device's runtime PM has to be +enabled earlier by calling pm_runtime_enable(). + +Note, if the device may execute pm_runtime calls during the probe (such as +if it is registers with a subsystem that may call back in) then the +pm_runtime_get_sync() call paired with a pm_runtime_put() call will be +appropriate to ensure that the device is not put back to sleep during the +probe. This can happen with systems such as the network device layer. + +It may be desirable to suspend the device once ->probe() has finished. +Therefore the driver core uses the asynchronous pm_request_idle() to submit a +request to execute the subsystem-level idle callback for the device at that +time. A driver that makes use of the runtime autosuspend feature, may want to +update the last busy mark before returning from ->probe(). + +Moreover, the driver core prevents runtime PM callbacks from racing with the bus +notifier callback in __device_release_driver(), which is necessary, because the +notifier is used by some subsystems to carry out operations affecting the +runtime PM functionality. It does so by calling pm_runtime_get_sync() before +driver_sysfs_remove() and the BUS_NOTIFY_UNBIND_DRIVER notifications. This +resumes the device if it's in the suspended state and prevents it from +being suspended again while those routines are being executed. + +To allow bus types and drivers to put devices into the suspended state by +calling pm_runtime_suspend() from their ->remove() routines, the driver core +executes pm_runtime_put_sync() after running the BUS_NOTIFY_UNBIND_DRIVER +notifications in __device_release_driver(). This requires bus types and +drivers to make their ->remove() callbacks avoid races with runtime PM directly, +but also it allows of more flexibility in the handling of devices during the +removal of their drivers. + +Drivers in ->remove() callback should undo the runtime PM changes done +in ->probe(). Usually this means calling pm_runtime_disable(), +pm_runtime_dont_use_autosuspend() etc. + +The user space can effectively disallow the driver of the device to power manage +it at run time by changing the value of its /sys/devices/.../power/control +attribute to "on", which causes pm_runtime_forbid() to be called. In principle, +this mechanism may also be used by the driver to effectively turn off the +runtime power management of the device until the user space turns it on. +Namely, during the initialization the driver can make sure that the runtime PM +status of the device is 'active' and call pm_runtime_forbid(). It should be +noted, however, that if the user space has already intentionally changed the +value of /sys/devices/.../power/control to "auto" to allow the driver to power +manage the device at run time, the driver may confuse it by using +pm_runtime_forbid() this way. + +6. Runtime PM and System Sleep +============================== + +Runtime PM and system sleep (i.e., system suspend and hibernation, also known +as suspend-to-RAM and suspend-to-disk) interact with each other in a couple of +ways. If a device is active when a system sleep starts, everything is +straightforward. But what should happen if the device is already suspended? + +The device may have different wake-up settings for runtime PM and system sleep. +For example, remote wake-up may be enabled for runtime suspend but disallowed +for system sleep (device_may_wakeup(dev) returns 'false'). When this happens, +the subsystem-level system suspend callback is responsible for changing the +device's wake-up setting (it may leave that to the device driver's system +suspend routine). It may be necessary to resume the device and suspend it again +in order to do so. The same is true if the driver uses different power levels +or other settings for runtime suspend and system sleep. + +During system resume, the simplest approach is to bring all devices back to full +power, even if they had been suspended before the system suspend began. There +are several reasons for this, including: + + * The device might need to switch power levels, wake-up settings, etc. + + * Remote wake-up events might have been lost by the firmware. + + * The device's children may need the device to be at full power in order + to resume themselves. + + * The driver's idea of the device state may not agree with the device's + physical state. This can happen during resume from hibernation. + + * The device might need to be reset. + + * Even though the device was suspended, if its usage counter was > 0 then most + likely it would need a runtime resume in the near future anyway. + +If the device had been suspended before the system suspend began and it's +brought back to full power during resume, then its runtime PM status will have +to be updated to reflect the actual post-system sleep status. The way to do +this is: + + - pm_runtime_disable(dev); + - pm_runtime_set_active(dev); + - pm_runtime_enable(dev); + +The PM core always increments the runtime usage counter before calling the +->suspend() callback and decrements it after calling the ->resume() callback. +Hence disabling runtime PM temporarily like this will not cause any runtime +suspend attempts to be permanently lost. If the usage count goes to zero +following the return of the ->resume() callback, the ->runtime_idle() callback +will be invoked as usual. + +On some systems, however, system sleep is not entered through a global firmware +or hardware operation. Instead, all hardware components are put into low-power +states directly by the kernel in a coordinated way. Then, the system sleep +state effectively follows from the states the hardware components end up in +and the system is woken up from that state by a hardware interrupt or a similar +mechanism entirely under the kernel's control. As a result, the kernel never +gives control away and the states of all devices during resume are precisely +known to it. If that is the case and none of the situations listed above takes +place (in particular, if the system is not waking up from hibernation), it may +be more efficient to leave the devices that had been suspended before the system +suspend began in the suspended state. + +To this end, the PM core provides a mechanism allowing some coordination between +different levels of device hierarchy. Namely, if a system suspend .prepare() +callback returns a positive number for a device, that indicates to the PM core +that the device appears to be runtime-suspended and its state is fine, so it +may be left in runtime suspend provided that all of its descendants are also +left in runtime suspend. If that happens, the PM core will not execute any +system suspend and resume callbacks for all of those devices, except for the +complete callback, which is then entirely responsible for handling the device +as appropriate. This only applies to system suspend transitions that are not +related to hibernation (see Documentation/driver-api/pm/devices.rst for more +information). + +The PM core does its best to reduce the probability of race conditions between +the runtime PM and system suspend/resume (and hibernation) callbacks by carrying +out the following operations: + + * During system suspend pm_runtime_get_noresume() is called for every device + right before executing the subsystem-level .prepare() callback for it and + pm_runtime_barrier() is called for every device right before executing the + subsystem-level .suspend() callback for it. In addition to that the PM core + calls __pm_runtime_disable() with 'false' as the second argument for every + device right before executing the subsystem-level .suspend_late() callback + for it. + + * During system resume pm_runtime_enable() and pm_runtime_put() are called for + every device right after executing the subsystem-level .resume_early() + callback and right after executing the subsystem-level .complete() callback + for it, respectively. + +7. Generic subsystem callbacks + +Subsystems may wish to conserve code space by using the set of generic power +management callbacks provided by the PM core, defined in +driver/base/power/generic_ops.c: + + `int pm_generic_runtime_suspend(struct device *dev);` + - invoke the ->runtime_suspend() callback provided by the driver of this + device and return its result, or return 0 if not defined + + `int pm_generic_runtime_resume(struct device *dev);` + - invoke the ->runtime_resume() callback provided by the driver of this + device and return its result, or return 0 if not defined + + `int pm_generic_suspend(struct device *dev);` + - if the device has not been suspended at run time, invoke the ->suspend() + callback provided by its driver and return its result, or return 0 if not + defined + + `int pm_generic_suspend_noirq(struct device *dev);` + - if pm_runtime_suspended(dev) returns "false", invoke the ->suspend_noirq() + callback provided by the device's driver and return its result, or return + 0 if not defined + + `int pm_generic_resume(struct device *dev);` + - invoke the ->resume() callback provided by the driver of this device and, + if successful, change the device's runtime PM status to 'active' + + `int pm_generic_resume_noirq(struct device *dev);` + - invoke the ->resume_noirq() callback provided by the driver of this device + + `int pm_generic_freeze(struct device *dev);` + - if the device has not been suspended at run time, invoke the ->freeze() + callback provided by its driver and return its result, or return 0 if not + defined + + `int pm_generic_freeze_noirq(struct device *dev);` + - if pm_runtime_suspended(dev) returns "false", invoke the ->freeze_noirq() + callback provided by the device's driver and return its result, or return + 0 if not defined + + `int pm_generic_thaw(struct device *dev);` + - if the device has not been suspended at run time, invoke the ->thaw() + callback provided by its driver and return its result, or return 0 if not + defined + + `int pm_generic_thaw_noirq(struct device *dev);` + - if pm_runtime_suspended(dev) returns "false", invoke the ->thaw_noirq() + callback provided by the device's driver and return its result, or return + 0 if not defined + + `int pm_generic_poweroff(struct device *dev);` + - if the device has not been suspended at run time, invoke the ->poweroff() + callback provided by its driver and return its result, or return 0 if not + defined + + `int pm_generic_poweroff_noirq(struct device *dev);` + - if pm_runtime_suspended(dev) returns "false", run the ->poweroff_noirq() + callback provided by the device's driver and return its result, or return + 0 if not defined + + `int pm_generic_restore(struct device *dev);` + - invoke the ->restore() callback provided by the driver of this device and, + if successful, change the device's runtime PM status to 'active' + + `int pm_generic_restore_noirq(struct device *dev);` + - invoke the ->restore_noirq() callback provided by the device's driver + +These functions are the defaults used by the PM core, if a subsystem doesn't +provide its own callbacks for ->runtime_idle(), ->runtime_suspend(), +->runtime_resume(), ->suspend(), ->suspend_noirq(), ->resume(), +->resume_noirq(), ->freeze(), ->freeze_noirq(), ->thaw(), ->thaw_noirq(), +->poweroff(), ->poweroff_noirq(), ->restore(), ->restore_noirq() in the +subsystem-level dev_pm_ops structure. + +Device drivers that wish to use the same function as a system suspend, freeze, +poweroff and runtime suspend callback, and similarly for system resume, thaw, +restore, and runtime resume, can achieve this with the help of the +UNIVERSAL_DEV_PM_OPS macro defined in include/linux/pm.h (possibly setting its +last argument to NULL). + +8. "No-Callback" Devices +======================== + +Some "devices" are only logical sub-devices of their parent and cannot be +power-managed on their own. (The prototype example is a USB interface. Entire +USB devices can go into low-power mode or send wake-up requests, but neither is +possible for individual interfaces.) The drivers for these devices have no +need of runtime PM callbacks; if the callbacks did exist, ->runtime_suspend() +and ->runtime_resume() would always return 0 without doing anything else and +->runtime_idle() would always call pm_runtime_suspend(). + +Subsystems can tell the PM core about these devices by calling +pm_runtime_no_callbacks(). This should be done after the device structure is +initialized and before it is registered (although after device registration is +also okay). The routine will set the device's power.no_callbacks flag and +prevent the non-debugging runtime PM sysfs attributes from being created. + +When power.no_callbacks is set, the PM core will not invoke the +->runtime_idle(), ->runtime_suspend(), or ->runtime_resume() callbacks. +Instead it will assume that suspends and resumes always succeed and that idle +devices should be suspended. + +As a consequence, the PM core will never directly inform the device's subsystem +or driver about runtime power changes. Instead, the driver for the device's +parent must take responsibility for telling the device's driver when the +parent's power state changes. + +9. Autosuspend, or automatically-delayed suspends +================================================= + +Changing a device's power state isn't free; it requires both time and energy. +A device should be put in a low-power state only when there's some reason to +think it will remain in that state for a substantial time. A common heuristic +says that a device which hasn't been used for a while is liable to remain +unused; following this advice, drivers should not allow devices to be suspended +at runtime until they have been inactive for some minimum period. Even when +the heuristic ends up being non-optimal, it will still prevent devices from +"bouncing" too rapidly between low-power and full-power states. + +The term "autosuspend" is an historical remnant. It doesn't mean that the +device is automatically suspended (the subsystem or driver still has to call +the appropriate PM routines); rather it means that runtime suspends will +automatically be delayed until the desired period of inactivity has elapsed. + +Inactivity is determined based on the power.last_busy field. Drivers should +call pm_runtime_mark_last_busy() to update this field after carrying out I/O, +typically just before calling pm_runtime_put_autosuspend(). The desired length +of the inactivity period is a matter of policy. Subsystems can set this length +initially by calling pm_runtime_set_autosuspend_delay(), but after device +registration the length should be controlled by user space, using the +/sys/devices/.../power/autosuspend_delay_ms attribute. + +In order to use autosuspend, subsystems or drivers must call +pm_runtime_use_autosuspend() (preferably before registering the device), and +thereafter they should use the various `*_autosuspend()` helper functions +instead of the non-autosuspend counterparts:: + + Instead of: pm_runtime_suspend use: pm_runtime_autosuspend; + Instead of: pm_schedule_suspend use: pm_request_autosuspend; + Instead of: pm_runtime_put use: pm_runtime_put_autosuspend; + Instead of: pm_runtime_put_sync use: pm_runtime_put_sync_autosuspend. + +Drivers may also continue to use the non-autosuspend helper functions; they +will behave normally, which means sometimes taking the autosuspend delay into +account (see pm_runtime_idle). + +Under some circumstances a driver or subsystem may want to prevent a device +from autosuspending immediately, even though the usage counter is zero and the +autosuspend delay time has expired. If the ->runtime_suspend() callback +returns -EAGAIN or -EBUSY, and if the next autosuspend delay expiration time is +in the future (as it normally would be if the callback invoked +pm_runtime_mark_last_busy()), the PM core will automatically reschedule the +autosuspend. The ->runtime_suspend() callback can't do this rescheduling +itself because no suspend requests of any kind are accepted while the device is +suspending (i.e., while the callback is running). + +The implementation is well suited for asynchronous use in interrupt contexts. +However such use inevitably involves races, because the PM core can't +synchronize ->runtime_suspend() callbacks with the arrival of I/O requests. +This synchronization must be handled by the driver, using its private lock. +Here is a schematic pseudo-code example:: + + foo_read_or_write(struct foo_priv *foo, void *data) + { + lock(&foo->private_lock); + add_request_to_io_queue(foo, data); + if (foo->num_pending_requests++ == 0) + pm_runtime_get(&foo->dev); + if (!foo->is_suspended) + foo_process_next_request(foo); + unlock(&foo->private_lock); + } + + foo_io_completion(struct foo_priv *foo, void *req) + { + lock(&foo->private_lock); + if (--foo->num_pending_requests == 0) { + pm_runtime_mark_last_busy(&foo->dev); + pm_runtime_put_autosuspend(&foo->dev); + } else { + foo_process_next_request(foo); + } + unlock(&foo->private_lock); + /* Send req result back to the user ... */ + } + + int foo_runtime_suspend(struct device *dev) + { + struct foo_priv foo = container_of(dev, ...); + int ret = 0; + + lock(&foo->private_lock); + if (foo->num_pending_requests > 0) { + ret = -EBUSY; + } else { + /* ... suspend the device ... */ + foo->is_suspended = 1; + } + unlock(&foo->private_lock); + return ret; + } + + int foo_runtime_resume(struct device *dev) + { + struct foo_priv foo = container_of(dev, ...); + + lock(&foo->private_lock); + /* ... resume the device ... */ + foo->is_suspended = 0; + pm_runtime_mark_last_busy(&foo->dev); + if (foo->num_pending_requests > 0) + foo_process_next_request(foo); + unlock(&foo->private_lock); + return 0; + } + +The important point is that after foo_io_completion() asks for an autosuspend, +the foo_runtime_suspend() callback may race with foo_read_or_write(). +Therefore foo_runtime_suspend() has to check whether there are any pending I/O +requests (while holding the private lock) before allowing the suspend to +proceed. + +In addition, the power.autosuspend_delay field can be changed by user space at +any time. If a driver cares about this, it can call +pm_runtime_autosuspend_expiration() from within the ->runtime_suspend() +callback while holding its private lock. If the function returns a nonzero +value then the delay has not yet expired and the callback should return +-EAGAIN. diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt deleted file mode 100644 index 937e33c46211..000000000000 --- a/Documentation/power/runtime_pm.txt +++ /dev/null @@ -1,928 +0,0 @@ -Runtime Power Management Framework for I/O Devices - -(C) 2009-2011 Rafael J. Wysocki , Novell Inc. -(C) 2010 Alan Stern -(C) 2014 Intel Corp., Rafael J. Wysocki - -1. Introduction - -Support for runtime power management (runtime PM) of I/O devices is provided -at the power management core (PM core) level by means of: - -* The power management workqueue pm_wq in which bus types and device drivers can - put their PM-related work items. It is strongly recommended that pm_wq be - used for queuing all work items related to runtime PM, because this allows - them to be synchronized with system-wide power transitions (suspend to RAM, - hibernation and resume from system sleep states). pm_wq is declared in - include/linux/pm_runtime.h and defined in kernel/power/main.c. - -* A number of runtime PM fields in the 'power' member of 'struct device' (which - is of the type 'struct dev_pm_info', defined in include/linux/pm.h) that can - be used for synchronizing runtime PM operations with one another. - -* Three device runtime PM callbacks in 'struct dev_pm_ops' (defined in - include/linux/pm.h). - -* A set of helper functions defined in drivers/base/power/runtime.c that can be - used for carrying out runtime PM operations in such a way that the - synchronization between them is taken care of by the PM core. Bus types and - device drivers are encouraged to use these functions. - -The runtime PM callbacks present in 'struct dev_pm_ops', the device runtime PM -fields of 'struct dev_pm_info' and the core helper functions provided for -runtime PM are described below. - -2. Device Runtime PM Callbacks - -There are three device runtime PM callbacks defined in 'struct dev_pm_ops': - -struct dev_pm_ops { - ... - int (*runtime_suspend)(struct device *dev); - int (*runtime_resume)(struct device *dev); - int (*runtime_idle)(struct device *dev); - ... -}; - -The ->runtime_suspend(), ->runtime_resume() and ->runtime_idle() callbacks -are executed by the PM core for the device's subsystem that may be either of -the following: - - 1. PM domain of the device, if the device's PM domain object, dev->pm_domain, - is present. - - 2. Device type of the device, if both dev->type and dev->type->pm are present. - - 3. Device class of the device, if both dev->class and dev->class->pm are - present. - - 4. Bus type of the device, if both dev->bus and dev->bus->pm are present. - -If the subsystem chosen by applying the above rules doesn't provide the relevant -callback, the PM core will invoke the corresponding driver callback stored in -dev->driver->pm directly (if present). - -The PM core always checks which callback to use in the order given above, so the -priority order of callbacks from high to low is: PM domain, device type, class -and bus type. Moreover, the high-priority one will always take precedence over -a low-priority one. The PM domain, bus type, device type and class callbacks -are referred to as subsystem-level callbacks in what follows. - -By default, the callbacks are always invoked in process context with interrupts -enabled. However, the pm_runtime_irq_safe() helper function can be used to tell -the PM core that it is safe to run the ->runtime_suspend(), ->runtime_resume() -and ->runtime_idle() callbacks for the given device in atomic context with -interrupts disabled. This implies that the callback routines in question must -not block or sleep, but it also means that the synchronous helper functions -listed at the end of Section 4 may be used for that device within an interrupt -handler or generally in an atomic context. - -The subsystem-level suspend callback, if present, is _entirely_ _responsible_ -for handling the suspend of the device as appropriate, which may, but need not -include executing the device driver's own ->runtime_suspend() callback (from the -PM core's point of view it is not necessary to implement a ->runtime_suspend() -callback in a device driver as long as the subsystem-level suspend callback -knows what to do to handle the device). - - * Once the subsystem-level suspend callback (or the driver suspend callback, - if invoked directly) has completed successfully for the given device, the PM - core regards the device as suspended, which need not mean that it has been - put into a low power state. It is supposed to mean, however, that the - device will not process data and will not communicate with the CPU(s) and - RAM until the appropriate resume callback is executed for it. The runtime - PM status of a device after successful execution of the suspend callback is - 'suspended'. - - * If the suspend callback returns -EBUSY or -EAGAIN, the device's runtime PM - status remains 'active', which means that the device _must_ be fully - operational afterwards. - - * If the suspend callback returns an error code different from -EBUSY and - -EAGAIN, the PM core regards this as a fatal error and will refuse to run - the helper functions described in Section 4 for the device until its status - is directly set to either 'active', or 'suspended' (the PM core provides - special helper functions for this purpose). - -In particular, if the driver requires remote wakeup capability (i.e. hardware -mechanism allowing the device to request a change of its power state, such as -PCI PME) for proper functioning and device_can_wakeup() returns 'false' for the -device, then ->runtime_suspend() should return -EBUSY. On the other hand, if -device_can_wakeup() returns 'true' for the device and the device is put into a -low-power state during the execution of the suspend callback, it is expected -that remote wakeup will be enabled for the device. Generally, remote wakeup -should be enabled for all input devices put into low-power states at run time. - -The subsystem-level resume callback, if present, is _entirely_ _responsible_ for -handling the resume of the device as appropriate, which may, but need not -include executing the device driver's own ->runtime_resume() callback (from the -PM core's point of view it is not necessary to implement a ->runtime_resume() -callback in a device driver as long as the subsystem-level resume callback knows -what to do to handle the device). - - * Once the subsystem-level resume callback (or the driver resume callback, if - invoked directly) has completed successfully, the PM core regards the device - as fully operational, which means that the device _must_ be able to complete - I/O operations as needed. The runtime PM status of the device is then - 'active'. - - * If the resume callback returns an error code, the PM core regards this as a - fatal error and will refuse to run the helper functions described in Section - 4 for the device, until its status is directly set to either 'active', or - 'suspended' (by means of special helper functions provided by the PM core - for this purpose). - -The idle callback (a subsystem-level one, if present, or the driver one) is -executed by the PM core whenever the device appears to be idle, which is -indicated to the PM core by two counters, the device's usage counter and the -counter of 'active' children of the device. - - * If any of these counters is decreased using a helper function provided by - the PM core and it turns out to be equal to zero, the other counter is - checked. If that counter also is equal to zero, the PM core executes the - idle callback with the device as its argument. - -The action performed by the idle callback is totally dependent on the subsystem -(or driver) in question, but the expected and recommended action is to check -if the device can be suspended (i.e. if all of the conditions necessary for -suspending the device are satisfied) and to queue up a suspend request for the -device in that case. If there is no idle callback, or if the callback returns -0, then the PM core will attempt to carry out a runtime suspend of the device, -also respecting devices configured for autosuspend. In essence this means a -call to pm_runtime_autosuspend() (do note that drivers needs to update the -device last busy mark, pm_runtime_mark_last_busy(), to control the delay under -this circumstance). To prevent this (for example, if the callback routine has -started a delayed suspend), the routine must return a non-zero value. Negative -error return codes are ignored by the PM core. - -The helper functions provided by the PM core, described in Section 4, guarantee -that the following constraints are met with respect to runtime PM callbacks for -one device: - -(1) The callbacks are mutually exclusive (e.g. it is forbidden to execute - ->runtime_suspend() in parallel with ->runtime_resume() or with another - instance of ->runtime_suspend() for the same device) with the exception that - ->runtime_suspend() or ->runtime_resume() can be executed in parallel with - ->runtime_idle() (although ->runtime_idle() will not be started while any - of the other callbacks is being executed for the same device). - -(2) ->runtime_idle() and ->runtime_suspend() can only be executed for 'active' - devices (i.e. the PM core will only execute ->runtime_idle() or - ->runtime_suspend() for the devices the runtime PM status of which is - 'active'). - -(3) ->runtime_idle() and ->runtime_suspend() can only be executed for a device - the usage counter of which is equal to zero _and_ either the counter of - 'active' children of which is equal to zero, or the 'power.ignore_children' - flag of which is set. - -(4) ->runtime_resume() can only be executed for 'suspended' devices (i.e. the - PM core will only execute ->runtime_resume() for the devices the runtime - PM status of which is 'suspended'). - -Additionally, the helper functions provided by the PM core obey the following -rules: - - * If ->runtime_suspend() is about to be executed or there's a pending request - to execute it, ->runtime_idle() will not be executed for the same device. - - * A request to execute or to schedule the execution of ->runtime_suspend() - will cancel any pending requests to execute ->runtime_idle() for the same - device. - - * If ->runtime_resume() is about to be executed or there's a pending request - to execute it, the other callbacks will not be executed for the same device. - - * A request to execute ->runtime_resume() will cancel any pending or - scheduled requests to execute the other callbacks for the same device, - except for scheduled autosuspends. - -3. Runtime PM Device Fields - -The following device runtime PM fields are present in 'struct dev_pm_info', as -defined in include/linux/pm.h: - - struct timer_list suspend_timer; - - timer used for scheduling (delayed) suspend and autosuspend requests - - unsigned long timer_expires; - - timer expiration time, in jiffies (if this is different from zero, the - timer is running and will expire at that time, otherwise the timer is not - running) - - struct work_struct work; - - work structure used for queuing up requests (i.e. work items in pm_wq) - - wait_queue_head_t wait_queue; - - wait queue used if any of the helper functions needs to wait for another - one to complete - - spinlock_t lock; - - lock used for synchronization - - atomic_t usage_count; - - the usage counter of the device - - atomic_t child_count; - - the count of 'active' children of the device - - unsigned int ignore_children; - - if set, the value of child_count is ignored (but still updated) - - unsigned int disable_depth; - - used for disabling the helper functions (they work normally if this is - equal to zero); the initial value of it is 1 (i.e. runtime PM is - initially disabled for all devices) - - int runtime_error; - - if set, there was a fatal error (one of the callbacks returned error code - as described in Section 2), so the helper functions will not work until - this flag is cleared; this is the error code returned by the failing - callback - - unsigned int idle_notification; - - if set, ->runtime_idle() is being executed - - unsigned int request_pending; - - if set, there's a pending request (i.e. a work item queued up into pm_wq) - - enum rpm_request request; - - type of request that's pending (valid if request_pending is set) - - unsigned int deferred_resume; - - set if ->runtime_resume() is about to be run while ->runtime_suspend() is - being executed for that device and it is not practical to wait for the - suspend to complete; means "start a resume as soon as you've suspended" - - enum rpm_status runtime_status; - - the runtime PM status of the device; this field's initial value is - RPM_SUSPENDED, which means that each device is initially regarded by the - PM core as 'suspended', regardless of its real hardware status - - unsigned int runtime_auto; - - if set, indicates that the user space has allowed the device driver to - power manage the device at run time via the /sys/devices/.../power/control - interface; it may only be modified with the help of the pm_runtime_allow() - and pm_runtime_forbid() helper functions - - unsigned int no_callbacks; - - indicates that the device does not use the runtime PM callbacks (see - Section 8); it may be modified only by the pm_runtime_no_callbacks() - helper function - - unsigned int irq_safe; - - indicates that the ->runtime_suspend() and ->runtime_resume() callbacks - will be invoked with the spinlock held and interrupts disabled - - unsigned int use_autosuspend; - - indicates that the device's driver supports delayed autosuspend (see - Section 9); it may be modified only by the - pm_runtime{_dont}_use_autosuspend() helper functions - - unsigned int timer_autosuspends; - - indicates that the PM core should attempt to carry out an autosuspend - when the timer expires rather than a normal suspend - - int autosuspend_delay; - - the delay time (in milliseconds) to be used for autosuspend - - unsigned long last_busy; - - the time (in jiffies) when the pm_runtime_mark_last_busy() helper - function was last called for this device; used in calculating inactivity - periods for autosuspend - -All of the above fields are members of the 'power' member of 'struct device'. - -4. Runtime PM Device Helper Functions - -The following runtime PM helper functions are defined in -drivers/base/power/runtime.c and include/linux/pm_runtime.h: - - void pm_runtime_init(struct device *dev); - - initialize the device runtime PM fields in 'struct dev_pm_info' - - void pm_runtime_remove(struct device *dev); - - make sure that the runtime PM of the device will be disabled after - removing the device from device hierarchy - - int pm_runtime_idle(struct device *dev); - - execute the subsystem-level idle callback for the device; returns an - error code on failure, where -EINPROGRESS means that ->runtime_idle() is - already being executed; if there is no callback or the callback returns 0 - then run pm_runtime_autosuspend(dev) and return its result - - int pm_runtime_suspend(struct device *dev); - - execute the subsystem-level suspend callback for the device; returns 0 on - success, 1 if the device's runtime PM status was already 'suspended', or - error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt - to suspend the device again in future and -EACCES means that - 'power.disable_depth' is different from 0 - - int pm_runtime_autosuspend(struct device *dev); - - same as pm_runtime_suspend() except that the autosuspend delay is taken - into account; if pm_runtime_autosuspend_expiration() says the delay has - not yet expired then an autosuspend is scheduled for the appropriate time - and 0 is returned - - int pm_runtime_resume(struct device *dev); - - execute the subsystem-level resume callback for the device; returns 0 on - success, 1 if the device's runtime PM status was already 'active' or - error code on failure, where -EAGAIN means it may be safe to attempt to - resume the device again in future, but 'power.runtime_error' should be - checked additionally, and -EACCES means that 'power.disable_depth' is - different from 0 - - int pm_request_idle(struct device *dev); - - submit a request to execute the subsystem-level idle callback for the - device (the request is represented by a work item in pm_wq); returns 0 on - success or error code if the request has not been queued up - - int pm_request_autosuspend(struct device *dev); - - schedule the execution of the subsystem-level suspend callback for the - device when the autosuspend delay has expired; if the delay has already - expired then the work item is queued up immediately - - int pm_schedule_suspend(struct device *dev, unsigned int delay); - - schedule the execution of the subsystem-level suspend callback for the - device in future, where 'delay' is the time to wait before queuing up a - suspend work item in pm_wq, in milliseconds (if 'delay' is zero, the work - item is queued up immediately); returns 0 on success, 1 if the device's PM - runtime status was already 'suspended', or error code if the request - hasn't been scheduled (or queued up if 'delay' is 0); if the execution of - ->runtime_suspend() is already scheduled and not yet expired, the new - value of 'delay' will be used as the time to wait - - int pm_request_resume(struct device *dev); - - submit a request to execute the subsystem-level resume callback for the - device (the request is represented by a work item in pm_wq); returns 0 on - success, 1 if the device's runtime PM status was already 'active', or - error code if the request hasn't been queued up - - void pm_runtime_get_noresume(struct device *dev); - - increment the device's usage counter - - int pm_runtime_get(struct device *dev); - - increment the device's usage counter, run pm_request_resume(dev) and - return its result - - int pm_runtime_get_sync(struct device *dev); - - increment the device's usage counter, run pm_runtime_resume(dev) and - return its result - - int pm_runtime_get_if_in_use(struct device *dev); - - return -EINVAL if 'power.disable_depth' is nonzero; otherwise, if the - runtime PM status is RPM_ACTIVE and the runtime PM usage counter is - nonzero, increment the counter and return 1; otherwise return 0 without - changing the counter - - void pm_runtime_put_noidle(struct device *dev); - - decrement the device's usage counter - - int pm_runtime_put(struct device *dev); - - decrement the device's usage counter; if the result is 0 then run - pm_request_idle(dev) and return its result - - int pm_runtime_put_autosuspend(struct device *dev); - - decrement the device's usage counter; if the result is 0 then run - pm_request_autosuspend(dev) and return its result - - int pm_runtime_put_sync(struct device *dev); - - decrement the device's usage counter; if the result is 0 then run - pm_runtime_idle(dev) and return its result - - int pm_runtime_put_sync_suspend(struct device *dev); - - decrement the device's usage counter; if the result is 0 then run - pm_runtime_suspend(dev) and return its result - - int pm_runtime_put_sync_autosuspend(struct device *dev); - - decrement the device's usage counter; if the result is 0 then run - pm_runtime_autosuspend(dev) and return its result - - void pm_runtime_enable(struct device *dev); - - decrement the device's 'power.disable_depth' field; if that field is equal - to zero, the runtime PM helper functions can execute subsystem-level - callbacks described in Section 2 for the device - - int pm_runtime_disable(struct device *dev); - - increment the device's 'power.disable_depth' field (if the value of that - field was previously zero, this prevents subsystem-level runtime PM - callbacks from being run for the device), make sure that all of the - pending runtime PM operations on the device are either completed or - canceled; returns 1 if there was a resume request pending and it was - necessary to execute the subsystem-level resume callback for the device - to satisfy that request, otherwise 0 is returned - - int pm_runtime_barrier(struct device *dev); - - check if there's a resume request pending for the device and resume it - (synchronously) in that case, cancel any other pending runtime PM requests - regarding it and wait for all runtime PM operations on it in progress to - complete; returns 1 if there was a resume request pending and it was - necessary to execute the subsystem-level resume callback for the device to - satisfy that request, otherwise 0 is returned - - void pm_suspend_ignore_children(struct device *dev, bool enable); - - set/unset the power.ignore_children flag of the device - - int pm_runtime_set_active(struct device *dev); - - clear the device's 'power.runtime_error' flag, set the device's runtime - PM status to 'active' and update its parent's counter of 'active' - children as appropriate (it is only valid to use this function if - 'power.runtime_error' is set or 'power.disable_depth' is greater than - zero); it will fail and return error code if the device has a parent - which is not active and the 'power.ignore_children' flag of which is unset - - void pm_runtime_set_suspended(struct device *dev); - - clear the device's 'power.runtime_error' flag, set the device's runtime - PM status to 'suspended' and update its parent's counter of 'active' - children as appropriate (it is only valid to use this function if - 'power.runtime_error' is set or 'power.disable_depth' is greater than - zero) - - bool pm_runtime_active(struct device *dev); - - return true if the device's runtime PM status is 'active' or its - 'power.disable_depth' field is not equal to zero, or false otherwise - - bool pm_runtime_suspended(struct device *dev); - - return true if the device's runtime PM status is 'suspended' and its - 'power.disable_depth' field is equal to zero, or false otherwise - - bool pm_runtime_status_suspended(struct device *dev); - - return true if the device's runtime PM status is 'suspended' - - void pm_runtime_allow(struct device *dev); - - set the power.runtime_auto flag for the device and decrease its usage - counter (used by the /sys/devices/.../power/control interface to - effectively allow the device to be power managed at run time) - - void pm_runtime_forbid(struct device *dev); - - unset the power.runtime_auto flag for the device and increase its usage - counter (used by the /sys/devices/.../power/control interface to - effectively prevent the device from being power managed at run time) - - void pm_runtime_no_callbacks(struct device *dev); - - set the power.no_callbacks flag for the device and remove the runtime - PM attributes from /sys/devices/.../power (or prevent them from being - added when the device is registered) - - void pm_runtime_irq_safe(struct device *dev); - - set the power.irq_safe flag for the device, causing the runtime-PM - callbacks to be invoked with interrupts off - - bool pm_runtime_is_irq_safe(struct device *dev); - - return true if power.irq_safe flag was set for the device, causing - the runtime-PM callbacks to be invoked with interrupts off - - void pm_runtime_mark_last_busy(struct device *dev); - - set the power.last_busy field to the current time - - void pm_runtime_use_autosuspend(struct device *dev); - - set the power.use_autosuspend flag, enabling autosuspend delays; call - pm_runtime_get_sync if the flag was previously cleared and - power.autosuspend_delay is negative - - void pm_runtime_dont_use_autosuspend(struct device *dev); - - clear the power.use_autosuspend flag, disabling autosuspend delays; - decrement the device's usage counter if the flag was previously set and - power.autosuspend_delay is negative; call pm_runtime_idle - - void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); - - set the power.autosuspend_delay value to 'delay' (expressed in - milliseconds); if 'delay' is negative then runtime suspends are - prevented; if power.use_autosuspend is set, pm_runtime_get_sync may be - called or the device's usage counter may be decremented and - pm_runtime_idle called depending on if power.autosuspend_delay is - changed to or from a negative value; if power.use_autosuspend is clear, - pm_runtime_idle is called - - unsigned long pm_runtime_autosuspend_expiration(struct device *dev); - - calculate the time when the current autosuspend delay period will expire, - based on power.last_busy and power.autosuspend_delay; if the delay time - is 1000 ms or larger then the expiration time is rounded up to the - nearest second; returns 0 if the delay period has already expired or - power.use_autosuspend isn't set, otherwise returns the expiration time - in jiffies - -It is safe to execute the following helper functions from interrupt context: - -pm_request_idle() -pm_request_autosuspend() -pm_schedule_suspend() -pm_request_resume() -pm_runtime_get_noresume() -pm_runtime_get() -pm_runtime_put_noidle() -pm_runtime_put() -pm_runtime_put_autosuspend() -pm_runtime_enable() -pm_suspend_ignore_children() -pm_runtime_set_active() -pm_runtime_set_suspended() -pm_runtime_suspended() -pm_runtime_mark_last_busy() -pm_runtime_autosuspend_expiration() - -If pm_runtime_irq_safe() has been called for a device then the following helper -functions may also be used in interrupt context: - -pm_runtime_idle() -pm_runtime_suspend() -pm_runtime_autosuspend() -pm_runtime_resume() -pm_runtime_get_sync() -pm_runtime_put_sync() -pm_runtime_put_sync_suspend() -pm_runtime_put_sync_autosuspend() - -5. Runtime PM Initialization, Device Probing and Removal - -Initially, the runtime PM is disabled for all devices, which means that the -majority of the runtime PM helper functions described in Section 4 will return --EAGAIN until pm_runtime_enable() is called for the device. - -In addition to that, the initial runtime PM status of all devices is -'suspended', but it need not reflect the actual physical state of the device. -Thus, if the device is initially active (i.e. it is able to process I/O), its -runtime PM status must be changed to 'active', with the help of -pm_runtime_set_active(), before pm_runtime_enable() is called for the device. - -However, if the device has a parent and the parent's runtime PM is enabled, -calling pm_runtime_set_active() for the device will affect the parent, unless -the parent's 'power.ignore_children' flag is set. Namely, in that case the -parent won't be able to suspend at run time, using the PM core's helper -functions, as long as the child's status is 'active', even if the child's -runtime PM is still disabled (i.e. pm_runtime_enable() hasn't been called for -the child yet or pm_runtime_disable() has been called for it). For this reason, -once pm_runtime_set_active() has been called for the device, pm_runtime_enable() -should be called for it too as soon as reasonably possible or its runtime PM -status should be changed back to 'suspended' with the help of -pm_runtime_set_suspended(). - -If the default initial runtime PM status of the device (i.e. 'suspended') -reflects the actual state of the device, its bus type's or its driver's -->probe() callback will likely need to wake it up using one of the PM core's -helper functions described in Section 4. In that case, pm_runtime_resume() -should be used. Of course, for this purpose the device's runtime PM has to be -enabled earlier by calling pm_runtime_enable(). - -Note, if the device may execute pm_runtime calls during the probe (such as -if it is registers with a subsystem that may call back in) then the -pm_runtime_get_sync() call paired with a pm_runtime_put() call will be -appropriate to ensure that the device is not put back to sleep during the -probe. This can happen with systems such as the network device layer. - -It may be desirable to suspend the device once ->probe() has finished. -Therefore the driver core uses the asynchronous pm_request_idle() to submit a -request to execute the subsystem-level idle callback for the device at that -time. A driver that makes use of the runtime autosuspend feature, may want to -update the last busy mark before returning from ->probe(). - -Moreover, the driver core prevents runtime PM callbacks from racing with the bus -notifier callback in __device_release_driver(), which is necessary, because the -notifier is used by some subsystems to carry out operations affecting the -runtime PM functionality. It does so by calling pm_runtime_get_sync() before -driver_sysfs_remove() and the BUS_NOTIFY_UNBIND_DRIVER notifications. This -resumes the device if it's in the suspended state and prevents it from -being suspended again while those routines are being executed. - -To allow bus types and drivers to put devices into the suspended state by -calling pm_runtime_suspend() from their ->remove() routines, the driver core -executes pm_runtime_put_sync() after running the BUS_NOTIFY_UNBIND_DRIVER -notifications in __device_release_driver(). This requires bus types and -drivers to make their ->remove() callbacks avoid races with runtime PM directly, -but also it allows of more flexibility in the handling of devices during the -removal of their drivers. - -Drivers in ->remove() callback should undo the runtime PM changes done -in ->probe(). Usually this means calling pm_runtime_disable(), -pm_runtime_dont_use_autosuspend() etc. - -The user space can effectively disallow the driver of the device to power manage -it at run time by changing the value of its /sys/devices/.../power/control -attribute to "on", which causes pm_runtime_forbid() to be called. In principle, -this mechanism may also be used by the driver to effectively turn off the -runtime power management of the device until the user space turns it on. -Namely, during the initialization the driver can make sure that the runtime PM -status of the device is 'active' and call pm_runtime_forbid(). It should be -noted, however, that if the user space has already intentionally changed the -value of /sys/devices/.../power/control to "auto" to allow the driver to power -manage the device at run time, the driver may confuse it by using -pm_runtime_forbid() this way. - -6. Runtime PM and System Sleep - -Runtime PM and system sleep (i.e., system suspend and hibernation, also known -as suspend-to-RAM and suspend-to-disk) interact with each other in a couple of -ways. If a device is active when a system sleep starts, everything is -straightforward. But what should happen if the device is already suspended? - -The device may have different wake-up settings for runtime PM and system sleep. -For example, remote wake-up may be enabled for runtime suspend but disallowed -for system sleep (device_may_wakeup(dev) returns 'false'). When this happens, -the subsystem-level system suspend callback is responsible for changing the -device's wake-up setting (it may leave that to the device driver's system -suspend routine). It may be necessary to resume the device and suspend it again -in order to do so. The same is true if the driver uses different power levels -or other settings for runtime suspend and system sleep. - -During system resume, the simplest approach is to bring all devices back to full -power, even if they had been suspended before the system suspend began. There -are several reasons for this, including: - - * The device might need to switch power levels, wake-up settings, etc. - - * Remote wake-up events might have been lost by the firmware. - - * The device's children may need the device to be at full power in order - to resume themselves. - - * The driver's idea of the device state may not agree with the device's - physical state. This can happen during resume from hibernation. - - * The device might need to be reset. - - * Even though the device was suspended, if its usage counter was > 0 then most - likely it would need a runtime resume in the near future anyway. - -If the device had been suspended before the system suspend began and it's -brought back to full power during resume, then its runtime PM status will have -to be updated to reflect the actual post-system sleep status. The way to do -this is: - - pm_runtime_disable(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); - -The PM core always increments the runtime usage counter before calling the -->suspend() callback and decrements it after calling the ->resume() callback. -Hence disabling runtime PM temporarily like this will not cause any runtime -suspend attempts to be permanently lost. If the usage count goes to zero -following the return of the ->resume() callback, the ->runtime_idle() callback -will be invoked as usual. - -On some systems, however, system sleep is not entered through a global firmware -or hardware operation. Instead, all hardware components are put into low-power -states directly by the kernel in a coordinated way. Then, the system sleep -state effectively follows from the states the hardware components end up in -and the system is woken up from that state by a hardware interrupt or a similar -mechanism entirely under the kernel's control. As a result, the kernel never -gives control away and the states of all devices during resume are precisely -known to it. If that is the case and none of the situations listed above takes -place (in particular, if the system is not waking up from hibernation), it may -be more efficient to leave the devices that had been suspended before the system -suspend began in the suspended state. - -To this end, the PM core provides a mechanism allowing some coordination between -different levels of device hierarchy. Namely, if a system suspend .prepare() -callback returns a positive number for a device, that indicates to the PM core -that the device appears to be runtime-suspended and its state is fine, so it -may be left in runtime suspend provided that all of its descendants are also -left in runtime suspend. If that happens, the PM core will not execute any -system suspend and resume callbacks for all of those devices, except for the -complete callback, which is then entirely responsible for handling the device -as appropriate. This only applies to system suspend transitions that are not -related to hibernation (see Documentation/driver-api/pm/devices.rst for more -information). - -The PM core does its best to reduce the probability of race conditions between -the runtime PM and system suspend/resume (and hibernation) callbacks by carrying -out the following operations: - - * During system suspend pm_runtime_get_noresume() is called for every device - right before executing the subsystem-level .prepare() callback for it and - pm_runtime_barrier() is called for every device right before executing the - subsystem-level .suspend() callback for it. In addition to that the PM core - calls __pm_runtime_disable() with 'false' as the second argument for every - device right before executing the subsystem-level .suspend_late() callback - for it. - - * During system resume pm_runtime_enable() and pm_runtime_put() are called for - every device right after executing the subsystem-level .resume_early() - callback and right after executing the subsystem-level .complete() callback - for it, respectively. - -7. Generic subsystem callbacks - -Subsystems may wish to conserve code space by using the set of generic power -management callbacks provided by the PM core, defined in -driver/base/power/generic_ops.c: - - int pm_generic_runtime_suspend(struct device *dev); - - invoke the ->runtime_suspend() callback provided by the driver of this - device and return its result, or return 0 if not defined - - int pm_generic_runtime_resume(struct device *dev); - - invoke the ->runtime_resume() callback provided by the driver of this - device and return its result, or return 0 if not defined - - int pm_generic_suspend(struct device *dev); - - if the device has not been suspended at run time, invoke the ->suspend() - callback provided by its driver and return its result, or return 0 if not - defined - - int pm_generic_suspend_noirq(struct device *dev); - - if pm_runtime_suspended(dev) returns "false", invoke the ->suspend_noirq() - callback provided by the device's driver and return its result, or return - 0 if not defined - - int pm_generic_resume(struct device *dev); - - invoke the ->resume() callback provided by the driver of this device and, - if successful, change the device's runtime PM status to 'active' - - int pm_generic_resume_noirq(struct device *dev); - - invoke the ->resume_noirq() callback provided by the driver of this device - - int pm_generic_freeze(struct device *dev); - - if the device has not been suspended at run time, invoke the ->freeze() - callback provided by its driver and return its result, or return 0 if not - defined - - int pm_generic_freeze_noirq(struct device *dev); - - if pm_runtime_suspended(dev) returns "false", invoke the ->freeze_noirq() - callback provided by the device's driver and return its result, or return - 0 if not defined - - int pm_generic_thaw(struct device *dev); - - if the device has not been suspended at run time, invoke the ->thaw() - callback provided by its driver and return its result, or return 0 if not - defined - - int pm_generic_thaw_noirq(struct device *dev); - - if pm_runtime_suspended(dev) returns "false", invoke the ->thaw_noirq() - callback provided by the device's driver and return its result, or return - 0 if not defined - - int pm_generic_poweroff(struct device *dev); - - if the device has not been suspended at run time, invoke the ->poweroff() - callback provided by its driver and return its result, or return 0 if not - defined - - int pm_generic_poweroff_noirq(struct device *dev); - - if pm_runtime_suspended(dev) returns "false", run the ->poweroff_noirq() - callback provided by the device's driver and return its result, or return - 0 if not defined - - int pm_generic_restore(struct device *dev); - - invoke the ->restore() callback provided by the driver of this device and, - if successful, change the device's runtime PM status to 'active' - - int pm_generic_restore_noirq(struct device *dev); - - invoke the ->restore_noirq() callback provided by the device's driver - -These functions are the defaults used by the PM core, if a subsystem doesn't -provide its own callbacks for ->runtime_idle(), ->runtime_suspend(), -->runtime_resume(), ->suspend(), ->suspend_noirq(), ->resume(), -->resume_noirq(), ->freeze(), ->freeze_noirq(), ->thaw(), ->thaw_noirq(), -->poweroff(), ->poweroff_noirq(), ->restore(), ->restore_noirq() in the -subsystem-level dev_pm_ops structure. - -Device drivers that wish to use the same function as a system suspend, freeze, -poweroff and runtime suspend callback, and similarly for system resume, thaw, -restore, and runtime resume, can achieve this with the help of the -UNIVERSAL_DEV_PM_OPS macro defined in include/linux/pm.h (possibly setting its -last argument to NULL). - -8. "No-Callback" Devices - -Some "devices" are only logical sub-devices of their parent and cannot be -power-managed on their own. (The prototype example is a USB interface. Entire -USB devices can go into low-power mode or send wake-up requests, but neither is -possible for individual interfaces.) The drivers for these devices have no -need of runtime PM callbacks; if the callbacks did exist, ->runtime_suspend() -and ->runtime_resume() would always return 0 without doing anything else and -->runtime_idle() would always call pm_runtime_suspend(). - -Subsystems can tell the PM core about these devices by calling -pm_runtime_no_callbacks(). This should be done after the device structure is -initialized and before it is registered (although after device registration is -also okay). The routine will set the device's power.no_callbacks flag and -prevent the non-debugging runtime PM sysfs attributes from being created. - -When power.no_callbacks is set, the PM core will not invoke the -->runtime_idle(), ->runtime_suspend(), or ->runtime_resume() callbacks. -Instead it will assume that suspends and resumes always succeed and that idle -devices should be suspended. - -As a consequence, the PM core will never directly inform the device's subsystem -or driver about runtime power changes. Instead, the driver for the device's -parent must take responsibility for telling the device's driver when the -parent's power state changes. - -9. Autosuspend, or automatically-delayed suspends - -Changing a device's power state isn't free; it requires both time and energy. -A device should be put in a low-power state only when there's some reason to -think it will remain in that state for a substantial time. A common heuristic -says that a device which hasn't been used for a while is liable to remain -unused; following this advice, drivers should not allow devices to be suspended -at runtime until they have been inactive for some minimum period. Even when -the heuristic ends up being non-optimal, it will still prevent devices from -"bouncing" too rapidly between low-power and full-power states. - -The term "autosuspend" is an historical remnant. It doesn't mean that the -device is automatically suspended (the subsystem or driver still has to call -the appropriate PM routines); rather it means that runtime suspends will -automatically be delayed until the desired period of inactivity has elapsed. - -Inactivity is determined based on the power.last_busy field. Drivers should -call pm_runtime_mark_last_busy() to update this field after carrying out I/O, -typically just before calling pm_runtime_put_autosuspend(). The desired length -of the inactivity period is a matter of policy. Subsystems can set this length -initially by calling pm_runtime_set_autosuspend_delay(), but after device -registration the length should be controlled by user space, using the -/sys/devices/.../power/autosuspend_delay_ms attribute. - -In order to use autosuspend, subsystems or drivers must call -pm_runtime_use_autosuspend() (preferably before registering the device), and -thereafter they should use the various *_autosuspend() helper functions instead -of the non-autosuspend counterparts: - - Instead of: pm_runtime_suspend use: pm_runtime_autosuspend; - Instead of: pm_schedule_suspend use: pm_request_autosuspend; - Instead of: pm_runtime_put use: pm_runtime_put_autosuspend; - Instead of: pm_runtime_put_sync use: pm_runtime_put_sync_autosuspend. - -Drivers may also continue to use the non-autosuspend helper functions; they -will behave normally, which means sometimes taking the autosuspend delay into -account (see pm_runtime_idle). - -Under some circumstances a driver or subsystem may want to prevent a device -from autosuspending immediately, even though the usage counter is zero and the -autosuspend delay time has expired. If the ->runtime_suspend() callback -returns -EAGAIN or -EBUSY, and if the next autosuspend delay expiration time is -in the future (as it normally would be if the callback invoked -pm_runtime_mark_last_busy()), the PM core will automatically reschedule the -autosuspend. The ->runtime_suspend() callback can't do this rescheduling -itself because no suspend requests of any kind are accepted while the device is -suspending (i.e., while the callback is running). - -The implementation is well suited for asynchronous use in interrupt contexts. -However such use inevitably involves races, because the PM core can't -synchronize ->runtime_suspend() callbacks with the arrival of I/O requests. -This synchronization must be handled by the driver, using its private lock. -Here is a schematic pseudo-code example: - - foo_read_or_write(struct foo_priv *foo, void *data) - { - lock(&foo->private_lock); - add_request_to_io_queue(foo, data); - if (foo->num_pending_requests++ == 0) - pm_runtime_get(&foo->dev); - if (!foo->is_suspended) - foo_process_next_request(foo); - unlock(&foo->private_lock); - } - - foo_io_completion(struct foo_priv *foo, void *req) - { - lock(&foo->private_lock); - if (--foo->num_pending_requests == 0) { - pm_runtime_mark_last_busy(&foo->dev); - pm_runtime_put_autosuspend(&foo->dev); - } else { - foo_process_next_request(foo); - } - unlock(&foo->private_lock); - /* Send req result back to the user ... */ - } - - int foo_runtime_suspend(struct device *dev) - { - struct foo_priv foo = container_of(dev, ...); - int ret = 0; - - lock(&foo->private_lock); - if (foo->num_pending_requests > 0) { - ret = -EBUSY; - } else { - /* ... suspend the device ... */ - foo->is_suspended = 1; - } - unlock(&foo->private_lock); - return ret; - } - - int foo_runtime_resume(struct device *dev) - { - struct foo_priv foo = container_of(dev, ...); - - lock(&foo->private_lock); - /* ... resume the device ... */ - foo->is_suspended = 0; - pm_runtime_mark_last_busy(&foo->dev); - if (foo->num_pending_requests > 0) - foo_process_next_request(foo); - unlock(&foo->private_lock); - return 0; - } - -The important point is that after foo_io_completion() asks for an autosuspend, -the foo_runtime_suspend() callback may race with foo_read_or_write(). -Therefore foo_runtime_suspend() has to check whether there are any pending I/O -requests (while holding the private lock) before allowing the suspend to -proceed. - -In addition, the power.autosuspend_delay field can be changed by user space at -any time. If a driver cares about this, it can call -pm_runtime_autosuspend_expiration() from within the ->runtime_suspend() -callback while holding its private lock. If the function returns a nonzero -value then the delay has not yet expired and the callback should return --EAGAIN. diff --git a/Documentation/power/s2ram.rst b/Documentation/power/s2ram.rst new file mode 100644 index 000000000000..d739aa7c742c --- /dev/null +++ b/Documentation/power/s2ram.rst @@ -0,0 +1,87 @@ +======================== +How to get s2ram working +======================== + +2006 Linus Torvalds +2006 Pavel Machek + +1) Check suspend.sf.net, program s2ram there has long whitelist of + "known ok" machines, along with tricks to use on each one. + +2) If that does not help, try reading tricks.txt and + video.txt. Perhaps problem is as simple as broken module, and + simple module unload can fix it. + +3) You can use Linus' TRACE_RESUME infrastructure, described below. + +Using TRACE_RESUME +~~~~~~~~~~~~~~~~~~ + +I've been working at making the machines I have able to STR, and almost +always it's a driver that is buggy. Thank God for the suspend/resume +debugging - the thing that Chuck tried to disable. That's often the _only_ +way to debug these things, and it's actually pretty powerful (but +time-consuming - having to insert TRACE_RESUME() markers into the device +driver that doesn't resume and recompile and reboot). + +Anyway, the way to debug this for people who are interested (have a +machine that doesn't boot) is: + + - enable PM_DEBUG, and PM_TRACE + + - use a script like this:: + + #!/bin/sh + sync + echo 1 > /sys/power/pm_trace + echo mem > /sys/power/state + + to suspend + + - if it doesn't come back up (which is usually the problem), reboot by + holding the power button down, and look at the dmesg output for things + like:: + + Magic number: 4:156:725 + hash matches drivers/base/power/resume.c:28 + hash matches device 0000:01:00.0 + + which means that the last trace event was just before trying to resume + device 0000:01:00.0. Then figure out what driver is controlling that + device (lspci and /sys/devices/pci* is your friend), and see if you can + fix it, disable it, or trace into its resume function. + + If no device matches the hash (or any matches appear to be false positives), + the culprit may be a device from a loadable kernel module that is not loaded + until after the hash is checked. You can check the hash against the current + devices again after more modules are loaded using sysfs:: + + cat /sys/power/pm_trace_dev_match + +For example, the above happens to be the VGA device on my EVO, which I +used to run with "radeonfb" (it's an ATI Radeon mobility). It turns out +that "radeonfb" simply cannot resume that device - it tries to set the +PLL's, and it just _hangs_. Using the regular VGA console and letting X +resume it instead works fine. + +NOTE +==== +pm_trace uses the system's Real Time Clock (RTC) to save the magic number. +Reason for this is that the RTC is the only reliably available piece of +hardware during resume operations where a value can be set that will +survive a reboot. + +pm_trace is not compatible with asynchronous suspend, so it turns +asynchronous suspend off (which may work around timing or +ordering-sensitive bugs). + +Consequence is that after a resume (even if it is successful) your system +clock will have a value corresponding to the magic number instead of the +correct date/time! It is therefore advisable to use a program like ntp-date +or rdate to reset the correct date/time from an external time source when +using this trace option. + +As the clock keeps ticking it is also essential that the reboot is done +quickly after the resume failure. The trace option does not use the seconds +or the low order bits of the minutes of the RTC, but a too long delay will +corrupt the magic value. diff --git a/Documentation/power/s2ram.txt b/Documentation/power/s2ram.txt deleted file mode 100644 index 4685aee197fd..000000000000 --- a/Documentation/power/s2ram.txt +++ /dev/null @@ -1,85 +0,0 @@ - How to get s2ram working - ~~~~~~~~~~~~~~~~~~~~~~~~ - 2006 Linus Torvalds - 2006 Pavel Machek - -1) Check suspend.sf.net, program s2ram there has long whitelist of - "known ok" machines, along with tricks to use on each one. - -2) If that does not help, try reading tricks.txt and - video.txt. Perhaps problem is as simple as broken module, and - simple module unload can fix it. - -3) You can use Linus' TRACE_RESUME infrastructure, described below. - - Using TRACE_RESUME - ~~~~~~~~~~~~~~~~~~ - -I've been working at making the machines I have able to STR, and almost -always it's a driver that is buggy. Thank God for the suspend/resume -debugging - the thing that Chuck tried to disable. That's often the _only_ -way to debug these things, and it's actually pretty powerful (but -time-consuming - having to insert TRACE_RESUME() markers into the device -driver that doesn't resume and recompile and reboot). - -Anyway, the way to debug this for people who are interested (have a -machine that doesn't boot) is: - - - enable PM_DEBUG, and PM_TRACE - - - use a script like this: - - #!/bin/sh - sync - echo 1 > /sys/power/pm_trace - echo mem > /sys/power/state - - to suspend - - - if it doesn't come back up (which is usually the problem), reboot by - holding the power button down, and look at the dmesg output for things - like - - Magic number: 4:156:725 - hash matches drivers/base/power/resume.c:28 - hash matches device 0000:01:00.0 - - which means that the last trace event was just before trying to resume - device 0000:01:00.0. Then figure out what driver is controlling that - device (lspci and /sys/devices/pci* is your friend), and see if you can - fix it, disable it, or trace into its resume function. - - If no device matches the hash (or any matches appear to be false positives), - the culprit may be a device from a loadable kernel module that is not loaded - until after the hash is checked. You can check the hash against the current - devices again after more modules are loaded using sysfs: - - cat /sys/power/pm_trace_dev_match - -For example, the above happens to be the VGA device on my EVO, which I -used to run with "radeonfb" (it's an ATI Radeon mobility). It turns out -that "radeonfb" simply cannot resume that device - it tries to set the -PLL's, and it just _hangs_. Using the regular VGA console and letting X -resume it instead works fine. - -NOTE -==== -pm_trace uses the system's Real Time Clock (RTC) to save the magic number. -Reason for this is that the RTC is the only reliably available piece of -hardware during resume operations where a value can be set that will -survive a reboot. - -pm_trace is not compatible with asynchronous suspend, so it turns -asynchronous suspend off (which may work around timing or -ordering-sensitive bugs). - -Consequence is that after a resume (even if it is successful) your system -clock will have a value corresponding to the magic number instead of the -correct date/time! It is therefore advisable to use a program like ntp-date -or rdate to reset the correct date/time from an external time source when -using this trace option. - -As the clock keeps ticking it is also essential that the reboot is done -quickly after the resume failure. The trace option does not use the seconds -or the low order bits of the minutes of the RTC, but a too long delay will -corrupt the magic value. diff --git a/Documentation/power/suspend-and-cpuhotplug.rst b/Documentation/power/suspend-and-cpuhotplug.rst new file mode 100644 index 000000000000..7ac8e1f549f4 --- /dev/null +++ b/Documentation/power/suspend-and-cpuhotplug.rst @@ -0,0 +1,286 @@ +==================================================================== +Interaction of Suspend code (S3) with the CPU hotplug infrastructure +==================================================================== + +(C) 2011 - 2014 Srivatsa S. Bhat + + +I. Differences between CPU hotplug and Suspend-to-RAM +====================================================== + +How does the regular CPU hotplug code differ from how the Suspend-to-RAM +infrastructure uses it internally? And where do they share common code? + +Well, a picture is worth a thousand words... So ASCII art follows :-) + +[This depicts the current design in the kernel, and focusses only on the +interactions involving the freezer and CPU hotplug and also tries to explain +the locking involved. It outlines the notifications involved as well. +But please note that here, only the call paths are illustrated, with the aim +of describing where they take different paths and where they share code. +What happens when regular CPU hotplug and Suspend-to-RAM race with each other +is not depicted here.] + +On a high level, the suspend-resume cycle goes like this:: + + |Freeze| -> |Disable nonboot| -> |Do suspend| -> |Enable nonboot| -> |Thaw | + |tasks | | cpus | | | | cpus | |tasks| + + +More details follow:: + + Suspend call path + ----------------- + + Write 'mem' to + /sys/power/state + sysfs file + | + v + Acquire system_transition_mutex lock + | + v + Send PM_SUSPEND_PREPARE + notifications + | + v + Freeze tasks + | + | + v + disable_nonboot_cpus() + /* start */ + | + v + Acquire cpu_add_remove_lock + | + v + Iterate over CURRENTLY + online CPUs + | + | + | ---------- + v | L + ======> _cpu_down() | + | [This takes cpuhotplug.lock | + Common | before taking down the CPU | + code | and releases it when done] | O + | While it is at it, notifications | + | are sent when notable events occur, | + ======> by running all registered callbacks. | + | | O + | | + | | + v | + Note down these cpus in | P + frozen_cpus mask ---------- + | + v + Disable regular cpu hotplug + by increasing cpu_hotplug_disabled + | + v + Release cpu_add_remove_lock + | + v + /* disable_nonboot_cpus() complete */ + | + v + Do suspend + + + +Resuming back is likewise, with the counterparts being (in the order of +execution during resume): + +* enable_nonboot_cpus() which involves:: + + | Acquire cpu_add_remove_lock + | Decrease cpu_hotplug_disabled, thereby enabling regular cpu hotplug + | Call _cpu_up() [for all those cpus in the frozen_cpus mask, in a loop] + | Release cpu_add_remove_lock + v + +* thaw tasks +* send PM_POST_SUSPEND notifications +* Release system_transition_mutex lock. + + +It is to be noted here that the system_transition_mutex lock is acquired at the very +beginning, when we are just starting out to suspend, and then released only +after the entire cycle is complete (i.e., suspend + resume). + +:: + + + + Regular CPU hotplug call path + ----------------------------- + + Write 0 (or 1) to + /sys/devices/system/cpu/cpu*/online + sysfs file + | + | + v + cpu_down() + | + v + Acquire cpu_add_remove_lock + | + v + If cpu_hotplug_disabled > 0 + return gracefully + | + | + v + ======> _cpu_down() + | [This takes cpuhotplug.lock + Common | before taking down the CPU + code | and releases it when done] + | While it is at it, notifications + | are sent when notable events occur, + ======> by running all registered callbacks. + | + | + v + Release cpu_add_remove_lock + [That's it!, for + regular CPU hotplug] + + + +So, as can be seen from the two diagrams (the parts marked as "Common code"), +regular CPU hotplug and the suspend code path converge at the _cpu_down() and +_cpu_up() functions. They differ in the arguments passed to these functions, +in that during regular CPU hotplug, 0 is passed for the 'tasks_frozen' +argument. But during suspend, since the tasks are already frozen by the time +the non-boot CPUs are offlined or onlined, the _cpu_*() functions are called +with the 'tasks_frozen' argument set to 1. +[See below for some known issues regarding this.] + + +Important files and functions/entry points: +------------------------------------------- + +- kernel/power/process.c : freeze_processes(), thaw_processes() +- kernel/power/suspend.c : suspend_prepare(), suspend_enter(), suspend_finish() +- kernel/cpu.c: cpu_[up|down](), _cpu_[up|down](), [disable|enable]_nonboot_cpus() + + + +II. What are the issues involved in CPU hotplug? +------------------------------------------------ + +There are some interesting situations involving CPU hotplug and microcode +update on the CPUs, as discussed below: + +[Please bear in mind that the kernel requests the microcode images from +userspace, using the request_firmware() function defined in +drivers/base/firmware_loader/main.c] + + +a. When all the CPUs are identical: + + This is the most common situation and it is quite straightforward: we want + to apply the same microcode revision to each of the CPUs. + To give an example of x86, the collect_cpu_info() function defined in + arch/x86/kernel/microcode_core.c helps in discovering the type of the CPU + and thereby in applying the correct microcode revision to it. + But note that the kernel does not maintain a common microcode image for the + all CPUs, in order to handle case 'b' described below. + + +b. When some of the CPUs are different than the rest: + + In this case since we probably need to apply different microcode revisions + to different CPUs, the kernel maintains a copy of the correct microcode + image for each CPU (after appropriate CPU type/model discovery using + functions such as collect_cpu_info()). + + +c. When a CPU is physically hot-unplugged and a new (and possibly different + type of) CPU is hot-plugged into the system: + + In the current design of the kernel, whenever a CPU is taken offline during + a regular CPU hotplug operation, upon receiving the CPU_DEAD notification + (which is sent by the CPU hotplug code), the microcode update driver's + callback for that event reacts by freeing the kernel's copy of the + microcode image for that CPU. + + Hence, when a new CPU is brought online, since the kernel finds that it + doesn't have the microcode image, it does the CPU type/model discovery + afresh and then requests the userspace for the appropriate microcode image + for that CPU, which is subsequently applied. + + For example, in x86, the mc_cpu_callback() function (which is the microcode + update driver's callback registered for CPU hotplug events) calls + microcode_update_cpu() which would call microcode_init_cpu() in this case, + instead of microcode_resume_cpu() when it finds that the kernel doesn't + have a valid microcode image. This ensures that the CPU type/model + discovery is performed and the right microcode is applied to the CPU after + getting it from userspace. + + +d. Handling microcode update during suspend/hibernate: + + Strictly speaking, during a CPU hotplug operation which does not involve + physically removing or inserting CPUs, the CPUs are not actually powered + off during a CPU offline. They are just put to the lowest C-states possible. + Hence, in such a case, it is not really necessary to re-apply microcode + when the CPUs are brought back online, since they wouldn't have lost the + image during the CPU offline operation. + + This is the usual scenario encountered during a resume after a suspend. + However, in the case of hibernation, since all the CPUs are completely + powered off, during restore it becomes necessary to apply the microcode + images to all the CPUs. + + [Note that we don't expect someone to physically pull out nodes and insert + nodes with a different type of CPUs in-between a suspend-resume or a + hibernate/restore cycle.] + + In the current design of the kernel however, during a CPU offline operation + as part of the suspend/hibernate cycle (cpuhp_tasks_frozen is set), + the existing copy of microcode image in the kernel is not freed up. + And during the CPU online operations (during resume/restore), since the + kernel finds that it already has copies of the microcode images for all the + CPUs, it just applies them to the CPUs, avoiding any re-discovery of CPU + type/model and the need for validating whether the microcode revisions are + right for the CPUs or not (due to the above assumption that physical CPU + hotplug will not be done in-between suspend/resume or hibernate/restore + cycles). + + +III. Known problems +=================== + +Are there any known problems when regular CPU hotplug and suspend race +with each other? + +Yes, they are listed below: + +1. When invoking regular CPU hotplug, the 'tasks_frozen' argument passed to + the _cpu_down() and _cpu_up() functions is *always* 0. + This might not reflect the true current state of the system, since the + tasks could have been frozen by an out-of-band event such as a suspend + operation in progress. Hence, the cpuhp_tasks_frozen variable will not + reflect the frozen state and the CPU hotplug callbacks which evaluate + that variable might execute the wrong code path. + +2. If a regular CPU hotplug stress test happens to race with the freezer due + to a suspend operation in progress at the same time, then we could hit the + situation described below: + + * A regular cpu online operation continues its journey from userspace + into the kernel, since the freezing has not yet begun. + * Then freezer gets to work and freezes userspace. + * If cpu online has not yet completed the microcode update stuff by now, + it will now start waiting on the frozen userspace in the + TASK_UNINTERRUPTIBLE state, in order to get the microcode image. + * Now the freezer continues and tries to freeze the remaining tasks. But + due to this wait mentioned above, the freezer won't be able to freeze + the cpu online hotplug task and hence freezing of tasks fails. + + As a result of this task freezing failure, the suspend operation gets + aborted. diff --git a/Documentation/power/suspend-and-cpuhotplug.txt b/Documentation/power/suspend-and-cpuhotplug.txt deleted file mode 100644 index a8751b8df10e..000000000000 --- a/Documentation/power/suspend-and-cpuhotplug.txt +++ /dev/null @@ -1,274 +0,0 @@ -Interaction of Suspend code (S3) with the CPU hotplug infrastructure - - (C) 2011 - 2014 Srivatsa S. Bhat - - -I. How does the regular CPU hotplug code differ from how the Suspend-to-RAM - infrastructure uses it internally? And where do they share common code? - -Well, a picture is worth a thousand words... So ASCII art follows :-) - -[This depicts the current design in the kernel, and focusses only on the -interactions involving the freezer and CPU hotplug and also tries to explain -the locking involved. It outlines the notifications involved as well. -But please note that here, only the call paths are illustrated, with the aim -of describing where they take different paths and where they share code. -What happens when regular CPU hotplug and Suspend-to-RAM race with each other -is not depicted here.] - -On a high level, the suspend-resume cycle goes like this: - -|Freeze| -> |Disable nonboot| -> |Do suspend| -> |Enable nonboot| -> |Thaw | -|tasks | | cpus | | | | cpus | |tasks| - - -More details follow: - - Suspend call path - ----------------- - - Write 'mem' to - /sys/power/state - sysfs file - | - v - Acquire system_transition_mutex lock - | - v - Send PM_SUSPEND_PREPARE - notifications - | - v - Freeze tasks - | - | - v - disable_nonboot_cpus() - /* start */ - | - v - Acquire cpu_add_remove_lock - | - v - Iterate over CURRENTLY - online CPUs - | - | - | ---------- - v | L - ======> _cpu_down() | - | [This takes cpuhotplug.lock | - Common | before taking down the CPU | - code | and releases it when done] | O - | While it is at it, notifications | - | are sent when notable events occur, | - ======> by running all registered callbacks. | - | | O - | | - | | - v | - Note down these cpus in | P - frozen_cpus mask ---------- - | - v - Disable regular cpu hotplug - by increasing cpu_hotplug_disabled - | - v - Release cpu_add_remove_lock - | - v - /* disable_nonboot_cpus() complete */ - | - v - Do suspend - - - -Resuming back is likewise, with the counterparts being (in the order of -execution during resume): -* enable_nonboot_cpus() which involves: - | Acquire cpu_add_remove_lock - | Decrease cpu_hotplug_disabled, thereby enabling regular cpu hotplug - | Call _cpu_up() [for all those cpus in the frozen_cpus mask, in a loop] - | Release cpu_add_remove_lock - v - -* thaw tasks -* send PM_POST_SUSPEND notifications -* Release system_transition_mutex lock. - - -It is to be noted here that the system_transition_mutex lock is acquired at the very -beginning, when we are just starting out to suspend, and then released only -after the entire cycle is complete (i.e., suspend + resume). - - - - Regular CPU hotplug call path - ----------------------------- - - Write 0 (or 1) to - /sys/devices/system/cpu/cpu*/online - sysfs file - | - | - v - cpu_down() - | - v - Acquire cpu_add_remove_lock - | - v - If cpu_hotplug_disabled > 0 - return gracefully - | - | - v - ======> _cpu_down() - | [This takes cpuhotplug.lock - Common | before taking down the CPU - code | and releases it when done] - | While it is at it, notifications - | are sent when notable events occur, - ======> by running all registered callbacks. - | - | - v - Release cpu_add_remove_lock - [That's it!, for - regular CPU hotplug] - - - -So, as can be seen from the two diagrams (the parts marked as "Common code"), -regular CPU hotplug and the suspend code path converge at the _cpu_down() and -_cpu_up() functions. They differ in the arguments passed to these functions, -in that during regular CPU hotplug, 0 is passed for the 'tasks_frozen' -argument. But during suspend, since the tasks are already frozen by the time -the non-boot CPUs are offlined or onlined, the _cpu_*() functions are called -with the 'tasks_frozen' argument set to 1. -[See below for some known issues regarding this.] - - -Important files and functions/entry points: ------------------------------------------- - -kernel/power/process.c : freeze_processes(), thaw_processes() -kernel/power/suspend.c : suspend_prepare(), suspend_enter(), suspend_finish() -kernel/cpu.c: cpu_[up|down](), _cpu_[up|down](), [disable|enable]_nonboot_cpus() - - - -II. What are the issues involved in CPU hotplug? - ------------------------------------------- - -There are some interesting situations involving CPU hotplug and microcode -update on the CPUs, as discussed below: - -[Please bear in mind that the kernel requests the microcode images from -userspace, using the request_firmware() function defined in -drivers/base/firmware_loader/main.c] - - -a. When all the CPUs are identical: - - This is the most common situation and it is quite straightforward: we want - to apply the same microcode revision to each of the CPUs. - To give an example of x86, the collect_cpu_info() function defined in - arch/x86/kernel/microcode_core.c helps in discovering the type of the CPU - and thereby in applying the correct microcode revision to it. - But note that the kernel does not maintain a common microcode image for the - all CPUs, in order to handle case 'b' described below. - - -b. When some of the CPUs are different than the rest: - - In this case since we probably need to apply different microcode revisions - to different CPUs, the kernel maintains a copy of the correct microcode - image for each CPU (after appropriate CPU type/model discovery using - functions such as collect_cpu_info()). - - -c. When a CPU is physically hot-unplugged and a new (and possibly different - type of) CPU is hot-plugged into the system: - - In the current design of the kernel, whenever a CPU is taken offline during - a regular CPU hotplug operation, upon receiving the CPU_DEAD notification - (which is sent by the CPU hotplug code), the microcode update driver's - callback for that event reacts by freeing the kernel's copy of the - microcode image for that CPU. - - Hence, when a new CPU is brought online, since the kernel finds that it - doesn't have the microcode image, it does the CPU type/model discovery - afresh and then requests the userspace for the appropriate microcode image - for that CPU, which is subsequently applied. - - For example, in x86, the mc_cpu_callback() function (which is the microcode - update driver's callback registered for CPU hotplug events) calls - microcode_update_cpu() which would call microcode_init_cpu() in this case, - instead of microcode_resume_cpu() when it finds that the kernel doesn't - have a valid microcode image. This ensures that the CPU type/model - discovery is performed and the right microcode is applied to the CPU after - getting it from userspace. - - -d. Handling microcode update during suspend/hibernate: - - Strictly speaking, during a CPU hotplug operation which does not involve - physically removing or inserting CPUs, the CPUs are not actually powered - off during a CPU offline. They are just put to the lowest C-states possible. - Hence, in such a case, it is not really necessary to re-apply microcode - when the CPUs are brought back online, since they wouldn't have lost the - image during the CPU offline operation. - - This is the usual scenario encountered during a resume after a suspend. - However, in the case of hibernation, since all the CPUs are completely - powered off, during restore it becomes necessary to apply the microcode - images to all the CPUs. - - [Note that we don't expect someone to physically pull out nodes and insert - nodes with a different type of CPUs in-between a suspend-resume or a - hibernate/restore cycle.] - - In the current design of the kernel however, during a CPU offline operation - as part of the suspend/hibernate cycle (cpuhp_tasks_frozen is set), - the existing copy of microcode image in the kernel is not freed up. - And during the CPU online operations (during resume/restore), since the - kernel finds that it already has copies of the microcode images for all the - CPUs, it just applies them to the CPUs, avoiding any re-discovery of CPU - type/model and the need for validating whether the microcode revisions are - right for the CPUs or not (due to the above assumption that physical CPU - hotplug will not be done in-between suspend/resume or hibernate/restore - cycles). - - -III. Are there any known problems when regular CPU hotplug and suspend race - with each other? - -Yes, they are listed below: - -1. When invoking regular CPU hotplug, the 'tasks_frozen' argument passed to - the _cpu_down() and _cpu_up() functions is *always* 0. - This might not reflect the true current state of the system, since the - tasks could have been frozen by an out-of-band event such as a suspend - operation in progress. Hence, the cpuhp_tasks_frozen variable will not - reflect the frozen state and the CPU hotplug callbacks which evaluate - that variable might execute the wrong code path. - -2. If a regular CPU hotplug stress test happens to race with the freezer due - to a suspend operation in progress at the same time, then we could hit the - situation described below: - - * A regular cpu online operation continues its journey from userspace - into the kernel, since the freezing has not yet begun. - * Then freezer gets to work and freezes userspace. - * If cpu online has not yet completed the microcode update stuff by now, - it will now start waiting on the frozen userspace in the - TASK_UNINTERRUPTIBLE state, in order to get the microcode image. - * Now the freezer continues and tries to freeze the remaining tasks. But - due to this wait mentioned above, the freezer won't be able to freeze - the cpu online hotplug task and hence freezing of tasks fails. - - As a result of this task freezing failure, the suspend operation gets - aborted. diff --git a/Documentation/power/suspend-and-interrupts.rst b/Documentation/power/suspend-and-interrupts.rst new file mode 100644 index 000000000000..4cda6617709a --- /dev/null +++ b/Documentation/power/suspend-and-interrupts.rst @@ -0,0 +1,137 @@ +==================================== +System Suspend and Device Interrupts +==================================== + +Copyright (C) 2014 Intel Corp. +Author: Rafael J. Wysocki + + +Suspending and Resuming Device IRQs +----------------------------------- + +Device interrupt request lines (IRQs) are generally disabled during system +suspend after the "late" phase of suspending devices (that is, after all of the +->prepare, ->suspend and ->suspend_late callbacks have been executed for all +devices). That is done by suspend_device_irqs(). + +The rationale for doing so is that after the "late" phase of device suspend +there is no legitimate reason why any interrupts from suspended devices should +trigger and if any devices have not been suspended properly yet, it is better to +block interrupts from them anyway. Also, in the past we had problems with +interrupt handlers for shared IRQs that device drivers implementing them were +not prepared for interrupts triggering after their devices had been suspended. +In some cases they would attempt to access, for example, memory address spaces +of suspended devices and cause unpredictable behavior to ensue as a result. +Unfortunately, such problems are very difficult to debug and the introduction +of suspend_device_irqs(), along with the "noirq" phase of device suspend and +resume, was the only practical way to mitigate them. + +Device IRQs are re-enabled during system resume, right before the "early" phase +of resuming devices (that is, before starting to execute ->resume_early +callbacks for devices). The function doing that is resume_device_irqs(). + + +The IRQF_NO_SUSPEND Flag +------------------------ + +There are interrupts that can legitimately trigger during the entire system +suspend-resume cycle, including the "noirq" phases of suspending and resuming +devices as well as during the time when nonboot CPUs are taken offline and +brought back online. That applies to timer interrupts in the first place, +but also to IPIs and to some other special-purpose interrupts. + +The IRQF_NO_SUSPEND flag is used to indicate that to the IRQ subsystem when +requesting a special-purpose interrupt. It causes suspend_device_irqs() to +leave the corresponding IRQ enabled so as to allow the interrupt to work as +expected during the suspend-resume cycle, but does not guarantee that the +interrupt will wake the system from a suspended state -- for such cases it is +necessary to use enable_irq_wake(). + +Note that the IRQF_NO_SUSPEND flag affects the entire IRQ and not just one +user of it. Thus, if the IRQ is shared, all of the interrupt handlers installed +for it will be executed as usual after suspend_device_irqs(), even if the +IRQF_NO_SUSPEND flag was not passed to request_irq() (or equivalent) by some of +the IRQ's users. For this reason, using IRQF_NO_SUSPEND and IRQF_SHARED at the +same time should be avoided. + + +System Wakeup Interrupts, enable_irq_wake() and disable_irq_wake() +------------------------------------------------------------------ + +System wakeup interrupts generally need to be configured to wake up the system +from sleep states, especially if they are used for different purposes (e.g. as +I/O interrupts) in the working state. + +That may involve turning on a special signal handling logic within the platform +(such as an SoC) so that signals from a given line are routed in a different way +during system sleep so as to trigger a system wakeup when needed. For example, +the platform may include a dedicated interrupt controller used specifically for +handling system wakeup events. Then, if a given interrupt line is supposed to +wake up the system from sleep sates, the corresponding input of that interrupt +controller needs to be enabled to receive signals from the line in question. +After wakeup, it generally is better to disable that input to prevent the +dedicated controller from triggering interrupts unnecessarily. + +The IRQ subsystem provides two helper functions to be used by device drivers for +those purposes. Namely, enable_irq_wake() turns on the platform's logic for +handling the given IRQ as a system wakeup interrupt line and disable_irq_wake() +turns that logic off. + +Calling enable_irq_wake() causes suspend_device_irqs() to treat the given IRQ +in a special way. Namely, the IRQ remains enabled, by on the first interrupt +it will be disabled, marked as pending and "suspended" so that it will be +re-enabled by resume_device_irqs() during the subsequent system resume. Also +the PM core is notified about the event which causes the system suspend in +progress to be aborted (that doesn't have to happen immediately, but at one +of the points where the suspend thread looks for pending wakeup events). + +This way every interrupt from a wakeup interrupt source will either cause the +system suspend currently in progress to be aborted or wake up the system if +already suspended. However, after suspend_device_irqs() interrupt handlers are +not executed for system wakeup IRQs. They are only executed for IRQF_NO_SUSPEND +IRQs at that time, but those IRQs should not be configured for system wakeup +using enable_irq_wake(). + + +Interrupts and Suspend-to-Idle +------------------------------ + +Suspend-to-idle (also known as the "freeze" sleep state) is a relatively new +system sleep state that works by idling all of the processors and waiting for +interrupts right after the "noirq" phase of suspending devices. + +Of course, this means that all of the interrupts with the IRQF_NO_SUSPEND flag +set will bring CPUs out of idle while in that state, but they will not cause the +IRQ subsystem to trigger a system wakeup. + +System wakeup interrupts, in turn, will trigger wakeup from suspend-to-idle in +analogy with what they do in the full system suspend case. The only difference +is that the wakeup from suspend-to-idle is signaled using the usual working +state interrupt delivery mechanisms and doesn't require the platform to use +any special interrupt handling logic for it to work. + + +IRQF_NO_SUSPEND and enable_irq_wake() +------------------------------------- + +There are very few valid reasons to use both enable_irq_wake() and the +IRQF_NO_SUSPEND flag on the same IRQ, and it is never valid to use both for the +same device. + +First of all, if the IRQ is not shared, the rules for handling IRQF_NO_SUSPEND +interrupts (interrupt handlers are invoked after suspend_device_irqs()) are +directly at odds with the rules for handling system wakeup interrupts (interrupt +handlers are not invoked after suspend_device_irqs()). + +Second, both enable_irq_wake() and IRQF_NO_SUSPEND apply to entire IRQs and not +to individual interrupt handlers, so sharing an IRQ between a system wakeup +interrupt source and an IRQF_NO_SUSPEND interrupt source does not generally +make sense. + +In rare cases an IRQ can be shared between a wakeup device driver and an +IRQF_NO_SUSPEND user. In order for this to be safe, the wakeup device driver +must be able to discern spurious IRQs from genuine wakeup events (signalling +the latter to the core with pm_system_wakeup()), must use enable_irq_wake() to +ensure that the IRQ will function as a wakeup source, and must request the IRQ +with IRQF_COND_SUSPEND to tell the core that it meets these requirements. If +these requirements are not met, it is not valid to use IRQF_COND_SUSPEND. diff --git a/Documentation/power/suspend-and-interrupts.txt b/Documentation/power/suspend-and-interrupts.txt deleted file mode 100644 index 8afb29a8604a..000000000000 --- a/Documentation/power/suspend-and-interrupts.txt +++ /dev/null @@ -1,135 +0,0 @@ -System Suspend and Device Interrupts - -Copyright (C) 2014 Intel Corp. -Author: Rafael J. Wysocki - - -Suspending and Resuming Device IRQs ------------------------------------ - -Device interrupt request lines (IRQs) are generally disabled during system -suspend after the "late" phase of suspending devices (that is, after all of the -->prepare, ->suspend and ->suspend_late callbacks have been executed for all -devices). That is done by suspend_device_irqs(). - -The rationale for doing so is that after the "late" phase of device suspend -there is no legitimate reason why any interrupts from suspended devices should -trigger and if any devices have not been suspended properly yet, it is better to -block interrupts from them anyway. Also, in the past we had problems with -interrupt handlers for shared IRQs that device drivers implementing them were -not prepared for interrupts triggering after their devices had been suspended. -In some cases they would attempt to access, for example, memory address spaces -of suspended devices and cause unpredictable behavior to ensue as a result. -Unfortunately, such problems are very difficult to debug and the introduction -of suspend_device_irqs(), along with the "noirq" phase of device suspend and -resume, was the only practical way to mitigate them. - -Device IRQs are re-enabled during system resume, right before the "early" phase -of resuming devices (that is, before starting to execute ->resume_early -callbacks for devices). The function doing that is resume_device_irqs(). - - -The IRQF_NO_SUSPEND Flag ------------------------- - -There are interrupts that can legitimately trigger during the entire system -suspend-resume cycle, including the "noirq" phases of suspending and resuming -devices as well as during the time when nonboot CPUs are taken offline and -brought back online. That applies to timer interrupts in the first place, -but also to IPIs and to some other special-purpose interrupts. - -The IRQF_NO_SUSPEND flag is used to indicate that to the IRQ subsystem when -requesting a special-purpose interrupt. It causes suspend_device_irqs() to -leave the corresponding IRQ enabled so as to allow the interrupt to work as -expected during the suspend-resume cycle, but does not guarantee that the -interrupt will wake the system from a suspended state -- for such cases it is -necessary to use enable_irq_wake(). - -Note that the IRQF_NO_SUSPEND flag affects the entire IRQ and not just one -user of it. Thus, if the IRQ is shared, all of the interrupt handlers installed -for it will be executed as usual after suspend_device_irqs(), even if the -IRQF_NO_SUSPEND flag was not passed to request_irq() (or equivalent) by some of -the IRQ's users. For this reason, using IRQF_NO_SUSPEND and IRQF_SHARED at the -same time should be avoided. - - -System Wakeup Interrupts, enable_irq_wake() and disable_irq_wake() ------------------------------------------------------------------- - -System wakeup interrupts generally need to be configured to wake up the system -from sleep states, especially if they are used for different purposes (e.g. as -I/O interrupts) in the working state. - -That may involve turning on a special signal handling logic within the platform -(such as an SoC) so that signals from a given line are routed in a different way -during system sleep so as to trigger a system wakeup when needed. For example, -the platform may include a dedicated interrupt controller used specifically for -handling system wakeup events. Then, if a given interrupt line is supposed to -wake up the system from sleep sates, the corresponding input of that interrupt -controller needs to be enabled to receive signals from the line in question. -After wakeup, it generally is better to disable that input to prevent the -dedicated controller from triggering interrupts unnecessarily. - -The IRQ subsystem provides two helper functions to be used by device drivers for -those purposes. Namely, enable_irq_wake() turns on the platform's logic for -handling the given IRQ as a system wakeup interrupt line and disable_irq_wake() -turns that logic off. - -Calling enable_irq_wake() causes suspend_device_irqs() to treat the given IRQ -in a special way. Namely, the IRQ remains enabled, by on the first interrupt -it will be disabled, marked as pending and "suspended" so that it will be -re-enabled by resume_device_irqs() during the subsequent system resume. Also -the PM core is notified about the event which causes the system suspend in -progress to be aborted (that doesn't have to happen immediately, but at one -of the points where the suspend thread looks for pending wakeup events). - -This way every interrupt from a wakeup interrupt source will either cause the -system suspend currently in progress to be aborted or wake up the system if -already suspended. However, after suspend_device_irqs() interrupt handlers are -not executed for system wakeup IRQs. They are only executed for IRQF_NO_SUSPEND -IRQs at that time, but those IRQs should not be configured for system wakeup -using enable_irq_wake(). - - -Interrupts and Suspend-to-Idle ------------------------------- - -Suspend-to-idle (also known as the "freeze" sleep state) is a relatively new -system sleep state that works by idling all of the processors and waiting for -interrupts right after the "noirq" phase of suspending devices. - -Of course, this means that all of the interrupts with the IRQF_NO_SUSPEND flag -set will bring CPUs out of idle while in that state, but they will not cause the -IRQ subsystem to trigger a system wakeup. - -System wakeup interrupts, in turn, will trigger wakeup from suspend-to-idle in -analogy with what they do in the full system suspend case. The only difference -is that the wakeup from suspend-to-idle is signaled using the usual working -state interrupt delivery mechanisms and doesn't require the platform to use -any special interrupt handling logic for it to work. - - -IRQF_NO_SUSPEND and enable_irq_wake() -------------------------------------- - -There are very few valid reasons to use both enable_irq_wake() and the -IRQF_NO_SUSPEND flag on the same IRQ, and it is never valid to use both for the -same device. - -First of all, if the IRQ is not shared, the rules for handling IRQF_NO_SUSPEND -interrupts (interrupt handlers are invoked after suspend_device_irqs()) are -directly at odds with the rules for handling system wakeup interrupts (interrupt -handlers are not invoked after suspend_device_irqs()). - -Second, both enable_irq_wake() and IRQF_NO_SUSPEND apply to entire IRQs and not -to individual interrupt handlers, so sharing an IRQ between a system wakeup -interrupt source and an IRQF_NO_SUSPEND interrupt source does not generally -make sense. - -In rare cases an IRQ can be shared between a wakeup device driver and an -IRQF_NO_SUSPEND user. In order for this to be safe, the wakeup device driver -must be able to discern spurious IRQs from genuine wakeup events (signalling -the latter to the core with pm_system_wakeup()), must use enable_irq_wake() to -ensure that the IRQ will function as a wakeup source, and must request the IRQ -with IRQF_COND_SUSPEND to tell the core that it meets these requirements. If -these requirements are not met, it is not valid to use IRQF_COND_SUSPEND. diff --git a/Documentation/power/swsusp-and-swap-files.rst b/Documentation/power/swsusp-and-swap-files.rst new file mode 100644 index 000000000000..a33a2919dbe4 --- /dev/null +++ b/Documentation/power/swsusp-and-swap-files.rst @@ -0,0 +1,63 @@ +=============================================== +Using swap files with software suspend (swsusp) +=============================================== + + (C) 2006 Rafael J. Wysocki + +The Linux kernel handles swap files almost in the same way as it handles swap +partitions and there are only two differences between these two types of swap +areas: +(1) swap files need not be contiguous, +(2) the header of a swap file is not in the first block of the partition that +holds it. From the swsusp's point of view (1) is not a problem, because it is +already taken care of by the swap-handling code, but (2) has to be taken into +consideration. + +In principle the location of a swap file's header may be determined with the +help of appropriate filesystem driver. Unfortunately, however, it requires the +filesystem holding the swap file to be mounted, and if this filesystem is +journaled, it cannot be mounted during resume from disk. For this reason to +identify a swap file swsusp uses the name of the partition that holds the file +and the offset from the beginning of the partition at which the swap file's +header is located. For convenience, this offset is expressed in +units. + +In order to use a swap file with swsusp, you need to: + +1) Create the swap file and make it active, eg.:: + + # dd if=/dev/zero of= bs=1024 count= + # mkswap + # swapon + +2) Use an application that will bmap the swap file with the help of the +FIBMAP ioctl and determine the location of the file's swap header, as the +offset, in units, from the beginning of the partition which +holds the swap file. + +3) Add the following parameters to the kernel command line:: + + resume= resume_offset= + +where is the partition on which the swap file is located +and is the offset of the swap header determined by the +application in 2) (of course, this step may be carried out automatically +by the same application that determines the swap file's header offset using the +FIBMAP ioctl) + +OR + +Use a userland suspend application that will set the partition and offset +with the help of the SNAPSHOT_SET_SWAP_AREA ioctl described in +Documentation/power/userland-swsusp.rst (this is the only method to suspend +to a swap file allowing the resume to be initiated from an initrd or initramfs +image). + +Now, swsusp will use the swap file in the same way in which it would use a swap +partition. In particular, the swap file has to be active (ie. be present in +/proc/swaps) so that it can be used for suspending. + +Note that if the swap file used for suspending is deleted and recreated, +the location of its header need not be the same as before. Thus every time +this happens the value of the "resume_offset=" kernel command line parameter +has to be updated. diff --git a/Documentation/power/swsusp-and-swap-files.txt b/Documentation/power/swsusp-and-swap-files.txt deleted file mode 100644 index f281886de490..000000000000 --- a/Documentation/power/swsusp-and-swap-files.txt +++ /dev/null @@ -1,60 +0,0 @@ -Using swap files with software suspend (swsusp) - (C) 2006 Rafael J. Wysocki - -The Linux kernel handles swap files almost in the same way as it handles swap -partitions and there are only two differences between these two types of swap -areas: -(1) swap files need not be contiguous, -(2) the header of a swap file is not in the first block of the partition that -holds it. From the swsusp's point of view (1) is not a problem, because it is -already taken care of by the swap-handling code, but (2) has to be taken into -consideration. - -In principle the location of a swap file's header may be determined with the -help of appropriate filesystem driver. Unfortunately, however, it requires the -filesystem holding the swap file to be mounted, and if this filesystem is -journaled, it cannot be mounted during resume from disk. For this reason to -identify a swap file swsusp uses the name of the partition that holds the file -and the offset from the beginning of the partition at which the swap file's -header is located. For convenience, this offset is expressed in -units. - -In order to use a swap file with swsusp, you need to: - -1) Create the swap file and make it active, eg. - -# dd if=/dev/zero of= bs=1024 count= -# mkswap -# swapon - -2) Use an application that will bmap the swap file with the help of the -FIBMAP ioctl and determine the location of the file's swap header, as the -offset, in units, from the beginning of the partition which -holds the swap file. - -3) Add the following parameters to the kernel command line: - -resume= resume_offset= - -where is the partition on which the swap file is located -and is the offset of the swap header determined by the -application in 2) (of course, this step may be carried out automatically -by the same application that determines the swap file's header offset using the -FIBMAP ioctl) - -OR - -Use a userland suspend application that will set the partition and offset -with the help of the SNAPSHOT_SET_SWAP_AREA ioctl described in -Documentation/power/userland-swsusp.txt (this is the only method to suspend -to a swap file allowing the resume to be initiated from an initrd or initramfs -image). - -Now, swsusp will use the swap file in the same way in which it would use a swap -partition. In particular, the swap file has to be active (ie. be present in -/proc/swaps) so that it can be used for suspending. - -Note that if the swap file used for suspending is deleted and recreated, -the location of its header need not be the same as before. Thus every time -this happens the value of the "resume_offset=" kernel command line parameter -has to be updated. diff --git a/Documentation/power/swsusp-dmcrypt.rst b/Documentation/power/swsusp-dmcrypt.rst new file mode 100644 index 000000000000..426df59172cd --- /dev/null +++ b/Documentation/power/swsusp-dmcrypt.rst @@ -0,0 +1,140 @@ +======================================= +How to use dm-crypt and swsusp together +======================================= + +Author: Andreas Steinmetz + + + +Some prerequisites: +You know how dm-crypt works. If not, visit the following web page: +http://www.saout.de/misc/dm-crypt/ +You have read Documentation/power/swsusp.rst and understand it. +You did read Documentation/admin-guide/initrd.rst and know how an initrd works. +You know how to create or how to modify an initrd. + +Now your system is properly set up, your disk is encrypted except for +the swap device(s) and the boot partition which may contain a mini +system for crypto setup and/or rescue purposes. You may even have +an initrd that does your current crypto setup already. + +At this point you want to encrypt your swap, too. Still you want to +be able to suspend using swsusp. This, however, means that you +have to be able to either enter a passphrase or that you read +the key(s) from an external device like a pcmcia flash disk +or an usb stick prior to resume. So you need an initrd, that sets +up dm-crypt and then asks swsusp to resume from the encrypted +swap device. + +The most important thing is that you set up dm-crypt in such +a way that the swap device you suspend to/resume from has +always the same major/minor within the initrd as well as +within your running system. The easiest way to achieve this is +to always set up this swap device first with dmsetup, so that +it will always look like the following:: + + brw------- 1 root root 254, 0 Jul 28 13:37 /dev/mapper/swap0 + +Now set up your kernel to use /dev/mapper/swap0 as the default +resume partition, so your kernel .config contains:: + + CONFIG_PM_STD_PARTITION="/dev/mapper/swap0" + +Prepare your boot loader to use the initrd you will create or +modify. For lilo the simplest setup looks like the following +lines:: + + image=/boot/vmlinuz + initrd=/boot/initrd.gz + label=linux + append="root=/dev/ram0 init=/linuxrc rw" + +Finally you need to create or modify your initrd. Lets assume +you create an initrd that reads the required dm-crypt setup +from a pcmcia flash disk card. The card is formatted with an ext2 +fs which resides on /dev/hde1 when the card is inserted. The +card contains at least the encrypted swap setup in a file +named "swapkey". /etc/fstab of your initrd contains something +like the following:: + + /dev/hda1 /mnt ext3 ro 0 0 + none /proc proc defaults,noatime,nodiratime 0 0 + none /sys sysfs defaults,noatime,nodiratime 0 0 + +/dev/hda1 contains an unencrypted mini system that sets up all +of your crypto devices, again by reading the setup from the +pcmcia flash disk. What follows now is a /linuxrc for your +initrd that allows you to resume from encrypted swap and that +continues boot with your mini system on /dev/hda1 if resume +does not happen:: + + #!/bin/sh + PATH=/sbin:/bin:/usr/sbin:/usr/bin + mount /proc + mount /sys + mapped=0 + noresume=`grep -c noresume /proc/cmdline` + if [ "$*" != "" ] + then + noresume=1 + fi + dmesg -n 1 + /sbin/cardmgr -q + for i in 1 2 3 4 5 6 7 8 9 0 + do + if [ -f /proc/ide/hde/media ] + then + usleep 500000 + mount -t ext2 -o ro /dev/hde1 /mnt + if [ -f /mnt/swapkey ] + then + dmsetup create swap0 /mnt/swapkey > /dev/null 2>&1 && mapped=1 + fi + umount /mnt + break + fi + usleep 500000 + done + killproc /sbin/cardmgr + dmesg -n 6 + if [ $mapped = 1 ] + then + if [ $noresume != 0 ] + then + mkswap /dev/mapper/swap0 > /dev/null 2>&1 + fi + echo 254:0 > /sys/power/resume + dmsetup remove swap0 + fi + umount /sys + mount /mnt + umount /proc + cd /mnt + pivot_root . mnt + mount /proc + umount -l /mnt + umount /proc + exec chroot . /sbin/init $* < dev/console > dev/console 2>&1 + +Please don't mind the weird loop above, busybox's msh doesn't know +the let statement. Now, what is happening in the script? +First we have to decide if we want to try to resume, or not. +We will not resume if booting with "noresume" or any parameters +for init like "single" or "emergency" as boot parameters. + +Then we need to set up dmcrypt with the setup data from the +pcmcia flash disk. If this succeeds we need to reset the swap +device if we don't want to resume. The line "echo 254:0 > /sys/power/resume" +then attempts to resume from the first device mapper device. +Note that it is important to set the device in /sys/power/resume, +regardless if resuming or not, otherwise later suspend will fail. +If resume starts, script execution terminates here. + +Otherwise we just remove the encrypted swap device and leave it to the +mini system on /dev/hda1 to set the whole crypto up (it is up to +you to modify this to your taste). + +What then follows is the well known process to change the root +file system and continue booting from there. I prefer to unmount +the initrd prior to continue booting but it is up to you to modify +this. diff --git a/Documentation/power/swsusp-dmcrypt.txt b/Documentation/power/swsusp-dmcrypt.txt deleted file mode 100644 index b802fbfd95ef..000000000000 --- a/Documentation/power/swsusp-dmcrypt.txt +++ /dev/null @@ -1,138 +0,0 @@ -Author: Andreas Steinmetz - - -How to use dm-crypt and swsusp together: -======================================== - -Some prerequisites: -You know how dm-crypt works. If not, visit the following web page: -http://www.saout.de/misc/dm-crypt/ -You have read Documentation/power/swsusp.txt and understand it. -You did read Documentation/admin-guide/initrd.rst and know how an initrd works. -You know how to create or how to modify an initrd. - -Now your system is properly set up, your disk is encrypted except for -the swap device(s) and the boot partition which may contain a mini -system for crypto setup and/or rescue purposes. You may even have -an initrd that does your current crypto setup already. - -At this point you want to encrypt your swap, too. Still you want to -be able to suspend using swsusp. This, however, means that you -have to be able to either enter a passphrase or that you read -the key(s) from an external device like a pcmcia flash disk -or an usb stick prior to resume. So you need an initrd, that sets -up dm-crypt and then asks swsusp to resume from the encrypted -swap device. - -The most important thing is that you set up dm-crypt in such -a way that the swap device you suspend to/resume from has -always the same major/minor within the initrd as well as -within your running system. The easiest way to achieve this is -to always set up this swap device first with dmsetup, so that -it will always look like the following: - -brw------- 1 root root 254, 0 Jul 28 13:37 /dev/mapper/swap0 - -Now set up your kernel to use /dev/mapper/swap0 as the default -resume partition, so your kernel .config contains: - -CONFIG_PM_STD_PARTITION="/dev/mapper/swap0" - -Prepare your boot loader to use the initrd you will create or -modify. For lilo the simplest setup looks like the following -lines: - -image=/boot/vmlinuz -initrd=/boot/initrd.gz -label=linux -append="root=/dev/ram0 init=/linuxrc rw" - -Finally you need to create or modify your initrd. Lets assume -you create an initrd that reads the required dm-crypt setup -from a pcmcia flash disk card. The card is formatted with an ext2 -fs which resides on /dev/hde1 when the card is inserted. The -card contains at least the encrypted swap setup in a file -named "swapkey". /etc/fstab of your initrd contains something -like the following: - -/dev/hda1 /mnt ext3 ro 0 0 -none /proc proc defaults,noatime,nodiratime 0 0 -none /sys sysfs defaults,noatime,nodiratime 0 0 - -/dev/hda1 contains an unencrypted mini system that sets up all -of your crypto devices, again by reading the setup from the -pcmcia flash disk. What follows now is a /linuxrc for your -initrd that allows you to resume from encrypted swap and that -continues boot with your mini system on /dev/hda1 if resume -does not happen: - -#!/bin/sh -PATH=/sbin:/bin:/usr/sbin:/usr/bin -mount /proc -mount /sys -mapped=0 -noresume=`grep -c noresume /proc/cmdline` -if [ "$*" != "" ] -then - noresume=1 -fi -dmesg -n 1 -/sbin/cardmgr -q -for i in 1 2 3 4 5 6 7 8 9 0 -do - if [ -f /proc/ide/hde/media ] - then - usleep 500000 - mount -t ext2 -o ro /dev/hde1 /mnt - if [ -f /mnt/swapkey ] - then - dmsetup create swap0 /mnt/swapkey > /dev/null 2>&1 && mapped=1 - fi - umount /mnt - break - fi - usleep 500000 -done -killproc /sbin/cardmgr -dmesg -n 6 -if [ $mapped = 1 ] -then - if [ $noresume != 0 ] - then - mkswap /dev/mapper/swap0 > /dev/null 2>&1 - fi - echo 254:0 > /sys/power/resume - dmsetup remove swap0 -fi -umount /sys -mount /mnt -umount /proc -cd /mnt -pivot_root . mnt -mount /proc -umount -l /mnt -umount /proc -exec chroot . /sbin/init $* < dev/console > dev/console 2>&1 - -Please don't mind the weird loop above, busybox's msh doesn't know -the let statement. Now, what is happening in the script? -First we have to decide if we want to try to resume, or not. -We will not resume if booting with "noresume" or any parameters -for init like "single" or "emergency" as boot parameters. - -Then we need to set up dmcrypt with the setup data from the -pcmcia flash disk. If this succeeds we need to reset the swap -device if we don't want to resume. The line "echo 254:0 > /sys/power/resume" -then attempts to resume from the first device mapper device. -Note that it is important to set the device in /sys/power/resume, -regardless if resuming or not, otherwise later suspend will fail. -If resume starts, script execution terminates here. - -Otherwise we just remove the encrypted swap device and leave it to the -mini system on /dev/hda1 to set the whole crypto up (it is up to -you to modify this to your taste). - -What then follows is the well known process to change the root -file system and continue booting from there. I prefer to unmount -the initrd prior to continue booting but it is up to you to modify -this. diff --git a/Documentation/power/swsusp.rst b/Documentation/power/swsusp.rst new file mode 100644 index 000000000000..d000312f6965 --- /dev/null +++ b/Documentation/power/swsusp.rst @@ -0,0 +1,501 @@ +============ +Swap suspend +============ + +Some warnings, first. + +.. warning:: + + **BIG FAT WARNING** + + If you touch anything on disk between suspend and resume... + ...kiss your data goodbye. + + If you do resume from initrd after your filesystems are mounted... + ...bye bye root partition. + + [this is actually same case as above] + + If you have unsupported ( ) devices using DMA, you may have some + problems. If your disk driver does not support suspend... (IDE does), + it may cause some problems, too. If you change kernel command line + between suspend and resume, it may do something wrong. If you change + your hardware while system is suspended... well, it was not good idea; + but it will probably only crash. + + ( ) suspend/resume support is needed to make it safe. + + If you have any filesystems on USB devices mounted before software suspend, + they won't be accessible after resume and you may lose data, as though + you have unplugged the USB devices with mounted filesystems on them; + see the FAQ below for details. (This is not true for more traditional + power states like "standby", which normally don't turn USB off.) + +Swap partition: + You need to append resume=/dev/your_swap_partition to kernel command + line or specify it using /sys/power/resume. + +Swap file: + If using a swapfile you can also specify a resume offset using + resume_offset= on the kernel command line or specify it + in /sys/power/resume_offset. + +After preparing then you suspend by:: + + echo shutdown > /sys/power/disk; echo disk > /sys/power/state + +- If you feel ACPI works pretty well on your system, you might try:: + + echo platform > /sys/power/disk; echo disk > /sys/power/state + +- If you would like to write hibernation image to swap and then suspend + to RAM (provided your platform supports it), you can try:: + + echo suspend > /sys/power/disk; echo disk > /sys/power/state + +- If you have SATA disks, you'll need recent kernels with SATA suspend + support. For suspend and resume to work, make sure your disk drivers + are built into kernel -- not modules. [There's way to make + suspend/resume with modular disk drivers, see FAQ, but you probably + should not do that.] + +If you want to limit the suspend image size to N bytes, do:: + + echo N > /sys/power/image_size + +before suspend (it is limited to around 2/5 of available RAM by default). + +- The resume process checks for the presence of the resume device, + if found, it then checks the contents for the hibernation image signature. + If both are found, it resumes the hibernation image. + +- The resume process may be triggered in two ways: + + 1) During lateinit: If resume=/dev/your_swap_partition is specified on + the kernel command line, lateinit runs the resume process. If the + resume device has not been probed yet, the resume process fails and + bootup continues. + 2) Manually from an initrd or initramfs: May be run from + the init script by using the /sys/power/resume file. It is vital + that this be done prior to remounting any filesystems (even as + read-only) otherwise data may be corrupted. + +Article about goals and implementation of Software Suspend for Linux +==================================================================== + +Author: Gábor Kuti +Last revised: 2003-10-20 by Pavel Machek + +Idea and goals to achieve +------------------------- + +Nowadays it is common in several laptops that they have a suspend button. It +saves the state of the machine to a filesystem or to a partition and switches +to standby mode. Later resuming the machine the saved state is loaded back to +ram and the machine can continue its work. It has two real benefits. First we +save ourselves the time machine goes down and later boots up, energy costs +are real high when running from batteries. The other gain is that we don't have +to interrupt our programs so processes that are calculating something for a long +time shouldn't need to be written interruptible. + +swsusp saves the state of the machine into active swaps and then reboots or +powerdowns. You must explicitly specify the swap partition to resume from with +`resume=` kernel option. If signature is found it loads and restores saved +state. If the option `noresume` is specified as a boot parameter, it skips +the resuming. If the option `hibernate=nocompress` is specified as a boot +parameter, it saves hibernation image without compression. + +In the meantime while the system is suspended you should not add/remove any +of the hardware, write to the filesystems, etc. + +Sleep states summary +==================== + +There are three different interfaces you can use, /proc/acpi should +work like this: + +In a really perfect world:: + + echo 1 > /proc/acpi/sleep # for standby + echo 2 > /proc/acpi/sleep # for suspend to ram + echo 3 > /proc/acpi/sleep # for suspend to ram, but with more power conservative + echo 4 > /proc/acpi/sleep # for suspend to disk + echo 5 > /proc/acpi/sleep # for shutdown unfriendly the system + +and perhaps:: + + echo 4b > /proc/acpi/sleep # for suspend to disk via s4bios + +Frequently Asked Questions +========================== + +Q: + well, suspending a server is IMHO a really stupid thing, + but... (Diego Zuccato): + +A: + You bought new UPS for your server. How do you install it without + bringing machine down? Suspend to disk, rearrange power cables, + resume. + + You have your server on UPS. Power died, and UPS is indicating 30 + seconds to failure. What do you do? Suspend to disk. + + +Q: + Maybe I'm missing something, but why don't the regular I/O paths work? + +A: + We do use the regular I/O paths. However we cannot restore the data + to its original location as we load it. That would create an + inconsistent kernel state which would certainly result in an oops. + Instead, we load the image into unused memory and then atomically copy + it back to it original location. This implies, of course, a maximum + image size of half the amount of memory. + + There are two solutions to this: + + * require half of memory to be free during suspend. That way you can + read "new" data onto free spots, then cli and copy + + * assume we had special "polling" ide driver that only uses memory + between 0-640KB. That way, I'd have to make sure that 0-640KB is free + during suspending, but otherwise it would work... + + suspend2 shares this fundamental limitation, but does not include user + data and disk caches into "used memory" by saving them in + advance. That means that the limitation goes away in practice. + +Q: + Does linux support ACPI S4? + +A: + Yes. That's what echo platform > /sys/power/disk does. + +Q: + What is 'suspend2'? + +A: + suspend2 is 'Software Suspend 2', a forked implementation of + suspend-to-disk which is available as separate patches for 2.4 and 2.6 + kernels from swsusp.sourceforge.net. It includes support for SMP, 4GB + highmem and preemption. It also has a extensible architecture that + allows for arbitrary transformations on the image (compression, + encryption) and arbitrary backends for writing the image (eg to swap + or an NFS share[Work In Progress]). Questions regarding suspend2 + should be sent to the mailing list available through the suspend2 + website, and not to the Linux Kernel Mailing List. We are working + toward merging suspend2 into the mainline kernel. + +Q: + What is the freezing of tasks and why are we using it? + +A: + The freezing of tasks is a mechanism by which user space processes and some + kernel threads are controlled during hibernation or system-wide suspend (on some + architectures). See freezing-of-tasks.txt for details. + +Q: + What is the difference between "platform" and "shutdown"? + +A: + shutdown: + save state in linux, then tell bios to powerdown + + platform: + save state in linux, then tell bios to powerdown and blink + "suspended led" + + "platform" is actually right thing to do where supported, but + "shutdown" is most reliable (except on ACPI systems). + +Q: + I do not understand why you have such strong objections to idea of + selective suspend. + +A: + Do selective suspend during runtime power management, that's okay. But + it's useless for suspend-to-disk. (And I do not see how you could use + it for suspend-to-ram, I hope you do not want that). + + Lets see, so you suggest to + + * SUSPEND all but swap device and parents + * Snapshot + * Write image to disk + * SUSPEND swap device and parents + * Powerdown + + Oh no, that does not work, if swap device or its parents uses DMA, + you've corrupted data. You'd have to do + + * SUSPEND all but swap device and parents + * FREEZE swap device and parents + * Snapshot + * UNFREEZE swap device and parents + * Write + * SUSPEND swap device and parents + + Which means that you still need that FREEZE state, and you get more + complicated code. (And I have not yet introduce details like system + devices). + +Q: + There don't seem to be any generally useful behavioral + distinctions between SUSPEND and FREEZE. + +A: + Doing SUSPEND when you are asked to do FREEZE is always correct, + but it may be unnecessarily slow. If you want your driver to stay simple, + slowness may not matter to you. It can always be fixed later. + + For devices like disk it does matter, you do not want to spindown for + FREEZE. + +Q: + After resuming, system is paging heavily, leading to very bad interactivity. + +A: + Try running:: + + cat /proc/[0-9]*/maps | grep / | sed 's:.* /:/:' | sort -u | while read file + do + test -f "$file" && cat "$file" > /dev/null + done + + after resume. swapoff -a; swapon -a may also be useful. + +Q: + What happens to devices during swsusp? They seem to be resumed + during system suspend? + +A: + That's correct. We need to resume them if we want to write image to + disk. Whole sequence goes like + + **Suspend part** + + running system, user asks for suspend-to-disk + + user processes are stopped + + suspend(PMSG_FREEZE): devices are frozen so that they don't interfere + with state snapshot + + state snapshot: copy of whole used memory is taken with interrupts disabled + + resume(): devices are woken up so that we can write image to swap + + write image to swap + + suspend(PMSG_SUSPEND): suspend devices so that we can power off + + turn the power off + + **Resume part** + + (is actually pretty similar) + + running system, user asks for suspend-to-disk + + user processes are stopped (in common case there are none, + but with resume-from-initrd, no one knows) + + read image from disk + + suspend(PMSG_FREEZE): devices are frozen so that they don't interfere + with image restoration + + image restoration: rewrite memory with image + + resume(): devices are woken up so that system can continue + + thaw all user processes + +Q: + What is this 'Encrypt suspend image' for? + +A: + First of all: it is not a replacement for dm-crypt encrypted swap. + It cannot protect your computer while it is suspended. Instead it does + protect from leaking sensitive data after resume from suspend. + + Think of the following: you suspend while an application is running + that keeps sensitive data in memory. The application itself prevents + the data from being swapped out. Suspend, however, must write these + data to swap to be able to resume later on. Without suspend encryption + your sensitive data are then stored in plaintext on disk. This means + that after resume your sensitive data are accessible to all + applications having direct access to the swap device which was used + for suspend. If you don't need swap after resume these data can remain + on disk virtually forever. Thus it can happen that your system gets + broken in weeks later and sensitive data which you thought were + encrypted and protected are retrieved and stolen from the swap device. + To prevent this situation you should use 'Encrypt suspend image'. + + During suspend a temporary key is created and this key is used to + encrypt the data written to disk. When, during resume, the data was + read back into memory the temporary key is destroyed which simply + means that all data written to disk during suspend are then + inaccessible so they can't be stolen later on. The only thing that + you must then take care of is that you call 'mkswap' for the swap + partition used for suspend as early as possible during regular + boot. This asserts that any temporary key from an oopsed suspend or + from a failed or aborted resume is erased from the swap device. + + As a rule of thumb use encrypted swap to protect your data while your + system is shut down or suspended. Additionally use the encrypted + suspend image to prevent sensitive data from being stolen after + resume. + +Q: + Can I suspend to a swap file? + +A: + Generally, yes, you can. However, it requires you to use the "resume=" and + "resume_offset=" kernel command line parameters, so the resume from a swap file + cannot be initiated from an initrd or initramfs image. See + swsusp-and-swap-files.txt for details. + +Q: + Is there a maximum system RAM size that is supported by swsusp? + +A: + It should work okay with highmem. + +Q: + Does swsusp (to disk) use only one swap partition or can it use + multiple swap partitions (aggregate them into one logical space)? + +A: + Only one swap partition, sorry. + +Q: + If my application(s) causes lots of memory & swap space to be used + (over half of the total system RAM), is it correct that it is likely + to be useless to try to suspend to disk while that app is running? + +A: + No, it should work okay, as long as your app does not mlock() + it. Just prepare big enough swap partition. + +Q: + What information is useful for debugging suspend-to-disk problems? + +A: + Well, last messages on the screen are always useful. If something + is broken, it is usually some kernel driver, therefore trying with as + little as possible modules loaded helps a lot. I also prefer people to + suspend from console, preferably without X running. Booting with + init=/bin/bash, then swapon and starting suspend sequence manually + usually does the trick. Then it is good idea to try with latest + vanilla kernel. + +Q: + How can distributions ship a swsusp-supporting kernel with modular + disk drivers (especially SATA)? + +A: + Well, it can be done, load the drivers, then do echo into + /sys/power/resume file from initrd. Be sure not to mount + anything, not even read-only mount, or you are going to lose your + data. + +Q: + How do I make suspend more verbose? + +A: + If you want to see any non-error kernel messages on the virtual + terminal the kernel switches to during suspend, you have to set the + kernel console loglevel to at least 4 (KERN_WARNING), for example by + doing:: + + # save the old loglevel + read LOGLEVEL DUMMY < /proc/sys/kernel/printk + # set the loglevel so we see the progress bar. + # if the level is higher than needed, we leave it alone. + if [ $LOGLEVEL -lt 5 ]; then + echo 5 > /proc/sys/kernel/printk + fi + + IMG_SZ=0 + read IMG_SZ < /sys/power/image_size + echo -n disk > /sys/power/state + RET=$? + # + # the logic here is: + # if image_size > 0 (without kernel support, IMG_SZ will be zero), + # then try again with image_size set to zero. + if [ $RET -ne 0 -a $IMG_SZ -ne 0 ]; then # try again with minimal image size + echo 0 > /sys/power/image_size + echo -n disk > /sys/power/state + RET=$? + fi + + # restore previous loglevel + echo $LOGLEVEL > /proc/sys/kernel/printk + exit $RET + +Q: + Is this true that if I have a mounted filesystem on a USB device and + I suspend to disk, I can lose data unless the filesystem has been mounted + with "sync"? + +A: + That's right ... if you disconnect that device, you may lose data. + In fact, even with "-o sync" you can lose data if your programs have + information in buffers they haven't written out to a disk you disconnect, + or if you disconnect before the device finished saving data you wrote. + + Software suspend normally powers down USB controllers, which is equivalent + to disconnecting all USB devices attached to your system. + + Your system might well support low-power modes for its USB controllers + while the system is asleep, maintaining the connection, using true sleep + modes like "suspend-to-RAM" or "standby". (Don't write "disk" to the + /sys/power/state file; write "standby" or "mem".) We've not seen any + hardware that can use these modes through software suspend, although in + theory some systems might support "platform" modes that won't break the + USB connections. + + Remember that it's always a bad idea to unplug a disk drive containing a + mounted filesystem. That's true even when your system is asleep! The + safest thing is to unmount all filesystems on removable media (such USB, + Firewire, CompactFlash, MMC, external SATA, or even IDE hotplug bays) + before suspending; then remount them after resuming. + + There is a work-around for this problem. For more information, see + Documentation/driver-api/usb/persist.rst. + +Q: + Can I suspend-to-disk using a swap partition under LVM? + +A: + Yes and No. You can suspend successfully, but the kernel will not be able + to resume on its own. You need an initramfs that can recognize the resume + situation, activate the logical volume containing the swap volume (but not + touch any filesystems!), and eventually call:: + + echo -n "$major:$minor" > /sys/power/resume + + where $major and $minor are the respective major and minor device numbers of + the swap volume. + + uswsusp works with LVM, too. See http://suspend.sourceforge.net/ + +Q: + I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were + compiled with the similar configuration files. Anyway I found that + suspend to disk (and resume) is much slower on 2.6.16 compared to + 2.6.15. Any idea for why that might happen or how can I speed it up? + +A: + This is because the size of the suspend image is now greater than + for 2.6.15 (by saving more data we can get more responsive system + after resume). + + There's the /sys/power/image_size knob that controls the size of the + image. If you set it to 0 (eg. by echo 0 > /sys/power/image_size as + root), the 2.6.15 behavior should be restored. If it is still too + slow, take a look at suspend.sf.net -- userland suspend is faster and + supports LZF compression to speed it up further. diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt deleted file mode 100644 index 236d1fb13640..000000000000 --- a/Documentation/power/swsusp.txt +++ /dev/null @@ -1,446 +0,0 @@ -Some warnings, first. - - * BIG FAT WARNING ********************************************************* - * - * If you touch anything on disk between suspend and resume... - * ...kiss your data goodbye. - * - * If you do resume from initrd after your filesystems are mounted... - * ...bye bye root partition. - * [this is actually same case as above] - * - * If you have unsupported (*) devices using DMA, you may have some - * problems. If your disk driver does not support suspend... (IDE does), - * it may cause some problems, too. If you change kernel command line - * between suspend and resume, it may do something wrong. If you change - * your hardware while system is suspended... well, it was not good idea; - * but it will probably only crash. - * - * (*) suspend/resume support is needed to make it safe. - * - * If you have any filesystems on USB devices mounted before software suspend, - * they won't be accessible after resume and you may lose data, as though - * you have unplugged the USB devices with mounted filesystems on them; - * see the FAQ below for details. (This is not true for more traditional - * power states like "standby", which normally don't turn USB off.) - -Swap partition: -You need to append resume=/dev/your_swap_partition to kernel command -line or specify it using /sys/power/resume. - -Swap file: -If using a swapfile you can also specify a resume offset using -resume_offset= on the kernel command line or specify it -in /sys/power/resume_offset. - -After preparing then you suspend by - -echo shutdown > /sys/power/disk; echo disk > /sys/power/state - -. If you feel ACPI works pretty well on your system, you might try - -echo platform > /sys/power/disk; echo disk > /sys/power/state - -. If you would like to write hibernation image to swap and then suspend -to RAM (provided your platform supports it), you can try - -echo suspend > /sys/power/disk; echo disk > /sys/power/state - -. If you have SATA disks, you'll need recent kernels with SATA suspend -support. For suspend and resume to work, make sure your disk drivers -are built into kernel -- not modules. [There's way to make -suspend/resume with modular disk drivers, see FAQ, but you probably -should not do that.] - -If you want to limit the suspend image size to N bytes, do - -echo N > /sys/power/image_size - -before suspend (it is limited to around 2/5 of available RAM by default). - -. The resume process checks for the presence of the resume device, -if found, it then checks the contents for the hibernation image signature. -If both are found, it resumes the hibernation image. - -. The resume process may be triggered in two ways: - 1) During lateinit: If resume=/dev/your_swap_partition is specified on - the kernel command line, lateinit runs the resume process. If the - resume device has not been probed yet, the resume process fails and - bootup continues. - 2) Manually from an initrd or initramfs: May be run from - the init script by using the /sys/power/resume file. It is vital - that this be done prior to remounting any filesystems (even as - read-only) otherwise data may be corrupted. - -Article about goals and implementation of Software Suspend for Linux -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Author: Gábor Kuti -Last revised: 2003-10-20 by Pavel Machek - -Idea and goals to achieve - -Nowadays it is common in several laptops that they have a suspend button. It -saves the state of the machine to a filesystem or to a partition and switches -to standby mode. Later resuming the machine the saved state is loaded back to -ram and the machine can continue its work. It has two real benefits. First we -save ourselves the time machine goes down and later boots up, energy costs -are real high when running from batteries. The other gain is that we don't have to -interrupt our programs so processes that are calculating something for a long -time shouldn't need to be written interruptible. - -swsusp saves the state of the machine into active swaps and then reboots or -powerdowns. You must explicitly specify the swap partition to resume from with -``resume='' kernel option. If signature is found it loads and restores saved -state. If the option ``noresume'' is specified as a boot parameter, it skips -the resuming. If the option ``hibernate=nocompress'' is specified as a boot -parameter, it saves hibernation image without compression. - -In the meantime while the system is suspended you should not add/remove any -of the hardware, write to the filesystems, etc. - -Sleep states summary -==================== - -There are three different interfaces you can use, /proc/acpi should -work like this: - -In a really perfect world: -echo 1 > /proc/acpi/sleep # for standby -echo 2 > /proc/acpi/sleep # for suspend to ram -echo 3 > /proc/acpi/sleep # for suspend to ram, but with more power conservative -echo 4 > /proc/acpi/sleep # for suspend to disk -echo 5 > /proc/acpi/sleep # for shutdown unfriendly the system - -and perhaps -echo 4b > /proc/acpi/sleep # for suspend to disk via s4bios - -Frequently Asked Questions -========================== - -Q: well, suspending a server is IMHO a really stupid thing, -but... (Diego Zuccato): - -A: You bought new UPS for your server. How do you install it without -bringing machine down? Suspend to disk, rearrange power cables, -resume. - -You have your server on UPS. Power died, and UPS is indicating 30 -seconds to failure. What do you do? Suspend to disk. - - -Q: Maybe I'm missing something, but why don't the regular I/O paths work? - -A: We do use the regular I/O paths. However we cannot restore the data -to its original location as we load it. That would create an -inconsistent kernel state which would certainly result in an oops. -Instead, we load the image into unused memory and then atomically copy -it back to it original location. This implies, of course, a maximum -image size of half the amount of memory. - -There are two solutions to this: - -* require half of memory to be free during suspend. That way you can -read "new" data onto free spots, then cli and copy - -* assume we had special "polling" ide driver that only uses memory -between 0-640KB. That way, I'd have to make sure that 0-640KB is free -during suspending, but otherwise it would work... - -suspend2 shares this fundamental limitation, but does not include user -data and disk caches into "used memory" by saving them in -advance. That means that the limitation goes away in practice. - -Q: Does linux support ACPI S4? - -A: Yes. That's what echo platform > /sys/power/disk does. - -Q: What is 'suspend2'? - -A: suspend2 is 'Software Suspend 2', a forked implementation of -suspend-to-disk which is available as separate patches for 2.4 and 2.6 -kernels from swsusp.sourceforge.net. It includes support for SMP, 4GB -highmem and preemption. It also has a extensible architecture that -allows for arbitrary transformations on the image (compression, -encryption) and arbitrary backends for writing the image (eg to swap -or an NFS share[Work In Progress]). Questions regarding suspend2 -should be sent to the mailing list available through the suspend2 -website, and not to the Linux Kernel Mailing List. We are working -toward merging suspend2 into the mainline kernel. - -Q: What is the freezing of tasks and why are we using it? - -A: The freezing of tasks is a mechanism by which user space processes and some -kernel threads are controlled during hibernation or system-wide suspend (on some -architectures). See freezing-of-tasks.txt for details. - -Q: What is the difference between "platform" and "shutdown"? - -A: - -shutdown: save state in linux, then tell bios to powerdown - -platform: save state in linux, then tell bios to powerdown and blink - "suspended led" - -"platform" is actually right thing to do where supported, but -"shutdown" is most reliable (except on ACPI systems). - -Q: I do not understand why you have such strong objections to idea of -selective suspend. - -A: Do selective suspend during runtime power management, that's okay. But -it's useless for suspend-to-disk. (And I do not see how you could use -it for suspend-to-ram, I hope you do not want that). - -Lets see, so you suggest to - -* SUSPEND all but swap device and parents -* Snapshot -* Write image to disk -* SUSPEND swap device and parents -* Powerdown - -Oh no, that does not work, if swap device or its parents uses DMA, -you've corrupted data. You'd have to do - -* SUSPEND all but swap device and parents -* FREEZE swap device and parents -* Snapshot -* UNFREEZE swap device and parents -* Write -* SUSPEND swap device and parents - -Which means that you still need that FREEZE state, and you get more -complicated code. (And I have not yet introduce details like system -devices). - -Q: There don't seem to be any generally useful behavioral -distinctions between SUSPEND and FREEZE. - -A: Doing SUSPEND when you are asked to do FREEZE is always correct, -but it may be unnecessarily slow. If you want your driver to stay simple, -slowness may not matter to you. It can always be fixed later. - -For devices like disk it does matter, you do not want to spindown for -FREEZE. - -Q: After resuming, system is paging heavily, leading to very bad interactivity. - -A: Try running - -cat /proc/[0-9]*/maps | grep / | sed 's:.* /:/:' | sort -u | while read file -do - test -f "$file" && cat "$file" > /dev/null -done - -after resume. swapoff -a; swapon -a may also be useful. - -Q: What happens to devices during swsusp? They seem to be resumed -during system suspend? - -A: That's correct. We need to resume them if we want to write image to -disk. Whole sequence goes like - - Suspend part - ~~~~~~~~~~~~ - running system, user asks for suspend-to-disk - - user processes are stopped - - suspend(PMSG_FREEZE): devices are frozen so that they don't interfere - with state snapshot - - state snapshot: copy of whole used memory is taken with interrupts disabled - - resume(): devices are woken up so that we can write image to swap - - write image to swap - - suspend(PMSG_SUSPEND): suspend devices so that we can power off - - turn the power off - - Resume part - ~~~~~~~~~~~ - (is actually pretty similar) - - running system, user asks for suspend-to-disk - - user processes are stopped (in common case there are none, but with resume-from-initrd, no one knows) - - read image from disk - - suspend(PMSG_FREEZE): devices are frozen so that they don't interfere - with image restoration - - image restoration: rewrite memory with image - - resume(): devices are woken up so that system can continue - - thaw all user processes - -Q: What is this 'Encrypt suspend image' for? - -A: First of all: it is not a replacement for dm-crypt encrypted swap. -It cannot protect your computer while it is suspended. Instead it does -protect from leaking sensitive data after resume from suspend. - -Think of the following: you suspend while an application is running -that keeps sensitive data in memory. The application itself prevents -the data from being swapped out. Suspend, however, must write these -data to swap to be able to resume later on. Without suspend encryption -your sensitive data are then stored in plaintext on disk. This means -that after resume your sensitive data are accessible to all -applications having direct access to the swap device which was used -for suspend. If you don't need swap after resume these data can remain -on disk virtually forever. Thus it can happen that your system gets -broken in weeks later and sensitive data which you thought were -encrypted and protected are retrieved and stolen from the swap device. -To prevent this situation you should use 'Encrypt suspend image'. - -During suspend a temporary key is created and this key is used to -encrypt the data written to disk. When, during resume, the data was -read back into memory the temporary key is destroyed which simply -means that all data written to disk during suspend are then -inaccessible so they can't be stolen later on. The only thing that -you must then take care of is that you call 'mkswap' for the swap -partition used for suspend as early as possible during regular -boot. This asserts that any temporary key from an oopsed suspend or -from a failed or aborted resume is erased from the swap device. - -As a rule of thumb use encrypted swap to protect your data while your -system is shut down or suspended. Additionally use the encrypted -suspend image to prevent sensitive data from being stolen after -resume. - -Q: Can I suspend to a swap file? - -A: Generally, yes, you can. However, it requires you to use the "resume=" and -"resume_offset=" kernel command line parameters, so the resume from a swap file -cannot be initiated from an initrd or initramfs image. See -swsusp-and-swap-files.txt for details. - -Q: Is there a maximum system RAM size that is supported by swsusp? - -A: It should work okay with highmem. - -Q: Does swsusp (to disk) use only one swap partition or can it use -multiple swap partitions (aggregate them into one logical space)? - -A: Only one swap partition, sorry. - -Q: If my application(s) causes lots of memory & swap space to be used -(over half of the total system RAM), is it correct that it is likely -to be useless to try to suspend to disk while that app is running? - -A: No, it should work okay, as long as your app does not mlock() -it. Just prepare big enough swap partition. - -Q: What information is useful for debugging suspend-to-disk problems? - -A: Well, last messages on the screen are always useful. If something -is broken, it is usually some kernel driver, therefore trying with as -little as possible modules loaded helps a lot. I also prefer people to -suspend from console, preferably without X running. Booting with -init=/bin/bash, then swapon and starting suspend sequence manually -usually does the trick. Then it is good idea to try with latest -vanilla kernel. - -Q: How can distributions ship a swsusp-supporting kernel with modular -disk drivers (especially SATA)? - -A: Well, it can be done, load the drivers, then do echo into -/sys/power/resume file from initrd. Be sure not to mount -anything, not even read-only mount, or you are going to lose your -data. - -Q: How do I make suspend more verbose? - -A: If you want to see any non-error kernel messages on the virtual -terminal the kernel switches to during suspend, you have to set the -kernel console loglevel to at least 4 (KERN_WARNING), for example by -doing - - # save the old loglevel - read LOGLEVEL DUMMY < /proc/sys/kernel/printk - # set the loglevel so we see the progress bar. - # if the level is higher than needed, we leave it alone. - if [ $LOGLEVEL -lt 5 ]; then - echo 5 > /proc/sys/kernel/printk - fi - - IMG_SZ=0 - read IMG_SZ < /sys/power/image_size - echo -n disk > /sys/power/state - RET=$? - # - # the logic here is: - # if image_size > 0 (without kernel support, IMG_SZ will be zero), - # then try again with image_size set to zero. - if [ $RET -ne 0 -a $IMG_SZ -ne 0 ]; then # try again with minimal image size - echo 0 > /sys/power/image_size - echo -n disk > /sys/power/state - RET=$? - fi - - # restore previous loglevel - echo $LOGLEVEL > /proc/sys/kernel/printk - exit $RET - -Q: Is this true that if I have a mounted filesystem on a USB device and -I suspend to disk, I can lose data unless the filesystem has been mounted -with "sync"? - -A: That's right ... if you disconnect that device, you may lose data. -In fact, even with "-o sync" you can lose data if your programs have -information in buffers they haven't written out to a disk you disconnect, -or if you disconnect before the device finished saving data you wrote. - -Software suspend normally powers down USB controllers, which is equivalent -to disconnecting all USB devices attached to your system. - -Your system might well support low-power modes for its USB controllers -while the system is asleep, maintaining the connection, using true sleep -modes like "suspend-to-RAM" or "standby". (Don't write "disk" to the -/sys/power/state file; write "standby" or "mem".) We've not seen any -hardware that can use these modes through software suspend, although in -theory some systems might support "platform" modes that won't break the -USB connections. - -Remember that it's always a bad idea to unplug a disk drive containing a -mounted filesystem. That's true even when your system is asleep! The -safest thing is to unmount all filesystems on removable media (such USB, -Firewire, CompactFlash, MMC, external SATA, or even IDE hotplug bays) -before suspending; then remount them after resuming. - -There is a work-around for this problem. For more information, see -Documentation/driver-api/usb/persist.rst. - -Q: Can I suspend-to-disk using a swap partition under LVM? - -A: Yes and No. You can suspend successfully, but the kernel will not be able -to resume on its own. You need an initramfs that can recognize the resume -situation, activate the logical volume containing the swap volume (but not -touch any filesystems!), and eventually call - -echo -n "$major:$minor" > /sys/power/resume - -where $major and $minor are the respective major and minor device numbers of -the swap volume. - -uswsusp works with LVM, too. See http://suspend.sourceforge.net/ - -Q: I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were -compiled with the similar configuration files. Anyway I found that -suspend to disk (and resume) is much slower on 2.6.16 compared to -2.6.15. Any idea for why that might happen or how can I speed it up? - -A: This is because the size of the suspend image is now greater than -for 2.6.15 (by saving more data we can get more responsive system -after resume). - -There's the /sys/power/image_size knob that controls the size of the -image. If you set it to 0 (eg. by echo 0 > /sys/power/image_size as -root), the 2.6.15 behavior should be restored. If it is still too -slow, take a look at suspend.sf.net -- userland suspend is faster and -supports LZF compression to speed it up further. diff --git a/Documentation/power/tricks.rst b/Documentation/power/tricks.rst new file mode 100644 index 000000000000..ca787f142c3f --- /dev/null +++ b/Documentation/power/tricks.rst @@ -0,0 +1,29 @@ +================ +swsusp/S3 tricks +================ + +Pavel Machek + +If you want to trick swsusp/S3 into working, you might want to try: + +* go with minimal config, turn off drivers like USB, AGP you don't + really need + +* turn off APIC and preempt + +* use ext2. At least it has working fsck. [If something seems to go + wrong, force fsck when you have a chance] + +* turn off modules + +* use vga text console, shut down X. [If you really want X, you might + want to try vesafb later] + +* try running as few processes as possible, preferably go to single + user mode. + +* due to video issues, swsusp should be easier to get working than + S3. Try that first. + +When you make it work, try to find out what exactly was it that broke +suspend, and preferably fix that. diff --git a/Documentation/power/tricks.txt b/Documentation/power/tricks.txt deleted file mode 100644 index a1b8f7249f4c..000000000000 --- a/Documentation/power/tricks.txt +++ /dev/null @@ -1,27 +0,0 @@ - swsusp/S3 tricks - ~~~~~~~~~~~~~~~~ -Pavel Machek - -If you want to trick swsusp/S3 into working, you might want to try: - -* go with minimal config, turn off drivers like USB, AGP you don't - really need - -* turn off APIC and preempt - -* use ext2. At least it has working fsck. [If something seems to go - wrong, force fsck when you have a chance] - -* turn off modules - -* use vga text console, shut down X. [If you really want X, you might - want to try vesafb later] - -* try running as few processes as possible, preferably go to single - user mode. - -* due to video issues, swsusp should be easier to get working than - S3. Try that first. - -When you make it work, try to find out what exactly was it that broke -suspend, and preferably fix that. diff --git a/Documentation/power/userland-swsusp.rst b/Documentation/power/userland-swsusp.rst new file mode 100644 index 000000000000..a0fa51bb1a4d --- /dev/null +++ b/Documentation/power/userland-swsusp.rst @@ -0,0 +1,191 @@ +===================================================== +Documentation for userland software suspend interface +===================================================== + + (C) 2006 Rafael J. Wysocki + +First, the warnings at the beginning of swsusp.txt still apply. + +Second, you should read the FAQ in swsusp.txt _now_ if you have not +done it already. + +Now, to use the userland interface for software suspend you need special +utilities that will read/write the system memory snapshot from/to the +kernel. Such utilities are available, for example, from +. You may want to have a look at them if you +are going to develop your own suspend/resume utilities. + +The interface consists of a character device providing the open(), +release(), read(), and write() operations as well as several ioctl() +commands defined in include/linux/suspend_ioctls.h . The major and minor +numbers of the device are, respectively, 10 and 231, and they can +be read from /sys/class/misc/snapshot/dev. + +The device can be open either for reading or for writing. If open for +reading, it is considered to be in the suspend mode. Otherwise it is +assumed to be in the resume mode. The device cannot be open for simultaneous +reading and writing. It is also impossible to have the device open more than +once at a time. + +Even opening the device has side effects. Data structures are +allocated, and PM_HIBERNATION_PREPARE / PM_RESTORE_PREPARE chains are +called. + +The ioctl() commands recognized by the device are: + +SNAPSHOT_FREEZE + freeze user space processes (the current process is + not frozen); this is required for SNAPSHOT_CREATE_IMAGE + and SNAPSHOT_ATOMIC_RESTORE to succeed + +SNAPSHOT_UNFREEZE + thaw user space processes frozen by SNAPSHOT_FREEZE + +SNAPSHOT_CREATE_IMAGE + create a snapshot of the system memory; the + last argument of ioctl() should be a pointer to an int variable, + the value of which will indicate whether the call returned after + creating the snapshot (1) or after restoring the system memory state + from it (0) (after resume the system finds itself finishing the + SNAPSHOT_CREATE_IMAGE ioctl() again); after the snapshot + has been created the read() operation can be used to transfer + it out of the kernel + +SNAPSHOT_ATOMIC_RESTORE + restore the system memory state from the + uploaded snapshot image; before calling it you should transfer + the system memory snapshot back to the kernel using the write() + operation; this call will not succeed if the snapshot + image is not available to the kernel + +SNAPSHOT_FREE + free memory allocated for the snapshot image + +SNAPSHOT_PREF_IMAGE_SIZE + set the preferred maximum size of the image + (the kernel will do its best to ensure the image size will not exceed + this number, but if it turns out to be impossible, the kernel will + create the smallest image possible) + +SNAPSHOT_GET_IMAGE_SIZE + return the actual size of the hibernation image + +SNAPSHOT_AVAIL_SWAP_SIZE + return the amount of available swap in bytes (the + last argument should be a pointer to an unsigned int variable that will + contain the result if the call is successful). + +SNAPSHOT_ALLOC_SWAP_PAGE + allocate a swap page from the resume partition + (the last argument should be a pointer to a loff_t variable that + will contain the swap page offset if the call is successful) + +SNAPSHOT_FREE_SWAP_PAGES + free all swap pages allocated by + SNAPSHOT_ALLOC_SWAP_PAGE + +SNAPSHOT_SET_SWAP_AREA + set the resume partition and the offset (in + units) from the beginning of the partition at which the swap header is + located (the last ioctl() argument should point to a struct + resume_swap_area, as defined in kernel/power/suspend_ioctls.h, + containing the resume device specification and the offset); for swap + partitions the offset is always 0, but it is different from zero for + swap files (see Documentation/power/swsusp-and-swap-files.rst for + details). + +SNAPSHOT_PLATFORM_SUPPORT + enable/disable the hibernation platform support, + depending on the argument value (enable, if the argument is nonzero) + +SNAPSHOT_POWER_OFF + make the kernel transition the system to the hibernation + state (eg. ACPI S4) using the platform (eg. ACPI) driver + +SNAPSHOT_S2RAM + suspend to RAM; using this call causes the kernel to + immediately enter the suspend-to-RAM state, so this call must always + be preceded by the SNAPSHOT_FREEZE call and it is also necessary + to use the SNAPSHOT_UNFREEZE call after the system wakes up. This call + is needed to implement the suspend-to-both mechanism in which the + suspend image is first created, as though the system had been suspended + to disk, and then the system is suspended to RAM (this makes it possible + to resume the system from RAM if there's enough battery power or restore + its state on the basis of the saved suspend image otherwise) + +The device's read() operation can be used to transfer the snapshot image from +the kernel. It has the following limitations: + +- you cannot read() more than one virtual memory page at a time +- read()s across page boundaries are impossible (ie. if you read() 1/2 of + a page in the previous call, you will only be able to read() + **at most** 1/2 of the page in the next call) + +The device's write() operation is used for uploading the system memory snapshot +into the kernel. It has the same limitations as the read() operation. + +The release() operation frees all memory allocated for the snapshot image +and all swap pages allocated with SNAPSHOT_ALLOC_SWAP_PAGE (if any). +Thus it is not necessary to use either SNAPSHOT_FREE or +SNAPSHOT_FREE_SWAP_PAGES before closing the device (in fact it will also +unfreeze user space processes frozen by SNAPSHOT_UNFREEZE if they are +still frozen when the device is being closed). + +Currently it is assumed that the userland utilities reading/writing the +snapshot image from/to the kernel will use a swap partition, called the resume +partition, or a swap file as storage space (if a swap file is used, the resume +partition is the partition that holds this file). However, this is not really +required, as they can use, for example, a special (blank) suspend partition or +a file on a partition that is unmounted before SNAPSHOT_CREATE_IMAGE and +mounted afterwards. + +These utilities MUST NOT make any assumptions regarding the ordering of +data within the snapshot image. The contents of the image are entirely owned +by the kernel and its structure may be changed in future kernel releases. + +The snapshot image MUST be written to the kernel unaltered (ie. all of the image +data, metadata and header MUST be written in _exactly_ the same amount, form +and order in which they have been read). Otherwise, the behavior of the +resumed system may be totally unpredictable. + +While executing SNAPSHOT_ATOMIC_RESTORE the kernel checks if the +structure of the snapshot image is consistent with the information stored +in the image header. If any inconsistencies are detected, +SNAPSHOT_ATOMIC_RESTORE will not succeed. Still, this is not a fool-proof +mechanism and the userland utilities using the interface SHOULD use additional +means, such as checksums, to ensure the integrity of the snapshot image. + +The suspending and resuming utilities MUST lock themselves in memory, +preferably using mlockall(), before calling SNAPSHOT_FREEZE. + +The suspending utility MUST check the value stored by SNAPSHOT_CREATE_IMAGE +in the memory location pointed to by the last argument of ioctl() and proceed +in accordance with it: + +1. If the value is 1 (ie. the system memory snapshot has just been + created and the system is ready for saving it): + + (a) The suspending utility MUST NOT close the snapshot device + _unless_ the whole suspend procedure is to be cancelled, in + which case, if the snapshot image has already been saved, the + suspending utility SHOULD destroy it, preferably by zapping + its header. If the suspend is not to be cancelled, the + system MUST be powered off or rebooted after the snapshot + image has been saved. + (b) The suspending utility SHOULD NOT attempt to perform any + file system operations (including reads) on the file systems + that were mounted before SNAPSHOT_CREATE_IMAGE has been + called. However, it MAY mount a file system that was not + mounted at that time and perform some operations on it (eg. + use it for saving the image). + +2. If the value is 0 (ie. the system state has just been restored from + the snapshot image), the suspending utility MUST close the snapshot + device. Afterwards it will be treated as a regular userland process, + so it need not exit. + +The resuming utility SHOULD NOT attempt to mount any file systems that could +be mounted before suspend and SHOULD NOT attempt to perform any operations +involving such file systems. + +For details, please refer to the source code. diff --git a/Documentation/power/userland-swsusp.txt b/Documentation/power/userland-swsusp.txt deleted file mode 100644 index bbfcd1bbedc5..000000000000 --- a/Documentation/power/userland-swsusp.txt +++ /dev/null @@ -1,170 +0,0 @@ -Documentation for userland software suspend interface - (C) 2006 Rafael J. Wysocki - -First, the warnings at the beginning of swsusp.txt still apply. - -Second, you should read the FAQ in swsusp.txt _now_ if you have not -done it already. - -Now, to use the userland interface for software suspend you need special -utilities that will read/write the system memory snapshot from/to the -kernel. Such utilities are available, for example, from -. You may want to have a look at them if you -are going to develop your own suspend/resume utilities. - -The interface consists of a character device providing the open(), -release(), read(), and write() operations as well as several ioctl() -commands defined in include/linux/suspend_ioctls.h . The major and minor -numbers of the device are, respectively, 10 and 231, and they can -be read from /sys/class/misc/snapshot/dev. - -The device can be open either for reading or for writing. If open for -reading, it is considered to be in the suspend mode. Otherwise it is -assumed to be in the resume mode. The device cannot be open for simultaneous -reading and writing. It is also impossible to have the device open more than -once at a time. - -Even opening the device has side effects. Data structures are -allocated, and PM_HIBERNATION_PREPARE / PM_RESTORE_PREPARE chains are -called. - -The ioctl() commands recognized by the device are: - -SNAPSHOT_FREEZE - freeze user space processes (the current process is - not frozen); this is required for SNAPSHOT_CREATE_IMAGE - and SNAPSHOT_ATOMIC_RESTORE to succeed - -SNAPSHOT_UNFREEZE - thaw user space processes frozen by SNAPSHOT_FREEZE - -SNAPSHOT_CREATE_IMAGE - create a snapshot of the system memory; the - last argument of ioctl() should be a pointer to an int variable, - the value of which will indicate whether the call returned after - creating the snapshot (1) or after restoring the system memory state - from it (0) (after resume the system finds itself finishing the - SNAPSHOT_CREATE_IMAGE ioctl() again); after the snapshot - has been created the read() operation can be used to transfer - it out of the kernel - -SNAPSHOT_ATOMIC_RESTORE - restore the system memory state from the - uploaded snapshot image; before calling it you should transfer - the system memory snapshot back to the kernel using the write() - operation; this call will not succeed if the snapshot - image is not available to the kernel - -SNAPSHOT_FREE - free memory allocated for the snapshot image - -SNAPSHOT_PREF_IMAGE_SIZE - set the preferred maximum size of the image - (the kernel will do its best to ensure the image size will not exceed - this number, but if it turns out to be impossible, the kernel will - create the smallest image possible) - -SNAPSHOT_GET_IMAGE_SIZE - return the actual size of the hibernation image - -SNAPSHOT_AVAIL_SWAP_SIZE - return the amount of available swap in bytes (the - last argument should be a pointer to an unsigned int variable that will - contain the result if the call is successful). - -SNAPSHOT_ALLOC_SWAP_PAGE - allocate a swap page from the resume partition - (the last argument should be a pointer to a loff_t variable that - will contain the swap page offset if the call is successful) - -SNAPSHOT_FREE_SWAP_PAGES - free all swap pages allocated by - SNAPSHOT_ALLOC_SWAP_PAGE - -SNAPSHOT_SET_SWAP_AREA - set the resume partition and the offset (in - units) from the beginning of the partition at which the swap header is - located (the last ioctl() argument should point to a struct - resume_swap_area, as defined in kernel/power/suspend_ioctls.h, - containing the resume device specification and the offset); for swap - partitions the offset is always 0, but it is different from zero for - swap files (see Documentation/power/swsusp-and-swap-files.txt for - details). - -SNAPSHOT_PLATFORM_SUPPORT - enable/disable the hibernation platform support, - depending on the argument value (enable, if the argument is nonzero) - -SNAPSHOT_POWER_OFF - make the kernel transition the system to the hibernation - state (eg. ACPI S4) using the platform (eg. ACPI) driver - -SNAPSHOT_S2RAM - suspend to RAM; using this call causes the kernel to - immediately enter the suspend-to-RAM state, so this call must always - be preceded by the SNAPSHOT_FREEZE call and it is also necessary - to use the SNAPSHOT_UNFREEZE call after the system wakes up. This call - is needed to implement the suspend-to-both mechanism in which the - suspend image is first created, as though the system had been suspended - to disk, and then the system is suspended to RAM (this makes it possible - to resume the system from RAM if there's enough battery power or restore - its state on the basis of the saved suspend image otherwise) - -The device's read() operation can be used to transfer the snapshot image from -the kernel. It has the following limitations: -- you cannot read() more than one virtual memory page at a time -- read()s across page boundaries are impossible (ie. if you read() 1/2 of - a page in the previous call, you will only be able to read() - _at_ _most_ 1/2 of the page in the next call) - -The device's write() operation is used for uploading the system memory snapshot -into the kernel. It has the same limitations as the read() operation. - -The release() operation frees all memory allocated for the snapshot image -and all swap pages allocated with SNAPSHOT_ALLOC_SWAP_PAGE (if any). -Thus it is not necessary to use either SNAPSHOT_FREE or -SNAPSHOT_FREE_SWAP_PAGES before closing the device (in fact it will also -unfreeze user space processes frozen by SNAPSHOT_UNFREEZE if they are -still frozen when the device is being closed). - -Currently it is assumed that the userland utilities reading/writing the -snapshot image from/to the kernel will use a swap partition, called the resume -partition, or a swap file as storage space (if a swap file is used, the resume -partition is the partition that holds this file). However, this is not really -required, as they can use, for example, a special (blank) suspend partition or -a file on a partition that is unmounted before SNAPSHOT_CREATE_IMAGE and -mounted afterwards. - -These utilities MUST NOT make any assumptions regarding the ordering of -data within the snapshot image. The contents of the image are entirely owned -by the kernel and its structure may be changed in future kernel releases. - -The snapshot image MUST be written to the kernel unaltered (ie. all of the image -data, metadata and header MUST be written in _exactly_ the same amount, form -and order in which they have been read). Otherwise, the behavior of the -resumed system may be totally unpredictable. - -While executing SNAPSHOT_ATOMIC_RESTORE the kernel checks if the -structure of the snapshot image is consistent with the information stored -in the image header. If any inconsistencies are detected, -SNAPSHOT_ATOMIC_RESTORE will not succeed. Still, this is not a fool-proof -mechanism and the userland utilities using the interface SHOULD use additional -means, such as checksums, to ensure the integrity of the snapshot image. - -The suspending and resuming utilities MUST lock themselves in memory, -preferably using mlockall(), before calling SNAPSHOT_FREEZE. - -The suspending utility MUST check the value stored by SNAPSHOT_CREATE_IMAGE -in the memory location pointed to by the last argument of ioctl() and proceed -in accordance with it: -1. If the value is 1 (ie. the system memory snapshot has just been - created and the system is ready for saving it): - (a) The suspending utility MUST NOT close the snapshot device - _unless_ the whole suspend procedure is to be cancelled, in - which case, if the snapshot image has already been saved, the - suspending utility SHOULD destroy it, preferably by zapping - its header. If the suspend is not to be cancelled, the - system MUST be powered off or rebooted after the snapshot - image has been saved. - (b) The suspending utility SHOULD NOT attempt to perform any - file system operations (including reads) on the file systems - that were mounted before SNAPSHOT_CREATE_IMAGE has been - called. However, it MAY mount a file system that was not - mounted at that time and perform some operations on it (eg. - use it for saving the image). -2. If the value is 0 (ie. the system state has just been restored from - the snapshot image), the suspending utility MUST close the snapshot - device. Afterwards it will be treated as a regular userland process, - so it need not exit. - -The resuming utility SHOULD NOT attempt to mount any file systems that could -be mounted before suspend and SHOULD NOT attempt to perform any operations -involving such file systems. - -For details, please refer to the source code. diff --git a/Documentation/power/video.rst b/Documentation/power/video.rst new file mode 100644 index 000000000000..337a2ba9f32f --- /dev/null +++ b/Documentation/power/video.rst @@ -0,0 +1,213 @@ +=========================== +Video issues with S3 resume +=========================== + +2003-2006, Pavel Machek + +During S3 resume, hardware needs to be reinitialized. For most +devices, this is easy, and kernel driver knows how to do +it. Unfortunately there's one exception: video card. Those are usually +initialized by BIOS, and kernel does not have enough information to +boot video card. (Kernel usually does not even contain video card +driver -- vesafb and vgacon are widely used). + +This is not problem for swsusp, because during swsusp resume, BIOS is +run normally so video card is normally initialized. It should not be +problem for S1 standby, because hardware should retain its state over +that. + +We either have to run video BIOS during early resume, or interpret it +using vbetool later, or maybe nothing is necessary on particular +system because video state is preserved. Unfortunately different +methods work on different systems, and no known method suits all of +them. + +Userland application called s2ram has been developed; it contains long +whitelist of systems, and automatically selects working method for a +given system. It can be downloaded from CVS at +www.sf.net/projects/suspend . If you get a system that is not in the +whitelist, please try to find a working solution, and submit whitelist +entry so that work does not need to be repeated. + +Currently, VBE_SAVE method (6 below) works on most +systems. Unfortunately, vbetool only runs after userland is resumed, +so it makes debugging of early resume problems +hard/impossible. Methods that do not rely on userland are preferable. + +Details +~~~~~~~ + +There are a few types of systems where video works after S3 resume: + +(1) systems where video state is preserved over S3. + +(2) systems where it is possible to call the video BIOS during S3 + resume. Unfortunately, it is not correct to call the video BIOS at + that point, but it happens to work on some machines. Use + acpi_sleep=s3_bios. + +(3) systems that initialize video card into vga text mode and where + the BIOS works well enough to be able to set video mode. Use + acpi_sleep=s3_mode on these. + +(4) on some systems s3_bios kicks video into text mode, and + acpi_sleep=s3_bios,s3_mode is needed. + +(5) radeon systems, where X can soft-boot your video card. You'll need + a new enough X, and a plain text console (no vesafb or radeonfb). See + http://www.doesi.gmxhome.de/linux/tm800s3/s3.html for more information. + Alternatively, you should use vbetool (6) instead. + +(6) other radeon systems, where vbetool is enough to bring system back + to life. It needs text console to be working. Do vbetool vbestate + save > /tmp/delme; echo 3 > /proc/acpi/sleep; vbetool post; vbetool + vbestate restore < /tmp/delme; setfont , and your video + should work. + +(7) on some systems, it is possible to boot most of kernel, and then + POSTing bios works. Ole Rohne has patch to do just that at + http://dev.gentoo.org/~marineam/patch-radeonfb-2.6.11-rc2-mm2. + +(8) on some systems, you can use the video_post utility and or + do echo 3 > /sys/power/state && /usr/sbin/video_post - which will + initialize the display in console mode. If you are in X, you can switch + to a virtual terminal and back to X using CTRL+ALT+F1 - CTRL+ALT+F7 to get + the display working in graphical mode again. + +Now, if you pass acpi_sleep=something, and it does not work with your +bios, you'll get a hard crash during resume. Be careful. Also it is +safest to do your experiments with plain old VGA console. The vesafb +and radeonfb (etc) drivers have a tendency to crash the machine during +resume. + +You may have a system where none of above works. At that point you +either invent another ugly hack that works, or write proper driver for +your video card (good luck getting docs :-(). Maybe suspending from X +(proper X, knowing your hardware, not XF68_FBcon) might have better +chance of working. + +Table of known working notebooks: + + +=============================== =============================================== +Model hack (or "how to do it") +=============================== =============================================== +Acer Aspire 1406LC ole's late BIOS init (7), turn off DRI +Acer TM 230 s3_bios (2) +Acer TM 242FX vbetool (6) +Acer TM C110 video_post (8) +Acer TM C300 vga=normal (only suspend on console, not in X), + vbetool (6) or video_post (8) +Acer TM 4052LCi s3_bios (2) +Acer TM 636Lci s3_bios,s3_mode (4) +Acer TM 650 (Radeon M7) vga=normal plus boot-radeon (5) gets text + console back +Acer TM 660 ??? [#f1]_ +Acer TM 800 vga=normal, X patches, see webpage (5) + or vbetool (6) +Acer TM 803 vga=normal, X patches, see webpage (5) + or vbetool (6) +Acer TM 803LCi vga=normal, vbetool (6) +Arima W730a vbetool needed (6) +Asus L2400D s3_mode (3) [#f2]_ (S1 also works OK) +Asus L3350M (SiS 740) (6) +Asus L3800C (Radeon M7) s3_bios (2) (S1 also works OK) +Asus M6887Ne vga=normal, s3_bios (2), use radeon driver + instead of fglrx in x.org +Athlon64 desktop prototype s3_bios (2) +Compal CL-50 ??? [#f1]_ +Compaq Armada E500 - P3-700 none (1) (S1 also works OK) +Compaq Evo N620c vga=normal, s3_bios (2) +Dell 600m, ATI R250 Lf none (1), but needs xorg-x11-6.8.1.902-1 +Dell D600, ATI RV250 vga=normal and X, or try vbestate (6) +Dell D610 vga=normal and X (possibly vbestate (6) too, + but not tested) +Dell Inspiron 4000 ??? [#f1]_ +Dell Inspiron 500m ??? [#f1]_ +Dell Inspiron 510m ??? +Dell Inspiron 5150 vbetool needed (6) +Dell Inspiron 600m ??? [#f1]_ +Dell Inspiron 8200 ??? [#f1]_ +Dell Inspiron 8500 ??? [#f1]_ +Dell Inspiron 8600 ??? [#f1]_ +eMachines athlon64 machines vbetool needed (6) (someone please get + me model #s) +HP NC6000 s3_bios, may not use radeonfb (2); + or vbetool (6) +HP NX7000 ??? [#f1]_ +HP Pavilion ZD7000 vbetool post needed, need open-source nv + driver for X +HP Omnibook XE3 athlon version none (1) +HP Omnibook XE3GC none (1), video is S3 Savage/IX-MV +HP Omnibook XE3L-GF vbetool (6) +HP Omnibook 5150 none (1), (S1 also works OK) +IBM TP T20, model 2647-44G none (1), video is S3 Inc. 86C270-294 + Savage/IX-MV, vesafb gets "interesting" + but X work. +IBM TP A31 / Type 2652-M5G s3_mode (3) [works ok with + BIOS 1.04 2002-08-23, but not at all with + BIOS 1.11 2004-11-05 :-(] +IBM TP R32 / Type 2658-MMG none (1) +IBM TP R40 2722B3G ??? [#f1]_ +IBM TP R50p / Type 1832-22U s3_bios (2) +IBM TP R51 none (1) +IBM TP T30 236681A ??? [#f1]_ +IBM TP T40 / Type 2373-MU4 none (1) +IBM TP T40p none (1) +IBM TP R40p s3_bios (2) +IBM TP T41p s3_bios (2), switch to X after resume +IBM TP T42 s3_bios (2) +IBM ThinkPad T42p (2373-GTG) s3_bios (2) +IBM TP X20 ??? [#f1]_ +IBM TP X30 s3_bios, s3_mode (4) +IBM TP X31 / Type 2672-XXH none (1), use radeontool + (http://fdd.com/software/radeon/) to + turn off backlight. +IBM TP X32 none (1), but backlight is on and video is + trashed after long suspend. s3_bios, + s3_mode (4) works too. Perhaps that gets + better results? +IBM Thinkpad X40 Type 2371-7JG s3_bios,s3_mode (4) +IBM TP 600e none(1), but a switch to console and + back to X is needed +Medion MD4220 ??? [#f1]_ +Samsung P35 vbetool needed (6) +Sharp PC-AR10 (ATI rage) none (1), backlight does not switch off +Sony Vaio PCG-C1VRX/K s3_bios (2) +Sony Vaio PCG-F403 ??? [#f1]_ +Sony Vaio PCG-GRT995MP none (1), works with 'nv' X driver +Sony Vaio PCG-GR7/K none (1), but needs radeonfb, use + radeontool (http://fdd.com/software/radeon/) + to turn off backlight. +Sony Vaio PCG-N505SN ??? [#f1]_ +Sony Vaio vgn-s260 X or boot-radeon can init it (5) +Sony Vaio vgn-S580BH vga=normal, but suspend from X. Console will + be blank unless you return to X. +Sony Vaio vgn-FS115B s3_bios (2),s3_mode (4) +Toshiba Libretto L5 none (1) +Toshiba Libretto 100CT/110CT vbetool (6) +Toshiba Portege 3020CT s3_mode (3) +Toshiba Satellite 4030CDT s3_mode (3) (S1 also works OK) +Toshiba Satellite 4080XCDT s3_mode (3) (S1 also works OK) +Toshiba Satellite 4090XCDT ??? [#f1]_ +Toshiba Satellite P10-554 s3_bios,s3_mode (4)[#f3]_ +Toshiba M30 (2) xor X with nvidia driver using internal AGP +Uniwill 244IIO ??? [#f1]_ +=============================== =============================================== + +Known working desktop systems +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +=================== ============================= ======================== +Mainboard Graphics card hack (or "how to do it") +=================== ============================= ======================== +Asus A7V8X nVidia RIVA TNT2 model 64 s3_bios,s3_mode (4) +=================== ============================= ======================== + + +.. [#f1] from https://wiki.ubuntu.com/HoaryPMResults, not sure + which options to use. If you know, please tell me. + +.. [#f2] To be tested with a newer kernel. + +.. [#f3] Not with SMP kernel, UP only. diff --git a/Documentation/power/video.txt b/Documentation/power/video.txt deleted file mode 100644 index 3e6272bc4472..000000000000 --- a/Documentation/power/video.txt +++ /dev/null @@ -1,185 +0,0 @@ - - Video issues with S3 resume - ~~~~~~~~~~~~~~~~~~~~~~~~~~~ - 2003-2006, Pavel Machek - -During S3 resume, hardware needs to be reinitialized. For most -devices, this is easy, and kernel driver knows how to do -it. Unfortunately there's one exception: video card. Those are usually -initialized by BIOS, and kernel does not have enough information to -boot video card. (Kernel usually does not even contain video card -driver -- vesafb and vgacon are widely used). - -This is not problem for swsusp, because during swsusp resume, BIOS is -run normally so video card is normally initialized. It should not be -problem for S1 standby, because hardware should retain its state over -that. - -We either have to run video BIOS during early resume, or interpret it -using vbetool later, or maybe nothing is necessary on particular -system because video state is preserved. Unfortunately different -methods work on different systems, and no known method suits all of -them. - -Userland application called s2ram has been developed; it contains long -whitelist of systems, and automatically selects working method for a -given system. It can be downloaded from CVS at -www.sf.net/projects/suspend . If you get a system that is not in the -whitelist, please try to find a working solution, and submit whitelist -entry so that work does not need to be repeated. - -Currently, VBE_SAVE method (6 below) works on most -systems. Unfortunately, vbetool only runs after userland is resumed, -so it makes debugging of early resume problems -hard/impossible. Methods that do not rely on userland are preferable. - -Details -~~~~~~~ - -There are a few types of systems where video works after S3 resume: - -(1) systems where video state is preserved over S3. - -(2) systems where it is possible to call the video BIOS during S3 - resume. Unfortunately, it is not correct to call the video BIOS at - that point, but it happens to work on some machines. Use - acpi_sleep=s3_bios. - -(3) systems that initialize video card into vga text mode and where - the BIOS works well enough to be able to set video mode. Use - acpi_sleep=s3_mode on these. - -(4) on some systems s3_bios kicks video into text mode, and - acpi_sleep=s3_bios,s3_mode is needed. - -(5) radeon systems, where X can soft-boot your video card. You'll need - a new enough X, and a plain text console (no vesafb or radeonfb). See - http://www.doesi.gmxhome.de/linux/tm800s3/s3.html for more information. - Alternatively, you should use vbetool (6) instead. - -(6) other radeon systems, where vbetool is enough to bring system back - to life. It needs text console to be working. Do vbetool vbestate - save > /tmp/delme; echo 3 > /proc/acpi/sleep; vbetool post; vbetool - vbestate restore < /tmp/delme; setfont , and your video - should work. - -(7) on some systems, it is possible to boot most of kernel, and then - POSTing bios works. Ole Rohne has patch to do just that at - http://dev.gentoo.org/~marineam/patch-radeonfb-2.6.11-rc2-mm2. - -(8) on some systems, you can use the video_post utility and or - do echo 3 > /sys/power/state && /usr/sbin/video_post - which will - initialize the display in console mode. If you are in X, you can switch - to a virtual terminal and back to X using CTRL+ALT+F1 - CTRL+ALT+F7 to get - the display working in graphical mode again. - -Now, if you pass acpi_sleep=something, and it does not work with your -bios, you'll get a hard crash during resume. Be careful. Also it is -safest to do your experiments with plain old VGA console. The vesafb -and radeonfb (etc) drivers have a tendency to crash the machine during -resume. - -You may have a system where none of above works. At that point you -either invent another ugly hack that works, or write proper driver for -your video card (good luck getting docs :-(). Maybe suspending from X -(proper X, knowing your hardware, not XF68_FBcon) might have better -chance of working. - -Table of known working notebooks: - -Model hack (or "how to do it") ------------------------------------------------------------------------------- -Acer Aspire 1406LC ole's late BIOS init (7), turn off DRI -Acer TM 230 s3_bios (2) -Acer TM 242FX vbetool (6) -Acer TM C110 video_post (8) -Acer TM C300 vga=normal (only suspend on console, not in X), vbetool (6) or video_post (8) -Acer TM 4052LCi s3_bios (2) -Acer TM 636Lci s3_bios,s3_mode (4) -Acer TM 650 (Radeon M7) vga=normal plus boot-radeon (5) gets text console back -Acer TM 660 ??? (*) -Acer TM 800 vga=normal, X patches, see webpage (5) or vbetool (6) -Acer TM 803 vga=normal, X patches, see webpage (5) or vbetool (6) -Acer TM 803LCi vga=normal, vbetool (6) -Arima W730a vbetool needed (6) -Asus L2400D s3_mode (3)(***) (S1 also works OK) -Asus L3350M (SiS 740) (6) -Asus L3800C (Radeon M7) s3_bios (2) (S1 also works OK) -Asus M6887Ne vga=normal, s3_bios (2), use radeon driver instead of fglrx in x.org -Athlon64 desktop prototype s3_bios (2) -Compal CL-50 ??? (*) -Compaq Armada E500 - P3-700 none (1) (S1 also works OK) -Compaq Evo N620c vga=normal, s3_bios (2) -Dell 600m, ATI R250 Lf none (1), but needs xorg-x11-6.8.1.902-1 -Dell D600, ATI RV250 vga=normal and X, or try vbestate (6) -Dell D610 vga=normal and X (possibly vbestate (6) too, but not tested) -Dell Inspiron 4000 ??? (*) -Dell Inspiron 500m ??? (*) -Dell Inspiron 510m ??? -Dell Inspiron 5150 vbetool needed (6) -Dell Inspiron 600m ??? (*) -Dell Inspiron 8200 ??? (*) -Dell Inspiron 8500 ??? (*) -Dell Inspiron 8600 ??? (*) -eMachines athlon64 machines vbetool needed (6) (someone please get me model #s) -HP NC6000 s3_bios, may not use radeonfb (2); or vbetool (6) -HP NX7000 ??? (*) -HP Pavilion ZD7000 vbetool post needed, need open-source nv driver for X -HP Omnibook XE3 athlon version none (1) -HP Omnibook XE3GC none (1), video is S3 Savage/IX-MV -HP Omnibook XE3L-GF vbetool (6) -HP Omnibook 5150 none (1), (S1 also works OK) -IBM TP T20, model 2647-44G none (1), video is S3 Inc. 86C270-294 Savage/IX-MV, vesafb gets "interesting" but X work. -IBM TP A31 / Type 2652-M5G s3_mode (3) [works ok with BIOS 1.04 2002-08-23, but not at all with BIOS 1.11 2004-11-05 :-(] -IBM TP R32 / Type 2658-MMG none (1) -IBM TP R40 2722B3G ??? (*) -IBM TP R50p / Type 1832-22U s3_bios (2) -IBM TP R51 none (1) -IBM TP T30 236681A ??? (*) -IBM TP T40 / Type 2373-MU4 none (1) -IBM TP T40p none (1) -IBM TP R40p s3_bios (2) -IBM TP T41p s3_bios (2), switch to X after resume -IBM TP T42 s3_bios (2) -IBM ThinkPad T42p (2373-GTG) s3_bios (2) -IBM TP X20 ??? (*) -IBM TP X30 s3_bios, s3_mode (4) -IBM TP X31 / Type 2672-XXH none (1), use radeontool (http://fdd.com/software/radeon/) to turn off backlight. -IBM TP X32 none (1), but backlight is on and video is trashed after long suspend. s3_bios,s3_mode (4) works too. Perhaps that gets better results? -IBM Thinkpad X40 Type 2371-7JG s3_bios,s3_mode (4) -IBM TP 600e none(1), but a switch to console and back to X is needed -Medion MD4220 ??? (*) -Samsung P35 vbetool needed (6) -Sharp PC-AR10 (ATI rage) none (1), backlight does not switch off -Sony Vaio PCG-C1VRX/K s3_bios (2) -Sony Vaio PCG-F403 ??? (*) -Sony Vaio PCG-GRT995MP none (1), works with 'nv' X driver -Sony Vaio PCG-GR7/K none (1), but needs radeonfb, use radeontool (http://fdd.com/software/radeon/) to turn off backlight. -Sony Vaio PCG-N505SN ??? (*) -Sony Vaio vgn-s260 X or boot-radeon can init it (5) -Sony Vaio vgn-S580BH vga=normal, but suspend from X. Console will be blank unless you return to X. -Sony Vaio vgn-FS115B s3_bios (2),s3_mode (4) -Toshiba Libretto L5 none (1) -Toshiba Libretto 100CT/110CT vbetool (6) -Toshiba Portege 3020CT s3_mode (3) -Toshiba Satellite 4030CDT s3_mode (3) (S1 also works OK) -Toshiba Satellite 4080XCDT s3_mode (3) (S1 also works OK) -Toshiba Satellite 4090XCDT ??? (*) -Toshiba Satellite P10-554 s3_bios,s3_mode (4)(****) -Toshiba M30 (2) xor X with nvidia driver using internal AGP -Uniwill 244IIO ??? (*) - -Known working desktop systems -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Mainboard Graphics card hack (or "how to do it") ------------------------------------------------------------------------------- -Asus A7V8X nVidia RIVA TNT2 model 64 s3_bios,s3_mode (4) - - -(*) from https://wiki.ubuntu.com/HoaryPMResults, not sure - which options to use. If you know, please tell me. - -(***) To be tested with a newer kernel. - -(****) Not with SMP kernel, UP only. diff --git a/Documentation/process/submitting-drivers.rst b/Documentation/process/submitting-drivers.rst index 58bc047e7b95..1acaa14903d6 100644 --- a/Documentation/process/submitting-drivers.rst +++ b/Documentation/process/submitting-drivers.rst @@ -117,7 +117,7 @@ PM support: implemented") error. You should also try to make sure that your driver uses as little power as possible when it's not doing anything. For the driver testing instructions see - Documentation/power/drivers-testing.txt and for a relatively + Documentation/power/drivers-testing.rst and for a relatively complete overview of the power management issues related to drivers see :ref:`Documentation/driver-api/pm/devices.rst `. diff --git a/Documentation/scheduler/sched-energy.txt b/Documentation/scheduler/sched-energy.txt index 197d81f4b836..d97207b9accb 100644 --- a/Documentation/scheduler/sched-energy.txt +++ b/Documentation/scheduler/sched-energy.txt @@ -22,7 +22,7 @@ the highest. The actual EM used by EAS is _not_ maintained by the scheduler, but by a dedicated framework. For details about this framework and what it provides, -please refer to its documentation (see Documentation/power/energy-model.txt). +please refer to its documentation (see Documentation/power/energy-model.rst). 2. Background and Terminology @@ -81,7 +81,7 @@ through the arch_scale_cpu_capacity() callback. The rest of platform knowledge used by EAS is directly read from the Energy Model (EM) framework. The EM of a platform is composed of a power cost table -per 'performance domain' in the system (see Documentation/power/energy-model.txt +per 'performance domain' in the system (see Documentation/power/energy-model.rst for futher details about performance domains). The scheduler manages references to the EM objects in the topology code when the @@ -352,7 +352,7 @@ could be amended in the future if proven otherwise. EAS uses the EM of a platform to estimate the impact of scheduling decisions on energy. So, your platform must provide power cost tables to the EM framework in order to make EAS start. To do so, please refer to documentation of the -independent EM framework in Documentation/power/energy-model.txt. +independent EM framework in Documentation/power/energy-model.rst. Please also note that the scheduling domains need to be re-built after the EM has been registered in order to start EAS. diff --git a/Documentation/trace/coresight-cpu-debug.txt b/Documentation/trace/coresight-cpu-debug.txt index f07e38094b40..1a660a39e3c0 100644 --- a/Documentation/trace/coresight-cpu-debug.txt +++ b/Documentation/trace/coresight-cpu-debug.txt @@ -151,7 +151,7 @@ At the runtime you can disable idle states with below methods: It is possible to disable CPU idle states by way of the PM QoS subsystem, more specifically by using the "/dev/cpu_dma_latency" -interface (see Documentation/power/pm_qos_interface.txt for more +interface (see Documentation/power/pm_qos_interface.rst for more details). As specified in the PM QoS documentation the requested parameter will stay in effect until the file descriptor is released. For example: diff --git a/Documentation/translations/zh_CN/process/submitting-drivers.rst b/Documentation/translations/zh_CN/process/submitting-drivers.rst index 72c6cd935821..f1c3906c69a8 100644 --- a/Documentation/translations/zh_CN/process/submitting-drivers.rst +++ b/Documentation/translations/zh_CN/process/submitting-drivers.rst @@ -97,7 +97,7 @@ Linux 2.6: 函数定义成返回 -ENOSYS(功能未实现)错误。你还应该尝试确 保你的驱动在什么都不干的情况下将耗电降到最低。要获得驱动 程序测试的指导,请参阅 - Documentation/power/drivers-testing.txt。有关驱动程序电 + Documentation/power/drivers-testing.rst。有关驱动程序电 源管理问题相对全面的概述,请参阅 Documentation/driver-api/pm/devices.rst。 diff --git a/MAINTAINERS b/MAINTAINERS index 9c382053ce6a..5a6137df3f0e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6446,7 +6446,7 @@ M: "Rafael J. Wysocki" M: Pavel Machek L: linux-pm@vger.kernel.org S: Supported -F: Documentation/power/freezing-of-tasks.txt +F: Documentation/power/freezing-of-tasks.rst F: include/linux/freezer.h F: kernel/freezer.c @@ -11764,7 +11764,7 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git F: drivers/opp/ F: include/linux/pm_opp.h -F: Documentation/power/opp.txt +F: Documentation/power/opp.rst F: Documentation/devicetree/bindings/opp/ OPL4 DRIVER diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2bbbd4d1ba31..77a724771dbb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2447,7 +2447,7 @@ menuconfig APM machines with more than one CPU. In order to use APM, you will need supporting software. For location - and more information, read + and more information, read and the Battery Powered Linux mini-HOWTO, available from . diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 066fd2a12851..10d040e2e807 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1175,7 +1175,7 @@ struct skl_wm_params { * to be disabled. This shouldn't happen and we'll print some error messages in * case it happens. * - * For more, read the Documentation/power/runtime_pm.txt. + * For more, read the Documentation/power/runtime_pm.rst. */ struct i915_runtime_pm { atomic_t wakeref_count; diff --git a/drivers/opp/Kconfig b/drivers/opp/Kconfig index a7fbb93f302c..1f64a3d46c8a 100644 --- a/drivers/opp/Kconfig +++ b/drivers/opp/Kconfig @@ -10,4 +10,4 @@ config PM_OPP OPP layer organizes the data internally using device pointers representing individual voltage domains and provides SOC implementations a ready to use framework to manage OPPs. - For more information, read + For more information, read diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index f7033ecf6d0b..11f9c875b028 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -607,7 +607,7 @@ int power_supply_get_battery_info(struct power_supply *psy, /* The property and field names below must correspond to elements * in enum power_supply_property. For reasoning, see - * Documentation/power/power_supply_class.txt. + * Documentation/power/power_supply_class.rst. */ of_property_read_u32(battery_np, "energy-full-design-microwatt-hours", diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c7eef32e7739..5b8328a99b2a 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -52,7 +52,7 @@ * irq line disabled until the threaded handler has been run. * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend. Does not guarantee * that this interrupt will wake the system from a suspended - * state. See Documentation/power/suspend-and-interrupts.txt + * state. See Documentation/power/suspend-and-interrupts.rst * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set * IRQF_NO_THREAD - Interrupt cannot be threaded * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device diff --git a/include/linux/pci.h b/include/linux/pci.h index b74b2a4e6df2..3d9a167ca5c3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -807,7 +807,7 @@ struct module; * @suspend_late: Put device into low power state. * @resume_early: Wake device from low power state. * @resume: Wake device from low power state. - * (Please see Documentation/power/pci.txt for descriptions + * (Please see Documentation/power/pci.rst for descriptions * of PCI Power Management and the related functions.) * @shutdown: Hook into reboot_notifier_list (kernel/sys.c). * Intended to stop any idling DMA operations. diff --git a/include/linux/pm.h b/include/linux/pm.h index 66c19a65a514..c14ad8bc1a41 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -284,7 +284,7 @@ typedef struct pm_message { * actions to be performed by a device driver's callbacks generally depend on * the platform and subsystem the device belongs to. * - * Refer to Documentation/power/runtime_pm.txt for more information about the + * Refer to Documentation/power/runtime_pm.rst for more information about the * role of the @runtime_suspend(), @runtime_resume() and @runtime_idle() * callbacks in device runtime power management. */ diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 9bbaaab14b36..7a4dda9e5309 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -65,7 +65,7 @@ config HIBERNATION need to run mkswap against the swap partition used for the suspend. It also works with swap files to a limited extent (for details see - ). + ). Right now you may boot without resuming and resume later but in the meantime you cannot use the swap partition(s)/file(s) involved in @@ -74,7 +74,7 @@ config HIBERNATION MOUNT any journaled filesystems mounted before the suspend or they will get corrupted in a nasty way. - For more information take a look at . + For more information take a look at . config ARCH_SAVE_PAGE_KEYS bool @@ -255,7 +255,7 @@ config APM_EMULATION notification of APM "events" (e.g. battery status change). In order to use APM, you will need supporting software. For location - and more information, read + and more information, read and the Battery Powered Linux mini-HOWTO, available from . diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 41722046b937..0cd26289bfbc 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -165,7 +165,7 @@ config CFG80211_DEFAULT_PS If this causes your applications to misbehave you should fix your applications instead -- they need to register their network - latency requirement, see Documentation/power/pm_qos_interface.txt. + latency requirement, see Documentation/power/pm_qos_interface.rst. config CFG80211_DEBUGFS bool "cfg80211 DebugFS entries" -- cgit v1.2.3 From e1714daad7cf8fe4d6dd91adcfbbdd0604b0210d Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 3 Jun 2019 10:25:31 +0200 Subject: i2c: headers: don't use 'dev' as adapter variable It is not a struct device, so 'dev' is confusing. Use 'adap', the most common name. Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index e982b8913b73..6bd199cfe61f 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -703,14 +703,14 @@ struct i2c_adapter { }; #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev) -static inline void *i2c_get_adapdata(const struct i2c_adapter *dev) +static inline void *i2c_get_adapdata(const struct i2c_adapter *adap) { - return dev_get_drvdata(&dev->dev); + return dev_get_drvdata(&adap->dev); } -static inline void i2c_set_adapdata(struct i2c_adapter *dev, void *data) +static inline void i2c_set_adapdata(struct i2c_adapter *adap, void *data) { - dev_set_drvdata(&dev->dev, data); + dev_set_drvdata(&adap->dev, data); } static inline struct i2c_adapter * -- cgit v1.2.3 From d68222d4d6647611be5a32c80a53a145e7c80ce9 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 3 Jun 2019 10:25:32 +0200 Subject: i2c: headers: always have a named variable in arguments Much better to read and understand. Naming for i2c_adapter is not consistent (yet), so use the name which is also used in core code. Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 6bd199cfe61f..14e04fb4f46f 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -40,7 +40,8 @@ struct i2c_device_identity; union i2c_smbus_data; struct i2c_board_info; enum i2c_slave_event; -typedef int (*i2c_slave_cb_t)(struct i2c_client *, enum i2c_slave_event, u8 *); +typedef int (*i2c_slave_cb_t)(struct i2c_client *client, + enum i2c_slave_event event, u8 *val); struct module; struct property_entry; @@ -257,16 +258,16 @@ struct i2c_driver { unsigned int class; /* Standard driver model interfaces */ - int (*probe)(struct i2c_client *, const struct i2c_device_id *); - int (*remove)(struct i2c_client *); + int (*probe)(struct i2c_client *client, const struct i2c_device_id *id); + int (*remove)(struct i2c_client *client); /* New driver model interface to aid the seamless removal of the * current probe()'s, more commonly unused than used second parameter. */ - int (*probe_new)(struct i2c_client *); + int (*probe_new)(struct i2c_client *client); /* driver model interfaces that don't relate to enumeration */ - void (*shutdown)(struct i2c_client *); + void (*shutdown)(struct i2c_client *client); /* Alert callback, for example for the SMBus alert protocol. * The format and meaning of the data value depends on the protocol. @@ -275,7 +276,7 @@ struct i2c_driver { * For the SMBus Host Notify protocol, the data corresponds to the * 16-bit payload data reported by the slave device acting as master. */ - void (*alert)(struct i2c_client *, enum i2c_alert_protocol protocol, + void (*alert)(struct i2c_client *client, enum i2c_alert_protocol protocol, unsigned int data); /* a ioctl like command that can be used to perform specific functions @@ -287,7 +288,7 @@ struct i2c_driver { const struct i2c_device_id *id_table; /* Device detection callback for automatic device creation */ - int (*detect)(struct i2c_client *, struct i2c_board_info *); + int (*detect)(struct i2c_client *client, struct i2c_board_info *info); const unsigned short *address_list; struct list_head clients; @@ -447,10 +448,10 @@ extern struct i2c_client * i2c_new_probed_device(struct i2c_adapter *adap, struct i2c_board_info *info, unsigned short const *addr_list, - int (*probe)(struct i2c_adapter *, unsigned short addr)); + int (*probe)(struct i2c_adapter *adap, unsigned short addr)); /* Common custom probe functions */ -extern int i2c_probe_func_quick_read(struct i2c_adapter *, unsigned short addr); +extern int i2c_probe_func_quick_read(struct i2c_adapter *adap, unsigned short addr); /* For devices that use several addresses, use i2c_new_dummy() to make * client handles for the extra addresses. @@ -466,7 +467,7 @@ i2c_new_secondary_device(struct i2c_client *client, const char *name, u16 default_addr); -extern void i2c_unregister_device(struct i2c_client *); +extern void i2c_unregister_device(struct i2c_client *client); #endif /* I2C */ /* Mainboard arch_initcall() code should register all its I2C devices. @@ -551,9 +552,9 @@ struct i2c_algorithm { * The main operations are wrapped by i2c_lock_bus and i2c_unlock_bus. */ struct i2c_lock_operations { - void (*lock_bus)(struct i2c_adapter *, unsigned int flags); - int (*trylock_bus)(struct i2c_adapter *, unsigned int flags); - void (*unlock_bus)(struct i2c_adapter *, unsigned int flags); + void (*lock_bus)(struct i2c_adapter *adapter, unsigned int flags); + int (*trylock_bus)(struct i2c_adapter *adapter, unsigned int flags); + void (*unlock_bus)(struct i2c_adapter *adapter, unsigned int flags); }; /** @@ -726,7 +727,7 @@ i2c_parent_is_i2c_adapter(const struct i2c_adapter *adapter) return NULL; } -int i2c_for_each_dev(void *data, int (*fn)(struct device *, void *)); +int i2c_for_each_dev(void *data, int (*fn)(struct device *dev, void *data)); /* Adapter locking functions, exported for shared pin cases */ #define I2C_LOCK_ROOT_ADAPTER BIT(0) @@ -832,12 +833,12 @@ static inline void i2c_mark_adapter_resumed(struct i2c_adapter *adap) /* administration... */ #if IS_ENABLED(CONFIG_I2C) -extern int i2c_add_adapter(struct i2c_adapter *); -extern void i2c_del_adapter(struct i2c_adapter *); -extern int i2c_add_numbered_adapter(struct i2c_adapter *); +extern int i2c_add_adapter(struct i2c_adapter *adap); +extern void i2c_del_adapter(struct i2c_adapter *adap); +extern int i2c_add_numbered_adapter(struct i2c_adapter *adap); -extern int i2c_register_driver(struct module *, struct i2c_driver *); -extern void i2c_del_driver(struct i2c_driver *); +extern int i2c_register_driver(struct module *owner, struct i2c_driver *driver); +extern void i2c_del_driver(struct i2c_driver *driver); /* use a define to avoid include chaining to get THIS_MODULE */ #define i2c_add_driver(driver) \ -- cgit v1.2.3 From 2caea56f569ac361fc854f6bf2fe94b70514c917 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 3 Jun 2019 10:25:34 +0200 Subject: i2c: headers: update docs about I2C_CLIENT_* Update kerneldoc for i2c client flags because they increased over time. Also, move them to a position where they can be more easily found. Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 14e04fb4f46f..9853fae9b505 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -298,8 +298,7 @@ struct i2c_driver { /** * struct i2c_client - represent an I2C slave device - * @flags: I2C_CLIENT_TEN indicates the device uses a ten bit chip address; - * I2C_CLIENT_PEC indicates it uses SMBus Packet Error Checking + * @flags: see I2C_CLIENT_* for possible flags * @addr: Address used on the I2C bus connected to the parent adapter. * @name: Indicates the type of the device, usually a chip name that's * generic enough to hide second-sourcing and compatible revisions. @@ -317,6 +316,15 @@ struct i2c_driver { */ struct i2c_client { unsigned short flags; /* div., see below */ +#define I2C_CLIENT_PEC 0x04 /* Use Packet Error Checking */ +#define I2C_CLIENT_TEN 0x10 /* we have a ten bit chip address */ + /* Must equal I2C_M_TEN below */ +#define I2C_CLIENT_SLAVE 0x20 /* we are the slave */ +#define I2C_CLIENT_HOST_NOTIFY 0x40 /* We want to use I2C host notify */ +#define I2C_CLIENT_WAKE 0x80 /* for board_info; true iff can wake */ +#define I2C_CLIENT_SCCB 0x9000 /* Use Omnivision SCCB protocol */ + /* Must match I2C_M_STOP|IGNORE_NAK */ + unsigned short addr; /* chip address - NOTE: 7bit */ /* addresses are stored in the */ /* _LOWER_ 7 bits */ @@ -803,16 +811,6 @@ static inline void i2c_mark_adapter_resumed(struct i2c_adapter *adap) i2c_unlock_bus(adap, I2C_LOCK_ROOT_ADAPTER); } -/*flags for the client struct: */ -#define I2C_CLIENT_PEC 0x04 /* Use Packet Error Checking */ -#define I2C_CLIENT_TEN 0x10 /* we have a ten bit chip address */ - /* Must equal I2C_M_TEN below */ -#define I2C_CLIENT_SLAVE 0x20 /* we are the slave */ -#define I2C_CLIENT_HOST_NOTIFY 0x40 /* We want to use I2C host notify */ -#define I2C_CLIENT_WAKE 0x80 /* for board_info; true iff can wake */ -#define I2C_CLIENT_SCCB 0x9000 /* Use Omnivision SCCB protocol */ - /* Must match I2C_M_STOP|IGNORE_NAK */ - /* i2c adapter classes (bitmask) */ #define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... */ #define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ -- cgit v1.2.3 From 76cc9f0efd952d376e93e79b1f19fd6fdb8291bc Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 3 Jun 2019 10:25:35 +0200 Subject: i2c: headers: reformat header comment and update copyright Let's stick to coding style. Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 9853fae9b505..d8f9060179d0 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -1,16 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -/* ------------------------------------------------------------------------- */ -/* */ -/* i2c.h - definitions for the i2c-bus interface */ -/* */ -/* ------------------------------------------------------------------------- */ -/* Copyright (C) 1995-2000 Simon G. Vogl - +/* + * i2c.h - definitions for the Linux i2c bus interface + * Copyright (C) 1995-2000 Simon G. Vogl + * Copyright (C) 2013-2019 Wolfram Sang + * + * With some changes from Kyösti Mälkki and + * Frodo Looijaard */ -/* ------------------------------------------------------------------------- */ - -/* With some changes from Kyösti Mälkki and - Frodo Looijaard */ #ifndef _LINUX_I2C_H #define _LINUX_I2C_H -- cgit v1.2.3 From 7f94208c8f9a0a6d2ff0e0c0858c00ad8e5c8617 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 12 Jun 2019 17:18:47 +0800 Subject: bpf: Fix build error without CONFIG_INET If CONFIG_INET is not set, building fails: kernel/bpf/verifier.o: In function `check_mem_access': verifier.c: undefined reference to `bpf_xdp_sock_is_valid_access' kernel/bpf/verifier.o: In function `convert_ctx_accesses': verifier.c: undefined reference to `bpf_xdp_sock_convert_ctx_access' Reported-by: Hulk Robot Fixes: fada7fdc83c0 ("bpf: Allow bpf_map_lookup_elem() on an xskmap") Signed-off-by: YueHaibing Acked-by: Jonathan Lemon Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1fe137afa898..b15fb5fcb741 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -728,13 +728,6 @@ void __cpu_map_insert_ctx(struct bpf_map *map, u32 index); void __cpu_map_flush(struct bpf_map *map); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx); -bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type, - struct bpf_insn_access_aux *info); -u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, - const struct bpf_insn *si, - struct bpf_insn *insn_buf, - struct bpf_prog *prog, - u32 *target_size); /* Return map's numa specified by userspace */ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) @@ -1110,6 +1103,15 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size); + +bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info); + +u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size); #else static inline bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, @@ -1126,6 +1128,21 @@ static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, { return 0; } +static inline bool bpf_xdp_sock_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + return false; +} + +static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size) +{ + return 0; +} #endif /* CONFIG_INET */ #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From 7c86f20d15b7c1132e0c24358ce240ba4cb002b7 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Wed, 12 Jun 2019 21:31:15 +0200 Subject: net: stmmac: use GPIO descriptors in stmmac_mdio_reset Switch stmmac_mdio_reset to use GPIO descriptors. GPIO core handles the "snps,reset-gpio" for GPIO descriptors so we don't need to take care of it inside the driver anymore. The advantage of this is that we now preserve the GPIO flags which are passed via devicetree. This is required on some newer Amlogic boards which use an Open Drain pin for the reset GPIO. This pin can only output a LOW signal or switch to input mode but it cannot output a HIGH signal. There are already devicetree bindings for these special cases and GPIO core already takes care of them but only if we use GPIO descriptors instead of GPIO numbers. Signed-off-by: Martin Blumenstingl Reviewed-by: Linus Walleij Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 27 +++++++++++------------ include/linux/stmmac.h | 2 +- 2 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 093a223fe408..f1c39dd048e7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -20,11 +20,11 @@ Maintainer: Giuseppe Cavallaro *******************************************************************************/ +#include #include #include #include #include -#include #include #include #include @@ -251,37 +251,36 @@ int stmmac_mdio_reset(struct mii_bus *bus) #ifdef CONFIG_OF if (priv->device->of_node) { + struct gpio_desc *reset_gpio; + if (data->reset_gpio < 0) { struct device_node *np = priv->device->of_node; if (!np) return 0; - data->reset_gpio = of_get_named_gpio(np, - "snps,reset-gpio", 0); - if (data->reset_gpio < 0) - return 0; + reset_gpio = devm_gpiod_get_optional(priv->device, + "snps,reset", + GPIOD_OUT_LOW); + if (IS_ERR(reset_gpio)) + return PTR_ERR(reset_gpio); - data->active_low = of_property_read_bool(np, - "snps,reset-active-low"); of_property_read_u32_array(np, "snps,reset-delays-us", data->delays, 3); + } else { + reset_gpio = gpio_to_desc(data->reset_gpio); - if (devm_gpio_request(priv->device, data->reset_gpio, - "mdio-reset")) - return 0; + gpiod_direction_output(reset_gpio, 0); } - gpio_direction_output(data->reset_gpio, - data->active_low ? 1 : 0); if (data->delays[0]) msleep(DIV_ROUND_UP(data->delays[0], 1000)); - gpio_set_value(data->reset_gpio, data->active_low ? 0 : 1); + gpiod_set_value_cansleep(reset_gpio, 1); if (data->delays[1]) msleep(DIV_ROUND_UP(data->delays[1], 1000)); - gpio_set_value(data->reset_gpio, data->active_low ? 1 : 0); + gpiod_set_value_cansleep(reset_gpio, 0); if (data->delays[2]) msleep(DIV_ROUND_UP(data->delays[2], 1000)); } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 4335bd771ce5..816edb545592 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -97,7 +97,7 @@ struct stmmac_mdio_bus_data { int *irqs; int probed_phy_irq; #ifdef CONFIG_OF - int reset_gpio, active_low; + int reset_gpio; u32 delays[3]; #endif }; -- cgit v1.2.3 From f01c373fbeed9f5870bb056b65750ccef42f1f20 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 13 Jun 2019 11:08:15 -0400 Subject: locking/static_key: always define static_branch_deferred_inc This interface is currently only defined if CONFIG_JUMP_LABEL. Make it available also when jump labels are off. Signed-off-by: Willem de Bruijn Acked-by: Peter Zijlstra (Intel) Signed-off-by: David S. Miller --- include/linux/jump_label_ratelimit.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h index 42710d5949ba..8c3ee291b2d8 100644 --- a/include/linux/jump_label_ratelimit.h +++ b/include/linux/jump_label_ratelimit.h @@ -60,8 +60,6 @@ extern void jump_label_update_timeout(struct work_struct *work); 0), \ } -#define static_branch_deferred_inc(x) static_branch_inc(&(x)->key) - #else /* !CONFIG_JUMP_LABEL */ struct static_key_deferred { struct static_key key; @@ -95,4 +93,7 @@ jump_label_rate_limit(struct static_key_deferred *key, STATIC_KEY_CHECK_USE(key); } #endif /* CONFIG_JUMP_LABEL */ + +#define static_branch_deferred_inc(x) static_branch_inc(&(x)->key) + #endif /* _LINUX_JUMP_LABEL_RATELIMIT_H */ -- cgit v1.2.3 From 7928260539f3a13b5b23a3fa0a7c0e4f5255940b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 8 Jun 2019 11:39:05 +0200 Subject: processor: remove spin_cpu_yield spin_cpu_yield is unused, therefore remove it. Acked-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Signed-off-by: Heiko Carstens --- arch/powerpc/include/asm/processor.h | 2 -- include/linux/processor.h | 9 --------- 2 files changed, 11 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index ef573fe9873e..a9993e7a443b 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -346,8 +346,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) #define spin_cpu_relax() barrier() -#define spin_cpu_yield() spin_cpu_relax() - #define spin_end() HMT_medium() #define spin_until_cond(cond) \ diff --git a/include/linux/processor.h b/include/linux/processor.h index dbc952eec869..dc78bdc7079a 100644 --- a/include/linux/processor.h +++ b/include/linux/processor.h @@ -32,15 +32,6 @@ #define spin_cpu_relax() cpu_relax() #endif -/* - * spin_cpu_yield may be called to yield (undirected) to the hypervisor if - * necessary. This should be used if the wait is expected to take longer - * than context switch overhead, but we can't sleep or do a directed yield. - */ -#ifndef spin_cpu_yield -#define spin_cpu_yield() cpu_relax_yield() -#endif - #ifndef spin_end #define spin_end() #endif -- cgit v1.2.3 From 38f2c691a4b3e89d476f8e8350d1ca299974b89d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 17 May 2019 12:50:42 +0200 Subject: s390: improve wait logic of stop_machine The stop_machine loop to advance the state machine and to wait for all affected CPUs to check-in calls cpu_relax_yield in a tight loop until the last missing CPUs acknowledged the state transition. On a virtual system where not all logical CPUs are backed by real CPUs all the time it can take a while for all CPUs to check-in. With the current definition of cpu_relax_yield a diagnose 0x44 is done which tells the hypervisor to schedule *some* other CPU. That can be any CPU and not necessarily one of the CPUs that need to run in order to advance the state machine. This can lead to a pretty bad diagnose 0x44 storm until the last missing CPU finally checked-in. Replace the undirected cpu_relax_yield based on diagnose 0x44 with a directed yield. Each CPU in the wait loop will pick up the next CPU in the cpumask of stop_machine. The diagnose 0x9c is used to tell the hypervisor to run this next CPU instead of the current one. If there is only a limited number of real CPUs backing the virtual CPUs we end up with the real CPUs passed around in a round-robin fashion. [heiko.carstens@de.ibm.com]: Use cpumask_next_wrap as suggested by Peter Zijlstra. Signed-off-by: Martin Schwidefsky Acked-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Signed-off-by: Heiko Carstens --- arch/s390/include/asm/processor.h | 3 ++- arch/s390/kernel/processor.c | 17 ++++++++++++----- arch/s390/kernel/smp.c | 2 +- include/linux/sched.h | 2 +- kernel/stop_machine.c | 14 +++++++++----- 5 files changed, 25 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index b0fcbc37b637..445ce9ee4404 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -36,6 +36,7 @@ #ifndef __ASSEMBLY__ +#include #include #include #include @@ -225,7 +226,7 @@ static __no_kasan_or_inline unsigned short stap(void) * Give up the time slice of the virtual PU. */ #define cpu_relax_yield cpu_relax_yield -void cpu_relax_yield(void); +void cpu_relax_yield(const struct cpumask *cpumask); #define cpu_relax() barrier() diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 5de13307b703..4cdaefec1b7c 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -31,6 +31,7 @@ struct cpu_info { }; static DEFINE_PER_CPU(struct cpu_info, cpu_info); +static DEFINE_PER_CPU(int, cpu_relax_retry); static bool machine_has_cpu_mhz; @@ -58,13 +59,19 @@ void s390_update_cpu_mhz(void) on_each_cpu(update_cpu_mhz, NULL, 0); } -void notrace cpu_relax_yield(void) +void notrace cpu_relax_yield(const struct cpumask *cpumask) { - if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) { - diag_stat_inc(DIAG_STAT_X044); - asm volatile("diag 0,0,0x44"); + int cpu, this_cpu; + + this_cpu = smp_processor_id(); + if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) { + __this_cpu_write(cpu_relax_retry, 0); + cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false); + if (cpu >= nr_cpu_ids) + return; + if (arch_vcpu_is_preempted(cpu)) + smp_yield_cpu(cpu); } - barrier(); } EXPORT_SYMBOL(cpu_relax_yield); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index f00955940694..44974654cbd0 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -414,7 +414,7 @@ void smp_yield_cpu(int cpu) diag_stat_inc_norecursion(DIAG_STAT_X09C); asm volatile("diag %0,0,0x9c" : : "d" (pcpu_devices[cpu].address)); - } else if (MACHINE_HAS_DIAG44) { + } else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) { diag_stat_inc_norecursion(DIAG_STAT_X044); asm volatile("diag 0,0,0x44"); } diff --git a/include/linux/sched.h b/include/linux/sched.h index 11837410690f..1f9f3160da7e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1519,7 +1519,7 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpuma #endif #ifndef cpu_relax_yield -#define cpu_relax_yield() cpu_relax() +#define cpu_relax_yield(cpumask) cpu_relax() #endif extern int yield_to(struct task_struct *p, bool preempt); diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 2b5a6754646f..b8b0c5ff8da9 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -183,6 +183,7 @@ static int multi_cpu_stop(void *data) struct multi_stop_data *msdata = data; enum multi_stop_state curstate = MULTI_STOP_NONE; int cpu = smp_processor_id(), err = 0; + const struct cpumask *cpumask; unsigned long flags; bool is_active; @@ -192,15 +193,18 @@ static int multi_cpu_stop(void *data) */ local_save_flags(flags); - if (!msdata->active_cpus) - is_active = cpu == cpumask_first(cpu_online_mask); - else - is_active = cpumask_test_cpu(cpu, msdata->active_cpus); + if (!msdata->active_cpus) { + cpumask = cpu_online_mask; + is_active = cpu == cpumask_first(cpumask); + } else { + cpumask = msdata->active_cpus; + is_active = cpumask_test_cpu(cpu, cpumask); + } /* Simple state machine */ do { /* Chill out and ensure we re-read multi_stop_state. */ - cpu_relax_yield(); + cpu_relax_yield(cpumask); if (msdata->state != curstate) { curstate = msdata->state; switch (curstate) { -- cgit v1.2.3 From 4ecf0a43e729a7e641d800c294faabe87378fc05 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 8 Jun 2019 12:13:57 +0200 Subject: processor: get rid of cpu_relax_yield stop_machine is the only user left of cpu_relax_yield. Given that it now has special semantics which are tied to stop_machine introduce a weak stop_machine_yield function which architectures can override, and get rid of the generic cpu_relax_yield implementation. Acked-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Signed-off-by: Heiko Carstens --- arch/s390/include/asm/processor.h | 6 ------ arch/s390/kernel/processor.c | 4 ++-- include/linux/sched.h | 4 ---- include/linux/stop_machine.h | 1 + kernel/stop_machine.c | 7 ++++++- 5 files changed, 9 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 445ce9ee4404..14883b1562e0 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -222,12 +222,6 @@ static __no_kasan_or_inline unsigned short stap(void) return cpu_address; } -/* - * Give up the time slice of the virtual PU. - */ -#define cpu_relax_yield cpu_relax_yield -void cpu_relax_yield(const struct cpumask *cpumask); - #define cpu_relax() barrier() #define ECAG_CACHE_ATTRIBUTE 0 diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 4cdaefec1b7c..6ebc2117c66c 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -7,6 +7,7 @@ #define KMSG_COMPONENT "cpu" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include #include #include #include @@ -59,7 +60,7 @@ void s390_update_cpu_mhz(void) on_each_cpu(update_cpu_mhz, NULL, 0); } -void notrace cpu_relax_yield(const struct cpumask *cpumask) +void notrace stop_machine_yield(const struct cpumask *cpumask) { int cpu, this_cpu; @@ -73,7 +74,6 @@ void notrace cpu_relax_yield(const struct cpumask *cpumask) smp_yield_cpu(cpu); } } -EXPORT_SYMBOL(cpu_relax_yield); /* * cpu_init - initializes state that is per-CPU. diff --git a/include/linux/sched.h b/include/linux/sched.h index 1f9f3160da7e..911675416b05 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1518,10 +1518,6 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpuma } #endif -#ifndef cpu_relax_yield -#define cpu_relax_yield(cpumask) cpu_relax() -#endif - extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 6d3635c86dbe..f9a0c6189852 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -36,6 +36,7 @@ int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); void stop_machine_park(int cpu); void stop_machine_unpark(int cpu); +void stop_machine_yield(const struct cpumask *cpumask); #else /* CONFIG_SMP */ diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index b8b0c5ff8da9..b4f83f7bdf86 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -177,6 +177,11 @@ static void ack_state(struct multi_stop_data *msdata) set_state(msdata, msdata->state + 1); } +void __weak stop_machine_yield(const struct cpumask *cpumask) +{ + cpu_relax(); +} + /* This is the cpu_stop function which stops the CPU. */ static int multi_cpu_stop(void *data) { @@ -204,7 +209,7 @@ static int multi_cpu_stop(void *data) /* Simple state machine */ do { /* Chill out and ensure we re-read multi_stop_state. */ - cpu_relax_yield(cpumask); + stop_machine_yield(cpumask); if (msdata->state != curstate) { curstate = msdata->state; switch (curstate) { -- cgit v1.2.3 From 4838a54050284daac15dfeb1d65677e4dacf1bf5 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Fri, 14 Jun 2019 17:06:57 +0200 Subject: net: stmmac: Fix wrapper drivers not detecting PHY Because of PHYLINK conversion we stopped parsing the phy-handle property from DT. Unfortunatelly, some wrapper drivers still rely on this phy node to configure the PHY. Let's restore the parsing of PHY handle while these wrapper drivers are not fully converted to PHYLINK. Fixes: 74371272f97f ("net: stmmac: Convert to phylink and remove phylib logic") Reported-by: Corentin Labbe Signed-off-by: Jose Abreu Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Tested-by: Corentin Labbe Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ++-- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 9 ++++++++- include/linux/stmmac.h | 1 + 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ad007d8bf9d7..069951590018 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -958,7 +958,7 @@ static int stmmac_init_phy(struct net_device *dev) struct device_node *node; int ret; - node = priv->plat->phy_node; + node = priv->plat->phylink_node; if (node) { ret = phylink_of_phy_connect(priv->phylink, node, 0); @@ -980,7 +980,7 @@ static int stmmac_init_phy(struct net_device *dev) static int stmmac_phy_setup(struct stmmac_priv *priv) { - struct device_node *node = priv->plat->phy_node; + struct device_node *node = priv->plat->phylink_node; int mode = priv->plat->interface; struct phylink *phylink; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 898f94aced53..49adda9b0ad8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -381,7 +381,13 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) *mac = of_get_mac_address(np); plat->interface = of_get_phy_mode(np); - plat->phy_node = np; + + /* Some wrapper drivers still rely on phy_node. Let's save it while + * they are not converted to phylink. */ + plat->phy_node = of_parse_phandle(np, "phy-handle", 0); + + /* PHYLINK automatically parses the phy-handle property */ + plat->phylink_node = np; /* Get max speed of operation from device tree */ if (of_property_read_u32(np, "max-speed", &plat->max_speed)) @@ -577,6 +583,7 @@ error_pclk_get: void stmmac_remove_config_dt(struct platform_device *pdev, struct plat_stmmacenet_data *plat) { + of_node_put(plat->phy_node); of_node_put(plat->mdio_node); } #else diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 816edb545592..a3c2d9945bcf 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -151,6 +151,7 @@ struct plat_stmmacenet_data { int interface; struct stmmac_mdio_bus_data *mdio_bus_data; struct device_node *phy_node; + struct device_node *phylink_node; struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; int clk_csr; -- cgit v1.2.3 From 82b11f071936a11094e1c44730030cd3d894e0b4 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 12 Jun 2019 15:20:12 +0300 Subject: net/mlx5: Expose eswitch encap mode Add API to get the current Eswitch encap mode. It will be used in downstream patches to check if flow table can be created with encap support or not. Signed-off-by: Maor Gottlieb Reviewed-by: Petr Vorel Signed-off-by: Leon Romanovsky Reviewed-by: Parav Pandit --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 11 +++++++++++ include/linux/mlx5/eswitch.h | 12 ++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index a4df109fbeb7..12010f85fa35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2457,6 +2457,17 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) } EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); +enum devlink_eswitch_encap_mode +mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw; + + esw = dev->priv.eswitch; + return ESW_ALLOWED(esw) ? esw->offloads.encap : + DEVLINK_ESWITCH_ENCAP_MODE_NONE; +} +EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode); + bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { if ((dev0->priv.eswitch->mode == SRIOV_NONE && diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index d81ee4df181c..174eec0871d9 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -7,6 +7,7 @@ #define _MLX5_ESWITCH_ #include +#include #define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager) @@ -62,4 +63,15 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw); struct mlx5_flow_handle * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn); + +#ifdef CONFIG_MLX5_ESWITCH +enum devlink_eswitch_encap_mode +mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev); +#else /* CONFIG_MLX5_ESWITCH */ +static inline enum devlink_eswitch_encap_mode +mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) +{ + return DEVLINK_ESWITCH_ENCAP_MODE_NONE; +} +#endif /* CONFIG_MLX5_ESWITCH */ #endif -- cgit v1.2.3 From 7e770b252a62e7498cfa9411018100fd86e56d47 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 15 Jun 2019 12:09:30 +0200 Subject: net: stmmac: drop the reset GPIO from struct stmmac_mdio_bus_data No platform uses the "reset_gpio" field from stmmac_mdio_bus_data anymore. Drop it so we don't get any new consumers either. Plain GPIO numbers are being deprecated in favor of GPIO descriptors. If needed any new non-OF platform can add a GPIO descriptor lookup table. devm_gpiod_get_optional() will find the GPIO in that case. Suggested-by: Linus Walleij Signed-off-by: Martin Blumenstingl Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 29 +++++++---------------- include/linux/stmmac.h | 1 - 2 files changed, 9 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 4614f1f2bffb..459ef8afe4fb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -253,21 +253,15 @@ int stmmac_mdio_reset(struct mii_bus *bus) if (priv->device->of_node) { struct gpio_desc *reset_gpio; - if (data->reset_gpio < 0) { - reset_gpio = devm_gpiod_get_optional(priv->device, - "snps,reset", - GPIOD_OUT_LOW); - if (IS_ERR(reset_gpio)) - return PTR_ERR(reset_gpio); - - device_property_read_u32_array(priv->device, - "snps,reset-delays-us", - data->delays, 3); - } else { - reset_gpio = gpio_to_desc(data->reset_gpio); - - gpiod_direction_output(reset_gpio, 0); - } + reset_gpio = devm_gpiod_get_optional(priv->device, + "snps,reset", + GPIOD_OUT_LOW); + if (IS_ERR(reset_gpio)) + return PTR_ERR(reset_gpio); + + device_property_read_u32_array(priv->device, + "snps,reset-delays-us", + data->delays, 3); if (data->delays[0]) msleep(DIV_ROUND_UP(data->delays[0], 1000)); @@ -323,11 +317,6 @@ int stmmac_mdio_register(struct net_device *ndev) if (mdio_bus_data->irqs) memcpy(new_bus->irq, mdio_bus_data->irqs, sizeof(new_bus->irq)); -#ifdef CONFIG_OF - if (priv->device->of_node) - mdio_bus_data->reset_gpio = -1; -#endif - new_bus->name = "stmmac"; if (priv->plat->has_xgmac) { diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index a3c2d9945bcf..a0cc6fa4965b 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -97,7 +97,6 @@ struct stmmac_mdio_bus_data { int *irqs; int probed_phy_irq; #ifdef CONFIG_OF - int reset_gpio; u32 delays[3]; #endif }; -- cgit v1.2.3 From ce4ab73ab0c27c6a3853695aa8ec0f453c6329cd Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 15 Jun 2019 12:09:31 +0200 Subject: net: stmmac: drop the reset delays from struct stmmac_mdio_bus_data Only OF platforms use the reset delays and these delays are only read in stmmac_mdio_reset(). Move them from struct stmmac_mdio_bus_data to a stack variable inside stmmac_mdio_reset() because that's the only usage of these delays. Signed-off-by: Martin Blumenstingl Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 15 ++++++++------- include/linux/stmmac.h | 3 --- 2 files changed, 8 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 459ef8afe4fb..c9454cf4f189 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -252,6 +252,7 @@ int stmmac_mdio_reset(struct mii_bus *bus) #ifdef CONFIG_OF if (priv->device->of_node) { struct gpio_desc *reset_gpio; + u32 delays[3]; reset_gpio = devm_gpiod_get_optional(priv->device, "snps,reset", @@ -261,18 +262,18 @@ int stmmac_mdio_reset(struct mii_bus *bus) device_property_read_u32_array(priv->device, "snps,reset-delays-us", - data->delays, 3); + delays, ARRAY_SIZE(delays)); - if (data->delays[0]) - msleep(DIV_ROUND_UP(data->delays[0], 1000)); + if (delays[0]) + msleep(DIV_ROUND_UP(delays[0], 1000)); gpiod_set_value_cansleep(reset_gpio, 1); - if (data->delays[1]) - msleep(DIV_ROUND_UP(data->delays[1], 1000)); + if (delays[1]) + msleep(DIV_ROUND_UP(delays[1], 1000)); gpiod_set_value_cansleep(reset_gpio, 0); - if (data->delays[2]) - msleep(DIV_ROUND_UP(data->delays[2], 1000)); + if (delays[2]) + msleep(DIV_ROUND_UP(delays[2], 1000)); } #endif diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index a0cc6fa4965b..7c8328edd501 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -96,9 +96,6 @@ struct stmmac_mdio_bus_data { unsigned int phy_mask; int *irqs; int probed_phy_irq; -#ifdef CONFIG_OF - u32 delays[3]; -#endif }; struct stmmac_dma_cfg { -- cgit v1.2.3 From fead5b1b5838ba2f231d76e1b8ed31a4e9449382 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 15 Jun 2019 12:09:32 +0200 Subject: net: stmmac: drop the phy_reset hook from struct stmmac_mdio_bus_data The phy_reset hook is not set anywhere. Drop it to make stmmac_mdio_reset() smaller. Signed-off-by: Martin Blumenstingl Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 6 ------ include/linux/stmmac.h | 1 - 2 files changed, 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index c9454cf4f189..14aa3ee14082 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -247,7 +247,6 @@ int stmmac_mdio_reset(struct mii_bus *bus) struct net_device *ndev = bus->priv; struct stmmac_priv *priv = netdev_priv(ndev); unsigned int mii_address = priv->hw->mii.addr; - struct stmmac_mdio_bus_data *data = priv->plat->mdio_bus_data; #ifdef CONFIG_OF if (priv->device->of_node) { @@ -277,11 +276,6 @@ int stmmac_mdio_reset(struct mii_bus *bus) } #endif - if (data->phy_reset) { - netdev_dbg(ndev, "stmmac_mdio_reset: calling phy_reset\n"); - data->phy_reset(priv->plat->bsp_priv); - } - /* This is a workaround for problems with the STE101P PHY. * It doesn't complete its reset until at least one clock cycle * on MDC, so perform a dummy mdio read. To be updated for GMAC4 diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 7c8328edd501..6dfb5aa75b0c 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -92,7 +92,6 @@ /* Platfrom data for platform device structure's platform_data field */ struct stmmac_mdio_bus_data { - int (*phy_reset)(void *priv); unsigned int phy_mask; int *irqs; int probed_phy_irq; -- cgit v1.2.3 From 013e868bc9465452c7b667830712ab57de236d08 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Wed, 15 May 2019 15:38:47 +0530 Subject: mfd: lp87565: Add support for 4-phase LP87561 combination Add support for 4-phase LP87561 combination. Data Sheet: https://www.ti.com/lit/ds/symlink/lp87561-q1.pdf Signed-off-by: Keerthy Signed-off-by: Lee Jones --- drivers/mfd/lp87565.c | 4 ++++ include/linux/mfd/lp87565.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/lp87565.c b/drivers/mfd/lp87565.c index 32d2a07d4354..8ad688fe75f9 100644 --- a/drivers/mfd/lp87565.c +++ b/drivers/mfd/lp87565.c @@ -33,6 +33,10 @@ static const struct of_device_id of_lp87565_match_table[] = { .compatible = "ti,lp87565-q1", .data = (void *)LP87565_DEVICE_TYPE_LP87565_Q1, }, + { + .compatible = "ti,lp87561-q1", + .data = (void *)LP87565_DEVICE_TYPE_LP87561_Q1, + }, {} }; MODULE_DEVICE_TABLE(of, of_lp87565_match_table); diff --git a/include/linux/mfd/lp87565.h b/include/linux/mfd/lp87565.h index d0c91ba65525..976447607ea2 100644 --- a/include/linux/mfd/lp87565.h +++ b/include/linux/mfd/lp87565.h @@ -17,6 +17,7 @@ enum lp87565_device_type { LP87565_DEVICE_TYPE_UNKNOWN = 0, + LP87565_DEVICE_TYPE_LP87561_Q1, LP87565_DEVICE_TYPE_LP87565_Q1, }; @@ -249,6 +250,7 @@ enum LP87565_regulator_id { LP87565_BUCK_3, LP87565_BUCK_10, LP87565_BUCK_23, + LP87565_BUCK_3210, }; /** -- cgit v1.2.3 From c2ba8a15f310d915f8748dd8324c91c82b12b5ff Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Wed, 12 Jun 2019 11:57:30 +0200 Subject: jump_label: Batch updates if arch supports it If the architecture supports the batching of jump label updates, use it! An easy way to see the benefits of this patch is switching the schedstats on and off. For instance: -------------------------- %< ---------------------------- #!/bin/sh while [ true ]; do sysctl -w kernel.sched_schedstats=1 sleep 2 sysctl -w kernel.sched_schedstats=0 sleep 2 done -------------------------- >% ---------------------------- while watching the IPI count: -------------------------- %< ---------------------------- # watch -n1 "cat /proc/interrupts | grep Function" -------------------------- >% ---------------------------- With the current mode, it is possible to see +- 168 IPIs each 2 seconds, while with this patch the number of IPIs goes to 3 each 2 seconds. Regarding the performance impact of this patch set, I made two measurements: The time to update a key (the task that is causing the change) The time to run the int3 handler (the side effect on a thread that hits the code being changed) The schedstats static key was chosen as the key to being switched on and off. The reason being is that it is used in more than 56 places, in a hot path. The change in the schedstats static key will be done with the following command: while [ true ]; do sysctl -w kernel.sched_schedstats=1 usleep 500000 sysctl -w kernel.sched_schedstats=0 usleep 500000 done In this way, they key will be updated twice per second. To force the hit of the int3 handler, the system will also run a kernel compilation with two jobs per CPU. The test machine is a two nodes/24 CPUs box with an Intel Xeon processor @2.27GHz. Regarding the update part, on average, the regular kernel takes 57 ms to update the schedstats key, while the kernel with the batch updates takes just 1.4 ms on average. Although it seems to be too good to be true, it makes sense: the schedstats key is used in 56 places, so it was expected that it would take around 56 times to update the keys with the current implementation, as the IPIs are the most expensive part of the update. Regarding the int3 handler, the non-batch handler takes 45 ns on average, while the batch version takes around 180 ns. At first glance, it seems to be a high value. But it is not, considering that it is doing 56 updates, rather than one! It is taking four times more, only. This gain is possible because the patch uses a binary search in the vector: log2(56)=5.8. So, it was expected to have an overhead within four times. (voice of tv propaganda) But, that is not all! As the int3 handler keeps on for a shorter period (because the update part is on for a shorter time), the number of hits in the int3 handler decreased by 10%. The question then is: Is it worth paying the price of "135 ns" more in the int3 handler? Considering that, in this test case, we are saving the handling of 53 IPIs, that takes more than these 135 ns, it seems to be a meager price to be paid. Moreover, the test case was forcing the hit of the int3, in practice, it does not take that often. While the IPI takes place on all CPUs, hitting the int3 handler or not! For instance, in an isolated CPU with a process running in user-space (nohz_full use-case), the chances of hitting the int3 handler is barely zero, while there is no way to avoid the IPIs. By bounding the IPIs, we are improving a lot this scenario. Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Chris von Recklinghausen Cc: Clark Williams Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Jason Baron Cc: Jiri Kosina Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Marcelo Tosatti Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Scott Wood Cc: Steven Rostedt (VMware) Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/acc891dbc2dbc9fd616dd680529a2337b1d1274c.1560325897.git.bristot@redhat.com Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 3 +++ kernel/jump_label.c | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 3e113a1fa0f1..3526c0aee954 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -215,6 +215,9 @@ extern void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type); +extern bool arch_jump_label_transform_queue(struct jump_entry *entry, + enum jump_label_type type); +extern void arch_jump_label_transform_apply(void); extern int jump_label_text_reserved(void *start, void *end); extern void static_key_slow_inc(struct static_key *key); extern void static_key_slow_dec(struct static_key *key); diff --git a/kernel/jump_label.c b/kernel/jump_label.c index ca00ac10d9b9..df3008419a1d 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -414,6 +414,7 @@ static bool jump_label_can_update(struct jump_entry *entry, bool init) return true; } +#ifndef HAVE_JUMP_LABEL_BATCH static void __jump_label_update(struct static_key *key, struct jump_entry *entry, struct jump_entry *stop, @@ -424,6 +425,28 @@ static void __jump_label_update(struct static_key *key, arch_jump_label_transform(entry, jump_label_type(entry)); } } +#else +static void __jump_label_update(struct static_key *key, + struct jump_entry *entry, + struct jump_entry *stop, + bool init) +{ + for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) { + + if (!jump_label_can_update(entry, init)) + continue; + + if (!arch_jump_label_transform_queue(entry, jump_label_type(entry))) { + /* + * Queue is full: Apply the current queue and try again. + */ + arch_jump_label_transform_apply(); + BUG_ON(!arch_jump_label_transform_queue(entry, jump_label_type(entry))); + } + } + arch_jump_label_transform_apply(); +} +#endif void __init jump_label_init(void) { -- cgit v1.2.3 From 9ffbe8ac05dbb4ab4a4836a55a47fc6be945a38f Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Fri, 31 May 2019 13:06:51 +0300 Subject: locking/lockdep: Rename lockdep_assert_held_exclusive() -> lockdep_assert_held_write() All callers of lockdep_assert_held_exclusive() use it to verify the correct locking state of either a semaphore (ldisc_sem in tty, mmap_sem for perf events, i_rwsem of inode for dax) or rwlock by apparmor. Thus it makes sense to rename _exclusive to _write since that's the semantics callers care. Additionally there is already lockdep_assert_held_read(), which this new naming is more consistent with. No functional changes. Signed-off-by: Nikolay Borisov Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190531100651.3969-1-nborisov@suse.com Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 2 +- drivers/infiniband/core/device.c | 2 +- drivers/tty/tty_ldisc.c | 8 ++++---- fs/dax.c | 2 +- include/linux/lockdep.h | 4 ++-- security/apparmor/label.c | 8 ++++---- 6 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index f315425d8468..cf91d80b8452 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2179,7 +2179,7 @@ static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) * For now, this can't happen because all callers hold mmap_sem * for write. If this changes, we'll need a different solution. */ - lockdep_assert_held_exclusive(&mm->mmap_sem); + lockdep_assert_held_write(&mm->mmap_sem); if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1) on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 29f7b15c81d9..d020bb4d03d5 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -457,7 +457,7 @@ static int alloc_name(struct ib_device *ibdev, const char *name) int rc; int i; - lockdep_assert_held_exclusive(&devices_rwsem); + lockdep_assert_held_write(&devices_rwsem); ida_init(&inuse); xa_for_each (&devices, index, device) { char buf[IB_DEVICE_NAME_MAX]; diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index e38f104db174..fde8d4073e74 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -487,7 +487,7 @@ static int tty_ldisc_open(struct tty_struct *tty, struct tty_ldisc *ld) static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld) { - lockdep_assert_held_exclusive(&tty->ldisc_sem); + lockdep_assert_held_write(&tty->ldisc_sem); WARN_ON(!test_bit(TTY_LDISC_OPEN, &tty->flags)); clear_bit(TTY_LDISC_OPEN, &tty->flags); if (ld->ops->close) @@ -509,7 +509,7 @@ static int tty_ldisc_failto(struct tty_struct *tty, int ld) struct tty_ldisc *disc = tty_ldisc_get(tty, ld); int r; - lockdep_assert_held_exclusive(&tty->ldisc_sem); + lockdep_assert_held_write(&tty->ldisc_sem); if (IS_ERR(disc)) return PTR_ERR(disc); tty->ldisc = disc; @@ -633,7 +633,7 @@ EXPORT_SYMBOL_GPL(tty_set_ldisc); */ static void tty_ldisc_kill(struct tty_struct *tty) { - lockdep_assert_held_exclusive(&tty->ldisc_sem); + lockdep_assert_held_write(&tty->ldisc_sem); if (!tty->ldisc) return; /* @@ -681,7 +681,7 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc) struct tty_ldisc *ld; int retval; - lockdep_assert_held_exclusive(&tty->ldisc_sem); + lockdep_assert_held_write(&tty->ldisc_sem); ld = tty_ldisc_get(tty, disc); if (IS_ERR(ld)) { BUG_ON(disc == N_TTY); diff --git a/fs/dax.c b/fs/dax.c index 2e48c7ebb973..bf8686d48b2d 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1188,7 +1188,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, unsigned flags = 0; if (iov_iter_rw(iter) == WRITE) { - lockdep_assert_held_exclusive(&inode->i_rwsem); + lockdep_assert_held_write(&inode->i_rwsem); flags |= IOMAP_WRITE; } else { lockdep_assert_held(&inode->i_rwsem); diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 30a0f81aa130..151d55711082 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -394,7 +394,7 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); WARN_ON(debug_locks && !lockdep_is_held(l)); \ } while (0) -#define lockdep_assert_held_exclusive(l) do { \ +#define lockdep_assert_held_write(l) do { \ WARN_ON(debug_locks && !lockdep_is_held_type(l, 0)); \ } while (0) @@ -479,7 +479,7 @@ struct lockdep_map { }; #define lockdep_is_held_type(l, r) (1) #define lockdep_assert_held(l) do { (void)(l); } while (0) -#define lockdep_assert_held_exclusive(l) do { (void)(l); } while (0) +#define lockdep_assert_held_write(l) do { (void)(l); } while (0) #define lockdep_assert_held_read(l) do { (void)(l); } while (0) #define lockdep_assert_held_once(l) do { (void)(l); } while (0) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index 068e93c5d29c..59f1cc2557a7 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -76,7 +76,7 @@ void __aa_proxy_redirect(struct aa_label *orig, struct aa_label *new) AA_BUG(!orig); AA_BUG(!new); - lockdep_assert_held_exclusive(&labels_set(orig)->lock); + lockdep_assert_held_write(&labels_set(orig)->lock); tmp = rcu_dereference_protected(orig->proxy->label, &labels_ns(orig)->lock); @@ -566,7 +566,7 @@ static bool __label_remove(struct aa_label *label, struct aa_label *new) AA_BUG(!ls); AA_BUG(!label); - lockdep_assert_held_exclusive(&ls->lock); + lockdep_assert_held_write(&ls->lock); if (new) __aa_proxy_redirect(label, new); @@ -603,7 +603,7 @@ static bool __label_replace(struct aa_label *old, struct aa_label *new) AA_BUG(!ls); AA_BUG(!old); AA_BUG(!new); - lockdep_assert_held_exclusive(&ls->lock); + lockdep_assert_held_write(&ls->lock); AA_BUG(new->flags & FLAG_IN_TREE); if (!label_is_stale(old)) @@ -640,7 +640,7 @@ static struct aa_label *__label_insert(struct aa_labelset *ls, AA_BUG(!ls); AA_BUG(!label); AA_BUG(labels_set(label) != ls); - lockdep_assert_held_exclusive(&ls->lock); + lockdep_assert_held_write(&ls->lock); AA_BUG(label->flags & FLAG_IN_TREE); /* Figure out where to put new node */ -- cgit v1.2.3 From c71fd893f614f205dbc050d60299cc5496491c19 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 20 May 2019 16:59:00 -0400 Subject: locking/rwsem: Make owner available even if !CONFIG_RWSEM_SPIN_ON_OWNER The owner field in the rw_semaphore structure is used primarily for optimistic spinning. However, identifying the rwsem owner can also be helpful in debugging as well as tracing locking related issues when analyzing crash dump. The owner field may also store state information that can be important to the operation of the rwsem. So the owner field is now made a permanent member of the rw_semaphore structure irrespective of CONFIG_RWSEM_SPIN_ON_OWNER. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Cc: huang ying Link: https://lkml.kernel.org/r/20190520205918.22251-2-longman@redhat.com Signed-off-by: Ingo Molnar --- include/linux/rwsem.h | 9 +++++---- kernel/locking/rwsem-xadd.c | 2 +- kernel/locking/rwsem.h | 23 ----------------------- lib/Kconfig.debug | 8 ++++---- 4 files changed, 10 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 2ea18a3def04..148983e21d47 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -34,12 +34,12 @@ */ struct rw_semaphore { atomic_long_t count; -#ifdef CONFIG_RWSEM_SPIN_ON_OWNER /* - * Write owner. Used as a speculative check to see - * if the owner is running on the cpu. + * Write owner or one of the read owners. Can be used as a + * speculative check to see if the owner is running on the cpu. */ struct task_struct *owner; +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER struct optimistic_spin_queue osq; /* spinner MCS lock */ #endif raw_spinlock_t wait_lock; @@ -73,13 +73,14 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) #endif #ifdef CONFIG_RWSEM_SPIN_ON_OWNER -#define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED, .owner = NULL +#define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED #else #define __RWSEM_OPT_INIT(lockname) #endif #define __RWSEM_INITIALIZER(name) \ { __RWSEM_INIT_COUNT(name), \ + .owner = NULL, \ .wait_list = LIST_HEAD_INIT((name).wait_list), \ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \ __RWSEM_OPT_INIT(name) \ diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 0b1f77957240..c0500679fd2f 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -86,8 +86,8 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE); raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); -#ifdef CONFIG_RWSEM_SPIN_ON_OWNER sem->owner = NULL; +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER osq_lock_init(&sem->osq); #endif } diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 64877f5294e3..eb9c8534299b 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -61,7 +61,6 @@ #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) -#ifdef CONFIG_RWSEM_SPIN_ON_OWNER /* * All writes to owner are protected by WRITE_ONCE() to make sure that * store tearing can't happen as optimistic spinners may read and use @@ -126,7 +125,6 @@ static inline bool rwsem_has_anonymous_owner(struct task_struct *owner) * real owner or one of the real owners. The only exception is when the * unlock is done by up_read_non_owner(). */ -#define rwsem_clear_reader_owned rwsem_clear_reader_owned static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) { unsigned long val = (unsigned long)current | RWSEM_READER_OWNED @@ -135,28 +133,7 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) cmpxchg_relaxed((unsigned long *)&sem->owner, val, RWSEM_READER_OWNED | RWSEM_ANONYMOUSLY_OWNED); } -#endif - #else -static inline void rwsem_set_owner(struct rw_semaphore *sem) -{ -} - -static inline void rwsem_clear_owner(struct rw_semaphore *sem) -{ -} - -static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem, - struct task_struct *owner) -{ -} - -static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) -{ -} -#endif - -#ifndef rwsem_clear_reader_owned static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) { } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cbdfae379896..417bdd9e80fb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1095,7 +1095,7 @@ config PROVE_LOCKING select DEBUG_SPINLOCK select DEBUG_MUTEXES select DEBUG_RT_MUTEXES if RT_MUTEXES - select DEBUG_RWSEMS if RWSEM_SPIN_ON_OWNER + select DEBUG_RWSEMS select DEBUG_WW_MUTEX_SLOWPATH select DEBUG_LOCK_ALLOC select TRACE_IRQFLAGS @@ -1199,10 +1199,10 @@ config DEBUG_WW_MUTEX_SLOWPATH config DEBUG_RWSEMS bool "RW Semaphore debugging: basic checks" - depends on DEBUG_KERNEL && RWSEM_SPIN_ON_OWNER + depends on DEBUG_KERNEL help - This debugging feature allows mismatched rw semaphore locks and unlocks - to be detected and reported. + This debugging feature allows mismatched rw semaphore locks + and unlocks to be detected and reported. config DEBUG_LOCK_ALLOC bool "Lock debugging: detect incorrect freeing of live locks" -- cgit v1.2.3 From 00f3c5a3df2c1e3dab14d0dd2b71f852d46be97f Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 20 May 2019 16:59:07 -0400 Subject: locking/rwsem: Always release wait_lock before waking up tasks With the use of wake_q, we can do task wakeups without holding the wait_lock. There is one exception in the rwsem code, though. It is when the writer in the slowpath detects that there are waiters ahead but the rwsem is not held by a writer. This can lead to a long wait_lock hold time especially when a large number of readers are to be woken up. Remediate this situation by releasing the wait_lock before waking up tasks and re-acquiring it afterward. The rwsem_try_write_lock() function is also modified to read the rwsem count directly to avoid stale count value. Suggested-by: Peter Zijlstra Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Cc: huang ying Link: https://lkml.kernel.org/r/20190520205918.22251-9-longman@redhat.com Signed-off-by: Ingo Molnar --- include/linux/sched/wake_q.h | 5 +++++ kernel/locking/rwsem.c | 31 +++++++++++++++---------------- 2 files changed, 20 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h index ad826d2a4557..26a2013ac39c 100644 --- a/include/linux/sched/wake_q.h +++ b/include/linux/sched/wake_q.h @@ -51,6 +51,11 @@ static inline void wake_q_init(struct wake_q_head *head) head->lastp = &head->first; } +static inline bool wake_q_empty(struct wake_q_head *head) +{ + return head->first == WAKE_Q_TAIL; +} + extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); extern void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task); extern void wake_up_q(struct wake_q_head *head); diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index decda9fb8c6d..5532304406f7 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -400,13 +400,14 @@ static void rwsem_mark_wake(struct rw_semaphore *sem, * If wstate is WRITER_HANDOFF, it will make sure that either the handoff * bit is set or the lock is acquired with handoff bit cleared. */ -static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem, +static inline bool rwsem_try_write_lock(struct rw_semaphore *sem, enum writer_wait_state wstate) { - long new; + long count, new; lockdep_assert_held(&sem->wait_lock); + count = atomic_long_read(&sem->count); do { bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF); @@ -751,26 +752,25 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) ? RWSEM_WAKE_READERS : RWSEM_WAKE_ANY, &wake_q); - /* - * The wakeup is normally called _after_ the wait_lock - * is released, but given that we are proactively waking - * readers we can deal with the wake_q overhead as it is - * similar to releasing and taking the wait_lock again - * for attempting rwsem_try_write_lock(). - */ - wake_up_q(&wake_q); - - /* We need wake_q again below, reinitialize */ - wake_q_init(&wake_q); + if (!wake_q_empty(&wake_q)) { + /* + * We want to minimize wait_lock hold time especially + * when a large number of readers are to be woken up. + */ + raw_spin_unlock_irq(&sem->wait_lock); + wake_up_q(&wake_q); + wake_q_init(&wake_q); /* Used again, reinit */ + raw_spin_lock_irq(&sem->wait_lock); + } } else { - count = atomic_long_add_return(RWSEM_FLAG_WAITERS, &sem->count); + atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count); } wait: /* wait until we successfully acquire the lock */ set_current_state(state); while (true) { - if (rwsem_try_write_lock(count, sem, wstate)) + if (rwsem_try_write_lock(sem, wstate)) break; raw_spin_unlock_irq(&sem->wait_lock); @@ -811,7 +811,6 @@ wait: } raw_spin_lock_irq(&sem->wait_lock); - count = atomic_long_read(&sem->count); } __set_current_state(TASK_RUNNING); list_del(&waiter.list); -- cgit v1.2.3 From 02f1082b003a0cd48f48f12533d969cdbf1c2b63 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 20 May 2019 16:59:10 -0400 Subject: locking/rwsem: Clarify usage of owner's nonspinaable bit Bit 1 of sem->owner (RWSEM_ANONYMOUSLY_OWNED) is used to designate an anonymous owner - readers or an anonymous writer. The setting of this anonymous bit is used as an indicator that optimistic spinning cannot be done on this rwsem. With the upcoming reader optimistic spinning patches, a reader-owned rwsem can be spinned on for a limit period of time. We still need this bit to indicate a rwsem is nonspinnable, but not setting this bit loses its meaning that the owner is known. So rename the bit to RWSEM_NONSPINNABLE to clarify its meaning. This patch also fixes a DEBUG_RWSEMS_WARN_ON() bug in __up_write(). Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Cc: huang ying Link: https://lkml.kernel.org/r/20190520205918.22251-12-longman@redhat.com Signed-off-by: Ingo Molnar --- include/linux/rwsem.h | 2 +- kernel/locking/rwsem.c | 43 +++++++++++++++++++++---------------------- 2 files changed, 22 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 148983e21d47..bb76e82398b2 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -50,7 +50,7 @@ struct rw_semaphore { }; /* - * Setting bit 1 of the owner field but not bit 0 will indicate + * Setting all bits of the owner field except bit 0 will indicate * that the rwsem is writer-owned with an unknown owner. */ #define RWSEM_OWNER_UNKNOWN ((struct task_struct *)-2L) diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index ded96023f4dc..180455b6b0d4 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -33,17 +33,18 @@ /* * The least significant 2 bits of the owner value has the following * meanings when set. - * - RWSEM_READER_OWNED (bit 0): The rwsem is owned by readers - * - RWSEM_ANONYMOUSLY_OWNED (bit 1): The rwsem is anonymously owned, - * i.e. the owner(s) cannot be readily determined. It can be reader - * owned or the owning writer is indeterminate. + * - Bit 0: RWSEM_READER_OWNED - The rwsem is owned by readers + * - Bit 1: RWSEM_NONSPINNABLE - Waiters cannot spin on the rwsem + * The rwsem is anonymously owned, i.e. the owner(s) cannot be + * readily determined. It can be reader owned or the owning writer + * is indeterminate. * * When a writer acquires a rwsem, it puts its task_struct pointer * into the owner field. It is cleared after an unlock. * * When a reader acquires a rwsem, it will also puts its task_struct * pointer into the owner field with both the RWSEM_READER_OWNED and - * RWSEM_ANONYMOUSLY_OWNED bits set. On unlock, the owner field will + * RWSEM_NONSPINNABLE bits set. On unlock, the owner field will * largely be left untouched. So for a free or reader-owned rwsem, * the owner value may contain information about the last reader that * acquires the rwsem. The anonymous bit is set because that particular @@ -55,7 +56,8 @@ * a rwsem, but the overhead is simply too big. */ #define RWSEM_READER_OWNED (1UL << 0) -#define RWSEM_ANONYMOUSLY_OWNED (1UL << 1) +#define RWSEM_NONSPINNABLE (1UL << 1) +#define RWSEM_OWNER_FLAGS_MASK (RWSEM_READER_OWNED | RWSEM_NONSPINNABLE) #ifdef CONFIG_DEBUG_RWSEMS # define DEBUG_RWSEMS_WARN_ON(c, sem) do { \ @@ -132,7 +134,7 @@ static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem, struct task_struct *owner) { unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED - | RWSEM_ANONYMOUSLY_OWNED; + | RWSEM_NONSPINNABLE; WRITE_ONCE(sem->owner, (struct task_struct *)val); } @@ -144,20 +146,12 @@ static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) /* * Return true if the a rwsem waiter can spin on the rwsem's owner - * and steal the lock, i.e. the lock is not anonymously owned. + * and steal the lock. * N.B. !owner is considered spinnable. */ static inline bool is_rwsem_owner_spinnable(struct task_struct *owner) { - return !((unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED); -} - -/* - * Return true if rwsem is owned by an anonymous writer or readers. - */ -static inline bool rwsem_has_anonymous_owner(struct task_struct *owner) -{ - return (unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED; + return !((unsigned long)owner & RWSEM_NONSPINNABLE); } #ifdef CONFIG_DEBUG_RWSEMS @@ -170,10 +164,10 @@ static inline bool rwsem_has_anonymous_owner(struct task_struct *owner) static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) { unsigned long val = (unsigned long)current | RWSEM_READER_OWNED - | RWSEM_ANONYMOUSLY_OWNED; + | RWSEM_NONSPINNABLE; if (READ_ONCE(sem->owner) == (struct task_struct *)val) cmpxchg_relaxed((unsigned long *)&sem->owner, val, - RWSEM_READER_OWNED | RWSEM_ANONYMOUSLY_OWNED); + RWSEM_READER_OWNED | RWSEM_NONSPINNABLE); } #else static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) @@ -495,7 +489,7 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) struct task_struct *owner; bool ret = true; - BUILD_BUG_ON(!rwsem_has_anonymous_owner(RWSEM_OWNER_UNKNOWN)); + BUILD_BUG_ON(is_rwsem_owner_spinnable(RWSEM_OWNER_UNKNOWN)); if (need_resched()) return false; @@ -534,7 +528,7 @@ static inline enum owner_state rwsem_owner_state(unsigned long owner) if (!owner) return OWNER_NULL; - if (owner & RWSEM_ANONYMOUSLY_OWNED) + if (owner & RWSEM_NONSPINNABLE) return OWNER_NONSPINNABLE; if (owner & RWSEM_READER_OWNED) @@ -1043,7 +1037,12 @@ static inline void __up_write(struct rw_semaphore *sem) { long tmp; - DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem); + /* + * sem->owner may differ from current if the ownership is transferred + * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits. + */ + DEBUG_RWSEMS_WARN_ON((sem->owner != current) && + !((long)sem->owner & RWSEM_NONSPINNABLE), sem); rwsem_clear_owner(sem); tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count); if (unlikely(tmp & RWSEM_FLAG_WAITERS)) -- cgit v1.2.3 From 94a9717b3c40e77a54e4afacd8f19a9a86bfeead Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 20 May 2019 16:59:12 -0400 Subject: locking/rwsem: Make rwsem->owner an atomic_long_t The rwsem->owner contains not just the task structure pointer, it also holds some flags for storing the current state of the rwsem. Some of the flags may have to be atomically updated. To reflect the new reality, the owner is now changed to an atomic_long_t type. New helper functions are added to properly separate out the task structure pointer and the embedded flags. Suggested-by: Peter Zijlstra Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Cc: huang ying Link: https://lkml.kernel.org/r/20190520205918.22251-14-longman@redhat.com Signed-off-by: Ingo Molnar --- include/linux/percpu-rwsem.h | 4 +- include/linux/rwsem.h | 11 ++-- kernel/locking/rwsem.c | 125 +++++++++++++++++++++++++++---------------- 3 files changed, 88 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 03cb4b6f842e..0a43830f1932 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -117,7 +117,7 @@ static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem, lock_release(&sem->rw_sem.dep_map, 1, ip); #ifdef CONFIG_RWSEM_SPIN_ON_OWNER if (!read) - sem->rw_sem.owner = RWSEM_OWNER_UNKNOWN; + atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN); #endif } @@ -127,7 +127,7 @@ static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem, lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip); #ifdef CONFIG_RWSEM_SPIN_ON_OWNER if (!read) - sem->rw_sem.owner = current; + atomic_long_set(&sem->rw_sem.owner, (long)current); #endif } diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index bb76e82398b2..e401358c4e7e 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -35,10 +35,11 @@ struct rw_semaphore { atomic_long_t count; /* - * Write owner or one of the read owners. Can be used as a - * speculative check to see if the owner is running on the cpu. + * Write owner or one of the read owners as well flags regarding + * the current state of the rwsem. Can be used as a speculative + * check to see if the write owner is running on the cpu. */ - struct task_struct *owner; + atomic_long_t owner; #ifdef CONFIG_RWSEM_SPIN_ON_OWNER struct optimistic_spin_queue osq; /* spinner MCS lock */ #endif @@ -53,7 +54,7 @@ struct rw_semaphore { * Setting all bits of the owner field except bit 0 will indicate * that the rwsem is writer-owned with an unknown owner. */ -#define RWSEM_OWNER_UNKNOWN ((struct task_struct *)-2L) +#define RWSEM_OWNER_UNKNOWN (-2L) /* In all implementations count != 0 means locked */ static inline int rwsem_is_locked(struct rw_semaphore *sem) @@ -80,7 +81,7 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) #define __RWSEM_INITIALIZER(name) \ { __RWSEM_INIT_COUNT(name), \ - .owner = NULL, \ + .owner = ATOMIC_LONG_INIT(0), \ .wait_list = LIST_HEAD_INIT((name).wait_list), \ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \ __RWSEM_OPT_INIT(name) \ diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 985a03ad3f8c..fae557be8334 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -64,7 +64,7 @@ if (!debug_locks_silent && \ WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\ #c, atomic_long_read(&(sem)->count), \ - (long)((sem)->owner), (long)current, \ + atomic_long_read(&(sem)->owner), (long)current, \ list_empty(&(sem)->wait_list) ? "" : "not ")) \ debug_locks_off(); \ } while (0) @@ -114,12 +114,20 @@ */ static inline void rwsem_set_owner(struct rw_semaphore *sem) { - WRITE_ONCE(sem->owner, current); + atomic_long_set(&sem->owner, (long)current); } static inline void rwsem_clear_owner(struct rw_semaphore *sem) { - WRITE_ONCE(sem->owner, NULL); + atomic_long_set(&sem->owner, 0); +} + +/* + * Test the flags in the owner field. + */ +static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags) +{ + return atomic_long_read(&sem->owner) & flags; } /* @@ -133,10 +141,9 @@ static inline void rwsem_clear_owner(struct rw_semaphore *sem) static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem, struct task_struct *owner) { - unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED - | RWSEM_NONSPINNABLE; + unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED | RWSEM_NONSPINNABLE; - WRITE_ONCE(sem->owner, (struct task_struct *)val); + atomic_long_set(&sem->owner, val); } static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) @@ -145,13 +152,20 @@ static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) } /* - * Return true if the a rwsem waiter can spin on the rwsem's owner - * and steal the lock. - * N.B. !owner is considered spinnable. + * Return true if the rwsem is owned by a reader. */ -static inline bool is_rwsem_owner_spinnable(struct task_struct *owner) +static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem) { - return !((unsigned long)owner & RWSEM_NONSPINNABLE); +#ifdef CONFIG_DEBUG_RWSEMS + /* + * Check the count to see if it is write-locked. + */ + long count = atomic_long_read(&sem->count); + + if (count & RWSEM_WRITER_MASK) + return false; +#endif + return rwsem_test_oflags(sem, RWSEM_READER_OWNED); } #ifdef CONFIG_DEBUG_RWSEMS @@ -163,11 +177,13 @@ static inline bool is_rwsem_owner_spinnable(struct task_struct *owner) */ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) { - unsigned long val = (unsigned long)current | RWSEM_READER_OWNED - | RWSEM_NONSPINNABLE; - if (READ_ONCE(sem->owner) == (struct task_struct *)val) - cmpxchg_relaxed((unsigned long *)&sem->owner, val, - RWSEM_READER_OWNED | RWSEM_NONSPINNABLE); + unsigned long val = atomic_long_read(&sem->owner); + + while ((val & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current) { + if (atomic_long_try_cmpxchg(&sem->owner, &val, + val & RWSEM_OWNER_FLAGS_MASK)) + return; + } } #else static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) @@ -175,6 +191,28 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) } #endif +/* + * Return just the real task structure pointer of the owner + */ +static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem) +{ + return (struct task_struct *) + (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK); +} + +/* + * Return the real task structure pointer of the owner and the embedded + * flags in the owner. pflags must be non-NULL. + */ +static inline struct task_struct * +rwsem_owner_flags(struct rw_semaphore *sem, unsigned long *pflags) +{ + unsigned long owner = atomic_long_read(&sem->owner); + + *pflags = owner & RWSEM_OWNER_FLAGS_MASK; + return (struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK); +} + /* * Guide to the rw_semaphore's count field. * @@ -208,7 +246,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE); raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); - sem->owner = NULL; + atomic_long_set(&sem->owner, 0L); #ifdef CONFIG_RWSEM_SPIN_ON_OWNER osq_lock_init(&sem->osq); #endif @@ -511,9 +549,10 @@ static inline bool owner_on_cpu(struct task_struct *owner) static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) { struct task_struct *owner; + unsigned long flags; bool ret = true; - BUILD_BUG_ON(is_rwsem_owner_spinnable(RWSEM_OWNER_UNKNOWN)); + BUILD_BUG_ON(!(RWSEM_OWNER_UNKNOWN & RWSEM_NONSPINNABLE)); if (need_resched()) { lockevent_inc(rwsem_opt_fail); @@ -522,11 +561,9 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) preempt_disable(); rcu_read_lock(); - owner = READ_ONCE(sem->owner); - if (owner) { - ret = is_rwsem_owner_spinnable(owner) && - owner_on_cpu(owner); - } + owner = rwsem_owner_flags(sem, &flags); + if ((flags & RWSEM_NONSPINNABLE) || (owner && !owner_on_cpu(owner))) + ret = false; rcu_read_unlock(); preempt_enable(); @@ -553,25 +590,26 @@ enum owner_state { }; #define OWNER_SPINNABLE (OWNER_NULL | OWNER_WRITER) -static inline enum owner_state rwsem_owner_state(unsigned long owner) +static inline enum owner_state +rwsem_owner_state(struct task_struct *owner, unsigned long flags) { - if (!owner) - return OWNER_NULL; - - if (owner & RWSEM_NONSPINNABLE) + if (flags & RWSEM_NONSPINNABLE) return OWNER_NONSPINNABLE; - if (owner & RWSEM_READER_OWNED) + if (flags & RWSEM_READER_OWNED) return OWNER_READER; - return OWNER_WRITER; + return owner ? OWNER_WRITER : OWNER_NULL; } static noinline enum owner_state rwsem_spin_on_owner(struct rw_semaphore *sem) { - struct task_struct *tmp, *owner = READ_ONCE(sem->owner); - enum owner_state state = rwsem_owner_state((unsigned long)owner); + struct task_struct *new, *owner; + unsigned long flags, new_flags; + enum owner_state state; + owner = rwsem_owner_flags(sem, &flags); + state = rwsem_owner_state(owner, flags); if (state != OWNER_WRITER) return state; @@ -582,9 +620,9 @@ static noinline enum owner_state rwsem_spin_on_owner(struct rw_semaphore *sem) break; } - tmp = READ_ONCE(sem->owner); - if (tmp != owner) { - state = rwsem_owner_state((unsigned long)tmp); + new = rwsem_owner_flags(sem, &new_flags); + if ((new != owner) || (new_flags != flags)) { + state = rwsem_owner_state(new, new_flags); break; } @@ -1001,8 +1039,7 @@ inline void __down_read(struct rw_semaphore *sem) if (unlikely(atomic_long_fetch_add_acquire(RWSEM_READER_BIAS, &sem->count) & RWSEM_READ_FAILED_MASK)) { rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE); - DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & - RWSEM_READER_OWNED), sem); + DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem); } else { rwsem_set_reader_owned(sem); } @@ -1014,8 +1051,7 @@ static inline int __down_read_killable(struct rw_semaphore *sem) &sem->count) & RWSEM_READ_FAILED_MASK)) { if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_KILLABLE))) return -EINTR; - DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & - RWSEM_READER_OWNED), sem); + DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem); } else { rwsem_set_reader_owned(sem); } @@ -1084,7 +1120,7 @@ inline void __up_read(struct rw_semaphore *sem) { long tmp; - DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED), sem); + DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem); rwsem_clear_reader_owned(sem); tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count); if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) == @@ -1103,8 +1139,8 @@ static inline void __up_write(struct rw_semaphore *sem) * sem->owner may differ from current if the ownership is transferred * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits. */ - DEBUG_RWSEMS_WARN_ON((sem->owner != current) && - !((long)sem->owner & RWSEM_NONSPINNABLE), sem); + DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) && + !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem); rwsem_clear_owner(sem); tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count); if (unlikely(tmp & RWSEM_FLAG_WAITERS)) @@ -1125,7 +1161,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) * read-locked region is ok to be re-ordered into the * write side. As such, rely on RELEASE semantics. */ - DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem); + DEBUG_RWSEMS_WARN_ON(rwsem_owner(sem) != current, sem); tmp = atomic_long_fetch_add_release( -RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count); rwsem_set_reader_owned(sem); @@ -1296,8 +1332,7 @@ EXPORT_SYMBOL(down_write_killable_nested); void up_read_non_owner(struct rw_semaphore *sem) { - DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED), - sem); + DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem); __up_read(sem); } EXPORT_SYMBOL(up_read_non_owner); -- cgit v1.2.3 From 9ed7d75b2f09d836e71d597cd5879abb1a44e7a9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Feb 2019 09:48:51 +0100 Subject: x86/percpu: Relax smp_processor_id() Nadav reported that since this_cpu_read() became asm-volatile, many smp_processor_id() users generated worse code due to the extra constraints. However since smp_processor_id() is reading a stable value, we can use __this_cpu_read(). While this does reduce text size somewhat, this mostly results in code movement to .text.unlikely as a result of more/larger .cold. subfunctions. Less text on the hotpath is good for I$. $ ./compare.sh defconfig-build1 defconfig-build2 vmlinux.o setup_APIC_ibs 90 98 -12,+20 force_ibs_eilvt_setup 400 413 -57,+70 pci_serr_error 109 104 -54,+49 pci_serr_error 109 104 -54,+49 unknown_nmi_error 125 120 -76,+71 unknown_nmi_error 125 120 -76,+71 io_check_error 125 132 -97,+104 intel_thermal_interrupt 730 822 +92,+0 intel_init_thermal 951 945 -6,+0 generic_get_mtrr 301 294 -7,+0 generic_get_mtrr 301 294 -7,+0 generic_set_all 749 754 -44,+49 get_fixed_ranges 352 360 -41,+49 x86_acpi_suspend_lowlevel 369 363 -6,+0 check_tsc_sync_source 412 412 -71,+71 irq_migrate_all_off_this_cpu 662 674 -14,+26 clocksource_watchdog 748 748 -113,+113 __perf_event_account_interrupt 204 197 -7,+0 attempt_merge 1748 1741 -7,+0 intel_guc_send_ct 1424 1409 -15,+0 __fini_doorbell 235 231 -4,+0 bdw_set_cdclk 928 923 -5,+0 gen11_dsi_disable 1571 1556 -15,+0 gmbus_wait 493 488 -5,+0 md_make_request 376 369 -7,+0 __split_and_process_bio 543 536 -7,+0 delay_tsc 96 89 -7,+0 hsw_disable_pc8 696 691 -5,+0 tsc_verify_tsc_adjust 215 228 -22,+35 cpuidle_driver_unref 56 49 -7,+0 blk_account_io_completion 159 148 -11,+0 mtrr_wrmsr 95 99 -29,+33 __intel_wait_for_register_fw 401 419 +18,+0 cpuidle_driver_ref 43 36 -7,+0 cpuidle_get_driver 15 8 -7,+0 blk_account_io_done 535 528 -7,+0 irq_migrate_all_off_this_cpu 662 674 -14,+26 check_tsc_sync_source 412 412 -71,+71 irq_wait_for_poll 170 163 -7,+0 generic_end_io_acct 329 322 -7,+0 x86_acpi_suspend_lowlevel 369 363 -6,+0 nohz_balance_enter_idle 198 191 -7,+0 generic_start_io_acct 254 247 -7,+0 blk_account_io_start 341 334 -7,+0 perf_event_task_tick 682 675 -7,+0 intel_init_thermal 951 945 -6,+0 amd_e400_c1e_apic_setup 47 51 -28,+32 setup_APIC_eilvt 350 328 -22,+0 hsw_enable_pc8 1611 1605 -6,+0 total 12985947 12985892 -994,+939 Reported-by: Nadav Amit Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/smp.h | 3 ++- include/linux/smp.h | 45 +++++++++++++++++++++++++++++++-------------- 2 files changed, 33 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index da545df207b2..0d3fe060a44f 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -162,7 +162,8 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r); * from the initial startup. We map APIC_BASE very early in page_setup(), * so this is correct in the x86 case. */ -#define raw_smp_processor_id() (this_cpu_read(cpu_number)) +#define raw_smp_processor_id() this_cpu_read(cpu_number) +#define __smp_processor_id() __this_cpu_read(cpu_number) #ifdef CONFIG_X86_32 extern int safe_smp_processor_id(void); diff --git a/include/linux/smp.h b/include/linux/smp.h index a56f08ff3097..aa9e5e82d8c3 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -181,29 +181,46 @@ static inline int get_boot_cpu_id(void) #endif /* !SMP */ -/* - * smp_processor_id(): get the current CPU ID. +/** + * raw_processor_id() - get the current (unstable) CPU id + * + * For then you know what you are doing and need an unstable + * CPU id. + */ + +/** + * smp_processor_id() - get the current (stable) CPU id + * + * This is the normal accessor to the CPU id and should be used + * whenever possible. + * + * The CPU id is stable when: * - * if DEBUG_PREEMPT is enabled then we check whether it is - * used in a preemption-safe way. (smp_processor_id() is safe - * if it's used in a preemption-off critical section, or in - * a thread that is bound to the current CPU.) + * - IRQs are disabled; + * - preemption is disabled; + * - the task is CPU affine. * - * NOTE: raw_smp_processor_id() is for internal use only - * (smp_processor_id() is the preferred variant), but in rare - * instances it might also be used to turn off false positives - * (i.e. smp_processor_id() use that the debugging code reports but - * which use for some reason is legal). Don't use this to hack around - * the warning message, as your code might not work under PREEMPT. + * When CONFIG_DEBUG_PREEMPT; we verify these assumption and WARN + * when smp_processor_id() is used when the CPU id is not stable. */ + +/* + * Allow the architecture to differentiate between a stable and unstable read. + * For example, x86 uses an IRQ-safe asm-volatile read for the unstable but a + * regular asm read for the stable. + */ +#ifndef __smp_processor_id +#define __smp_processor_id(x) raw_smp_processor_id(x) +#endif + #ifdef CONFIG_DEBUG_PREEMPT extern unsigned int debug_smp_processor_id(void); # define smp_processor_id() debug_smp_processor_id() #else -# define smp_processor_id() raw_smp_processor_id() +# define smp_processor_id() __smp_processor_id() #endif -#define get_cpu() ({ preempt_disable(); smp_processor_id(); }) +#define get_cpu() ({ preempt_disable(); __smp_processor_id(); }) #define put_cpu() preempt_enable() /* -- cgit v1.2.3 From e7488e58c7cfe4be0c52db68622a0397bb75258e Mon Sep 17 00:00:00 2001 From: Yurii Pavlovskyi Date: Tue, 14 May 2019 20:59:01 +0200 Subject: platform/x86: wmi: Add function to get _UID of WMI device Add a new function to acpi.h / wmi.c that returns _UID of the ACPI WMI device. For example, it returns "ATK" for the following declaration in DSDT: Device (ATKD) { Name (_HID, "PNP0C14" /* Windows Management Instrumentation Device */) // _HID: Hardware ID Name (_UID, "ATK") // _UID: Unique ID .. Generally, it is possible that multiple PNP0C14 ACPI devices are present in the system as mentioned in the commit message of commit bff431e49ff5 ("ACPI: WMI: Add ACPI-WMI mapping driver"). Therefore the _UID is returned for a specific ACPI device that declares the given GUID, to which it is also mapped by other methods of wmi module. Signed-off-by: Yurii Pavlovskyi Signed-off-by: Andy Shevchenko --- drivers/platform/x86/wmi.c | 19 +++++++++++++++++++ include/linux/acpi.h | 1 + 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 7b26b6ccf1a0..b08ffb769cbe 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -635,6 +635,25 @@ bool wmi_has_guid(const char *guid_string) } EXPORT_SYMBOL_GPL(wmi_has_guid); +/** + * wmi_get_acpi_device_uid() - Get _UID name of ACPI device that defines GUID + * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba + * + * Find the _UID of ACPI device associated with this WMI GUID. + * + * Return: The ACPI _UID field value or NULL if the WMI GUID was not found + */ +char *wmi_get_acpi_device_uid(const char *guid_string) +{ + struct wmi_block *wblock = NULL; + + if (!find_guid(guid_string, &wblock)) + return NULL; + + return acpi_device_uid(wblock->acpi_device); +} +EXPORT_SYMBOL_GPL(wmi_get_acpi_device_uid); + static struct wmi_block *dev_to_wblock(struct device *dev) { return container_of(dev, struct wmi_block, dev.dev); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 98440df7fe42..d867a9a904f9 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -380,6 +380,7 @@ extern acpi_status wmi_install_notify_handler(const char *guid, extern acpi_status wmi_remove_notify_handler(const char *guid); extern acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out); extern bool wmi_has_guid(const char *guid); +extern char *wmi_get_acpi_device_uid(const char *guid); #endif /* CONFIG_ACPI_WMI */ -- cgit v1.2.3 From e0668f28888184f6c633110a37386f2d4a6fa00e Mon Sep 17 00:00:00 2001 From: Yurii Pavlovskyi Date: Tue, 14 May 2019 21:00:31 +0200 Subject: platform/x86: asus-wmi: Improve DSTS WMI method ID detection The DSTS method detection mistakenly selects DCTS instead of DSTS if nothing is returned when the method ID is not defined in WMNB. As a result, the control of keyboard backlight is not functional for TUF Gaming series laptops. Implement detection based on _UID of the WMI device instead. There is evidence that DCTS is handled by ACPI WMI devices that have _UID ASUSWMI, whereas none of the devices without ASUSWMI respond to DCTS and DSTS is used instead [1]. DSDT examples: FX505GM (_UID ATK): Method (WMNB, 3, Serialized) { ... If ((Local0 == 0x53545344)) { ... Return (Zero) } ... // No return } K54C (_UID ATK): Method (WMNB, 3, Serialized) { ... If ((Local0 == 0x53545344)) { ... Return (0x02) } ... Return (0xFFFFFFFE) } [1] Link: https://lkml.org/lkml/2019/4/11/322 Signed-off-by: Yurii Pavlovskyi Suggested-by: Daniel Drake Signed-off-by: Andy Shevchenko --- drivers/hid/hid-asus.c | 2 +- drivers/platform/x86/asus-wmi.c | 23 ++++++++++++++++++++--- include/linux/platform_data/x86/asus-wmi.h | 4 ++-- 3 files changed, 23 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 336aeaed1159..1d01fe23ca0c 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -396,7 +396,7 @@ static bool asus_kbd_wmi_led_control_present(struct hid_device *hdev) if (!IS_ENABLED(CONFIG_ASUS_WMI)) return false; - ret = asus_wmi_evaluate_method(ASUS_WMI_METHODID_DSTS2, + ret = asus_wmi_evaluate_method(ASUS_WMI_METHODID_DSTS, ASUS_WMI_DEVID_KBD_BACKLIGHT, 0, &value); hid_dbg(hdev, "WMI backlight check: rc %d value %x", ret, value); if (ret) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index c67f11e0d6e7..ef526dcfeac5 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -83,6 +83,8 @@ MODULE_LICENSE("GPL"); #define USB_INTEL_XUSB2PR 0xD0 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI 0x9c31 +#define ASUS_ACPI_UID_ASUSWMI "ASUSWMI" + static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL }; static bool ashs_present(void) @@ -1874,6 +1876,8 @@ static int asus_wmi_sysfs_init(struct platform_device *device) */ static int asus_wmi_platform_init(struct asus_wmi *asus) { + struct device *dev = &asus->platform_device->dev; + char *wmi_uid; int rv; /* INIT enable hotkeys on some models */ @@ -1903,11 +1907,24 @@ static int asus_wmi_platform_init(struct asus_wmi *asus) * Note, on most Eeepc, there is no way to check if a method exist * or note, while on notebooks, they returns 0xFFFFFFFE on failure, * but once again, SPEC may probably be used for that kind of things. + * + * Additionally at least TUF Gaming series laptops return nothing for + * unknown methods, so the detection in this way is not possible. + * + * There is strong indication that only ACPI WMI devices that have _UID + * equal to "ASUSWMI" use DCTS whereas those with "ATK" use DSTS. */ - if (!asus_wmi_evaluate_method(ASUS_WMI_METHODID_DSTS, 0, 0, NULL)) + wmi_uid = wmi_get_acpi_device_uid(ASUS_WMI_MGMT_GUID); + if (!wmi_uid) + return -ENODEV; + + if (!strcmp(wmi_uid, ASUS_ACPI_UID_ASUSWMI)) { + dev_info(dev, "Detected ASUSWMI, use DCTS\n"); + asus->dsts_id = ASUS_WMI_METHODID_DCTS; + } else { + dev_info(dev, "Detected %s, not ASUSWMI, use DSTS\n", wmi_uid); asus->dsts_id = ASUS_WMI_METHODID_DSTS; - else - asus->dsts_id = ASUS_WMI_METHODID_DSTS2; + } /* CWAP allow to define the behavior of the Fn+F2 key, * this method doesn't seems to be present on Eee PCs */ diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index bfba245636a7..0668f76df921 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -18,8 +18,8 @@ #define ASUS_WMI_METHODID_GDSP 0x50534447 /* Get DiSPlay output */ #define ASUS_WMI_METHODID_DEVP 0x50564544 /* DEVice Policy */ #define ASUS_WMI_METHODID_OSVR 0x5256534F /* OS VeRsion */ -#define ASUS_WMI_METHODID_DSTS 0x53544344 /* Device STatuS */ -#define ASUS_WMI_METHODID_DSTS2 0x53545344 /* Device STatuS #2*/ +#define ASUS_WMI_METHODID_DCTS 0x53544344 /* Device status (DCTS) */ +#define ASUS_WMI_METHODID_DSTS 0x53545344 /* Device status (DSTS) */ #define ASUS_WMI_METHODID_BSTS 0x53545342 /* Bios STatuS ? */ #define ASUS_WMI_METHODID_DEVS 0x53564544 /* DEVice Set */ #define ASUS_WMI_METHODID_CFVS 0x53564643 /* CPU Frequency Volt Set */ -- cgit v1.2.3 From b096f626a6827ad2ced5ebdbdc04e62422d463f6 Mon Sep 17 00:00:00 2001 From: Yurii Pavlovskyi Date: Tue, 14 May 2019 21:07:05 +0200 Subject: platform/x86: asus-wmi: Switch fan boost mode The WMI exposes a write-only device ID where up to three fan modes can be switched on some laptops (TUF Gaming FX505GM). There is a hotkey combination Fn-F5 that does have a fan icon, which is designed to toggle between fan modes. The DSTS of the device ID returns information about the presence of this capability and the presence of each of the two additional fan modes as a bitmask (0x01 - overboost present, 0x02 - silent present) [1]. Add a SysFS entry that reads the last written value and updates value in WMI on write and a hotkey handler that toggles the modes taking into account their availability according to DSTS. Modes: * 0x00 - normal or balanced, * 0x01 - overboost, increased fan RPM, * 0x02 - silent, decreased fan RPM [1] Link: https://lkml.org/lkml/2019/4/12/110 Signed-off-by: Yurii Pavlovskyi Suggested-by: Daniel Drake Signed-off-by: Andy Shevchenko --- Documentation/ABI/testing/sysfs-platform-asus-wmi | 10 ++ drivers/platform/x86/asus-wmi.c | 151 ++++++++++++++++++++-- include/linux/platform_data/x86/asus-wmi.h | 1 + 3 files changed, 154 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-platform-asus-wmi b/Documentation/ABI/testing/sysfs-platform-asus-wmi index 019e1e29370e..87ae5cc983bf 100644 --- a/Documentation/ABI/testing/sysfs-platform-asus-wmi +++ b/Documentation/ABI/testing/sysfs-platform-asus-wmi @@ -36,3 +36,13 @@ KernelVersion: 3.5 Contact: "AceLan Kao" Description: Resume on lid open. 1 means on, 0 means off. + +What: /sys/devices/platform//fan_mode +Date: Apr 2019 +KernelVersion: 5.2 +Contact: "Yurii Pavlovskyi" +Description: + Fan boost mode: + * 0 - normal, + * 1 - overboost, + * 2 - silent diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index a1d85667383c..5712bc56fa10 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -70,6 +70,7 @@ MODULE_LICENSE("GPL"); #define NOTIFY_KBD_BRTUP 0xc4 #define NOTIFY_KBD_BRTDWN 0xc5 #define NOTIFY_KBD_BRTTOGGLE 0xc7 +#define NOTIFY_KBD_FBM 0x99 #define ASUS_WMI_FNLOCK_BIOS_DISABLED BIT(0) @@ -80,6 +81,13 @@ MODULE_LICENSE("GPL"); #define ASUS_FAN_CTRL_MANUAL 1 #define ASUS_FAN_CTRL_AUTO 2 +#define ASUS_FAN_MODE_NORMAL 0 +#define ASUS_FAN_MODE_OVERBOOST 1 +#define ASUS_FAN_MODE_OVERBOOST_MASK 0x01 +#define ASUS_FAN_MODE_SILENT 2 +#define ASUS_FAN_MODE_SILENT_MASK 0x02 +#define ASUS_FAN_MODES_MASK 0x03 + #define USB_INTEL_XUSB2PR 0xD0 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI 0x9c31 @@ -187,6 +195,10 @@ struct asus_wmi { int asus_hwmon_num_fans; int asus_hwmon_pwm; + bool fan_mode_available; + u8 fan_mode_mask; + u8 fan_mode; + struct hotplug_slot hotplug_slot; struct mutex hotplug_lock; struct mutex wmi_lock; @@ -1483,6 +1495,116 @@ static int asus_wmi_fan_init(struct asus_wmi *asus) return 0; } +/* Fan mode *******************************************************************/ + +static int fan_mode_check_present(struct asus_wmi *asus) +{ + u32 result; + int err; + + asus->fan_mode_available = false; + + err = asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_FAN_MODE, &result); + if (err) { + if (err == -ENODEV) + return 0; + else + return err; + } + + if ((result & ASUS_WMI_DSTS_PRESENCE_BIT) && + (result & ASUS_FAN_MODES_MASK)) { + asus->fan_mode_available = true; + asus->fan_mode_mask = result & ASUS_FAN_MODES_MASK; + } + + return 0; +} + +static int fan_mode_write(struct asus_wmi *asus) +{ + int err; + u8 value; + u32 retval; + + value = asus->fan_mode; + + pr_info("Set fan mode: %u\n", value); + err = asus_wmi_set_devstate(ASUS_WMI_DEVID_FAN_MODE, value, &retval); + + if (err) { + pr_warn("Failed to set fan mode: %d\n", err); + return err; + } + + if (retval != 1) { + pr_warn("Failed to set fan mode (retval): 0x%x\n", retval); + return -EIO; + } + + return 0; +} + +static int fan_mode_switch_next(struct asus_wmi *asus) +{ + if (asus->fan_mode == ASUS_FAN_MODE_NORMAL) { + if (asus->fan_mode_mask & ASUS_FAN_MODE_OVERBOOST_MASK) + asus->fan_mode = ASUS_FAN_MODE_OVERBOOST; + else if (asus->fan_mode_mask & ASUS_FAN_MODE_SILENT_MASK) + asus->fan_mode = ASUS_FAN_MODE_SILENT; + } else if (asus->fan_mode == ASUS_FAN_MODE_OVERBOOST) { + if (asus->fan_mode_mask & ASUS_FAN_MODE_SILENT_MASK) + asus->fan_mode = ASUS_FAN_MODE_SILENT; + else + asus->fan_mode = ASUS_FAN_MODE_NORMAL; + } else { + asus->fan_mode = ASUS_FAN_MODE_NORMAL; + } + + return fan_mode_write(asus); +} + +static ssize_t fan_mode_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct asus_wmi *asus = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", asus->fan_mode); +} + +static ssize_t fan_mode_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + int result; + u8 new_mode; + + struct asus_wmi *asus = dev_get_drvdata(dev); + + result = kstrtou8(buf, 10, &new_mode); + if (result < 0) { + pr_warn("Trying to store invalid value\n"); + return result; + } + + if (new_mode == ASUS_FAN_MODE_OVERBOOST) { + if (!(asus->fan_mode_mask & ASUS_FAN_MODE_OVERBOOST_MASK)) + return -EINVAL; + } else if (new_mode == ASUS_FAN_MODE_SILENT) { + if (!(asus->fan_mode_mask & ASUS_FAN_MODE_SILENT_MASK)) + return -EINVAL; + } else if (new_mode != ASUS_FAN_MODE_NORMAL) { + return -EINVAL; + } + + asus->fan_mode = new_mode; + fan_mode_write(asus); + + return result; +} + +// Fan mode: 0 - normal, 1 - overboost, 2 - silent +static DEVICE_ATTR_RW(fan_mode); + /* Backlight ******************************************************************/ static int read_backlight_power(struct asus_wmi *asus) @@ -1761,6 +1883,11 @@ static void asus_wmi_handle_event_code(int code, struct asus_wmi *asus) return; } + if (asus->fan_mode_available && code == NOTIFY_KBD_FBM) { + fan_mode_switch_next(asus); + return; + } + if (is_display_toggle(code) && asus->driver->quirks->no_display_toggle) return; @@ -1917,6 +2044,7 @@ static struct attribute *platform_attributes[] = { &dev_attr_touchpad.attr, &dev_attr_lid_resume.attr, &dev_attr_als_enable.attr, + &dev_attr_fan_mode.attr, NULL }; @@ -1938,6 +2066,8 @@ static umode_t asus_sysfs_is_visible(struct kobject *kobj, devid = ASUS_WMI_DEVID_LID_RESUME; else if (attr == &dev_attr_als_enable.attr) devid = ASUS_WMI_DEVID_ALS_ENABLE; + else if (attr == &dev_attr_fan_mode.attr) + ok = asus->fan_mode_available; if (devid != -1) ok = !(asus_wmi_get_devstate_simple(asus, devid) < 0); @@ -2037,12 +2167,7 @@ static int asus_wmi_platform_init(struct asus_wmi *asus) asus_wmi_set_devstate(ASUS_WMI_DEVID_CWAP, asus->driver->quirks->wapf, NULL); - return asus_wmi_sysfs_init(asus->platform_device); -} - -static void asus_wmi_platform_exit(struct asus_wmi *asus) -{ - asus_wmi_sysfs_exit(asus->platform_device); + return 0; } /* debugfs ********************************************************************/ @@ -2200,6 +2325,14 @@ static int asus_wmi_add(struct platform_device *pdev) if (err) goto fail_platform; + err = fan_mode_check_present(asus); + if (err) + goto fail_fan_mode; + + err = asus_wmi_sysfs_init(asus->platform_device); + if (err) + goto fail_sysfs; + err = asus_wmi_input_init(asus); if (err) goto fail_input; @@ -2277,7 +2410,9 @@ fail_leds: fail_hwmon: asus_wmi_input_exit(asus); fail_input: - asus_wmi_platform_exit(asus); + asus_wmi_sysfs_exit(asus->platform_device); +fail_sysfs: +fail_fan_mode: fail_platform: kfree(asus); return err; @@ -2294,7 +2429,7 @@ static int asus_wmi_remove(struct platform_device *device) asus_wmi_led_exit(asus); asus_wmi_rfkill_exit(asus); asus_wmi_debugfs_exit(asus); - asus_wmi_platform_exit(asus); + asus_wmi_sysfs_exit(asus->platform_device); asus_hwmon_fan_set_auto(asus); kfree(asus); diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 0668f76df921..8551156b8dca 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -57,6 +57,7 @@ #define ASUS_WMI_DEVID_KBD_BACKLIGHT 0x00050021 #define ASUS_WMI_DEVID_LIGHT_SENSOR 0x00050022 /* ?? */ #define ASUS_WMI_DEVID_LIGHTBAR 0x00050025 +#define ASUS_WMI_DEVID_FAN_MODE 0x00110018 /* Misc */ #define ASUS_WMI_DEVID_CAMERA 0x00060013 -- cgit v1.2.3 From a48e23385fcf397e69e2a75d72a81c545ec8bec2 Mon Sep 17 00:00:00 2001 From: Mattias Jacobsson <2pi@mok.nu> Date: Mon, 27 May 2019 18:21:29 +0200 Subject: platform/x86: wmi: add context pointer field to struct wmi_device_id When using wmi_install_notify_handler() to initialize a WMI handler a data pointer can be supplied which will be passed on to the notification handler. No similar feature exist when handling WMI events via struct wmi_driver. Add a context field pointer to struct wmi_device_id and add a function find_guid_context() to retrieve that context pointer. Signed-off-by: Mattias Jacobsson <2pi@mok.nu> Signed-off-by: Andy Shevchenko --- drivers/platform/x86/wmi.c | 22 ++++++++++++++++++++++ include/linux/mod_devicetable.h | 1 + 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index b08ffb769cbe..f3be1c008856 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -146,6 +146,28 @@ static bool find_guid(const char *guid_string, struct wmi_block **out) return false; } +static const void *find_guid_context(struct wmi_block *wblock, + struct wmi_driver *wdriver) +{ + const struct wmi_device_id *id; + uuid_le guid_input; + + if (wblock == NULL || wdriver == NULL) + return NULL; + if (wdriver->id_table == NULL) + return NULL; + + id = wdriver->id_table; + while (*id->guid_string) { + if (uuid_le_to_bin(id->guid_string, &guid_input)) + continue; + if (!memcmp(wblock->gblock.guid, &guid_input, 16)) + return id->context; + id++; + } + return NULL; +} + static int get_subobj_info(acpi_handle handle, const char *pathname, struct acpi_device_info **info) { diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 448621c32e4d..09366859aac2 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -798,6 +798,7 @@ struct tee_client_device_id { */ struct wmi_device_id { const char guid_string[UUID_STRING_LEN+1]; + const void *context; }; #endif /* LINUX_MOD_DEVICETABLE_H */ -- cgit v1.2.3 From 440c4983de262f78033ec58f6abcd199a664327d Mon Sep 17 00:00:00 2001 From: Mattias Jacobsson <2pi@mok.nu> Date: Mon, 27 May 2019 18:21:30 +0200 Subject: platform/x86: wmi: add context argument to the probe function The struct wmi_device_id has a context pointer field, forward this pointer as an argument to the probe function in struct wmi_driver. Update existing users of the same probe function to accept this new context argument. Signed-off-by: Mattias Jacobsson <2pi@mok.nu> Signed-off-by: Andy Shevchenko --- drivers/platform/x86/dell-smbios-wmi.c | 2 +- drivers/platform/x86/dell-wmi-descriptor.c | 3 ++- drivers/platform/x86/dell-wmi.c | 2 +- drivers/platform/x86/huawei-wmi.c | 2 +- drivers/platform/x86/intel-wmi-thunderbolt.c | 3 ++- drivers/platform/x86/wmi-bmof.c | 2 +- drivers/platform/x86/wmi.c | 3 ++- include/linux/wmi.h | 2 +- 8 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/platform/x86/dell-smbios-wmi.c b/drivers/platform/x86/dell-smbios-wmi.c index c3ed3c8c17b9..add2687079f7 100644 --- a/drivers/platform/x86/dell-smbios-wmi.c +++ b/drivers/platform/x86/dell-smbios-wmi.c @@ -146,7 +146,7 @@ fail_smbios_cmd: return ret; } -static int dell_smbios_wmi_probe(struct wmi_device *wdev) +static int dell_smbios_wmi_probe(struct wmi_device *wdev, const void *context) { struct wmi_driver *wdriver = container_of(wdev->dev.driver, struct wmi_driver, driver); diff --git a/drivers/platform/x86/dell-wmi-descriptor.c b/drivers/platform/x86/dell-wmi-descriptor.c index 14ab250b7d5a..9994fd1a5acf 100644 --- a/drivers/platform/x86/dell-wmi-descriptor.c +++ b/drivers/platform/x86/dell-wmi-descriptor.c @@ -106,7 +106,8 @@ EXPORT_SYMBOL_GPL(dell_wmi_get_hotfix); * WMI buffer length 12 4 * WMI hotfix number 16 4 */ -static int dell_wmi_descriptor_probe(struct wmi_device *wdev) +static int dell_wmi_descriptor_probe(struct wmi_device *wdev, + const void *context) { union acpi_object *obj = NULL; struct descriptor_priv *priv; diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c index d118bb73fcae..72b0a69a6ed0 100644 --- a/drivers/platform/x86/dell-wmi.c +++ b/drivers/platform/x86/dell-wmi.c @@ -672,7 +672,7 @@ static int dell_wmi_events_set_enabled(bool enable) return dell_smbios_error(ret); } -static int dell_wmi_probe(struct wmi_device *wdev) +static int dell_wmi_probe(struct wmi_device *wdev, const void *context) { struct dell_wmi_priv *priv; int ret; diff --git a/drivers/platform/x86/huawei-wmi.c b/drivers/platform/x86/huawei-wmi.c index 52fcac5b393a..195a7f3638cb 100644 --- a/drivers/platform/x86/huawei-wmi.c +++ b/drivers/platform/x86/huawei-wmi.c @@ -166,7 +166,7 @@ static int huawei_wmi_input_setup(struct wmi_device *wdev) return input_register_device(priv->idev); } -static int huawei_wmi_probe(struct wmi_device *wdev) +static int huawei_wmi_probe(struct wmi_device *wdev, const void *context) { struct huawei_wmi_priv *priv; int err; diff --git a/drivers/platform/x86/intel-wmi-thunderbolt.c b/drivers/platform/x86/intel-wmi-thunderbolt.c index 4dfa61434a76..974c22a7ff61 100644 --- a/drivers/platform/x86/intel-wmi-thunderbolt.c +++ b/drivers/platform/x86/intel-wmi-thunderbolt.c @@ -56,7 +56,8 @@ static const struct attribute_group tbt_attribute_group = { .attrs = tbt_attrs, }; -static int intel_wmi_thunderbolt_probe(struct wmi_device *wdev) +static int intel_wmi_thunderbolt_probe(struct wmi_device *wdev, + const void *context) { int ret; diff --git a/drivers/platform/x86/wmi-bmof.c b/drivers/platform/x86/wmi-bmof.c index 8751a13134be..105a82b6b076 100644 --- a/drivers/platform/x86/wmi-bmof.c +++ b/drivers/platform/x86/wmi-bmof.c @@ -54,7 +54,7 @@ read_bmof(struct file *filp, struct kobject *kobj, return count; } -static int wmi_bmof_probe(struct wmi_device *wdev) +static int wmi_bmof_probe(struct wmi_device *wdev, const void *context) { struct bmof_priv *priv; int ret; diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index f3be1c008856..2163fd8bf9e1 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -945,7 +945,8 @@ static int wmi_dev_probe(struct device *dev) dev_warn(dev, "failed to enable device -- probing anyway\n"); if (wdriver->probe) { - ret = wdriver->probe(dev_to_wdev(dev)); + ret = wdriver->probe(dev_to_wdev(dev), + find_guid_context(wblock, wdriver)); if (ret != 0) goto probe_failure; } diff --git a/include/linux/wmi.h b/include/linux/wmi.h index 592f81afecbb..1e84c474a993 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -44,7 +44,7 @@ struct wmi_driver { struct device_driver driver; const struct wmi_device_id *id_table; - int (*probe)(struct wmi_device *wdev); + int (*probe)(struct wmi_device *wdev, const void *context); int (*remove)(struct wmi_device *wdev); void (*notify)(struct wmi_device *device, union acpi_object *data); long (*filter_callback)(struct wmi_device *wdev, unsigned int cmd, -- cgit v1.2.3 From 36f34737ff48f66c8a19b8788311e4b40d4adf80 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 15 Jun 2019 20:47:37 +0300 Subject: spi: Add a prototype for exported spi_set_cs_timing() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiler is not happy about spi_set_cs_timing() prototype. drivers/spi/spi.c:3016:6: warning: no previous prototype for ‘spi_set_cs_timing’ [-Wmissing-prototypes] void spi_set_cs_timing(struct spi_device *spi, u8 setup, u8 hold, ^~~~~~~~~~~~~~~~~ Let's add it to the header. Signed-off-by: Andy Shevchenko Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index d0c5ba746e01..0ec11f2911af 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -980,6 +980,8 @@ static inline void spi_message_free(struct spi_message *m) kfree(m); } +extern void spi_set_cs_timing(struct spi_device *spi, u8 setup, u8 hold, u8 inactive_dly); + extern int spi_setup(struct spi_device *spi); extern int spi_async(struct spi_device *spi, struct spi_message *message); extern int spi_async_locked(struct spi_device *spi, -- cgit v1.2.3 From 3006a5224f15cf68edc4878799ac6d6089861518 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Fri, 7 Jun 2019 02:36:05 +0200 Subject: netfilter: synproxy: remove module dependency on IPv6 SYNPROXY This is a prerequisite for the infrastructure module NETFILTER_SYNPROXY. The new module is needed to avoid duplicated code for the SYNPROXY nftables support. Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 36 ++++++++++++++++++++++++++++++++++++ net/ipv6/netfilter.c | 2 ++ 2 files changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 3a3dc4b1f0e7..35b12525ee45 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -8,6 +8,7 @@ #define __LINUX_IP6_NETFILTER_H #include +#include /* Extra routing may needed on local out, as the QUEUE target never returns * control to the table. @@ -35,6 +36,10 @@ struct nf_ipv6_ops { struct in6_addr *saddr); int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict); + u32 (*cookie_init_sequence)(const struct ipv6hdr *iph, + const struct tcphdr *th, u16 *mssp); + int (*cookie_v6_check)(const struct ipv6hdr *iph, + const struct tcphdr *th, __u32 cookie); #endif void (*route_input)(struct sk_buff *skb); int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, @@ -154,6 +159,37 @@ static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb) #endif } +static inline u32 nf_ipv6_cookie_init_sequence(const struct ipv6hdr *iph, + const struct tcphdr *th, + u16 *mssp) +{ +#if IS_MODULE(CONFIG_IPV6) + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (v6_ops) + return v6_ops->cookie_init_sequence(iph, th, mssp); + + return 0; +#else + return __cookie_v6_init_sequence(iph, th, mssp); +#endif +} + +static inline int nf_cookie_v6_check(const struct ipv6hdr *iph, + const struct tcphdr *th, __u32 cookie) +{ +#if IS_MODULE(CONFIG_IPV6) + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (v6_ops) + return v6_ops->cookie_v6_check(iph, th, cookie); + + return 0; +#else + return __cookie_v6_check(iph, th, cookie); +#endif +} + __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 86048dce301b..dffb10fdc3e8 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -234,6 +234,8 @@ static const struct nf_ipv6_ops ipv6ops = { .route_me_harder = ip6_route_me_harder, .dev_get_saddr = ipv6_dev_get_saddr, .route = __nf_ip6_route, + .cookie_init_sequence = __cookie_v6_init_sequence, + .cookie_v6_check = __cookie_v6_check, #endif .route_input = ip6_route_input, .fragment = ip6_fragment, -- cgit v1.2.3 From c681edae33e86ff27be2d6cc717663d91df20b0e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 17 Jun 2019 10:09:33 +0200 Subject: net: ipv4: move tcp_fastopen server side code to SipHash library Using a bare block cipher in non-crypto code is almost always a bad idea, not only for security reasons (and we've seen some examples of this in the kernel in the past), but also for performance reasons. In the TCP fastopen case, we call into the bare AES block cipher one or two times (depending on whether the connection is IPv4 or IPv6). On most systems, this results in a call chain such as crypto_cipher_encrypt_one(ctx, dst, src) crypto_cipher_crt(tfm)->cit_encrypt_one(crypto_cipher_tfm(tfm), ...); aesni_encrypt kernel_fpu_begin(); aesni_enc(ctx, dst, src); // asm routine kernel_fpu_end(); It is highly unlikely that the use of special AES instructions has a benefit in this case, especially since we are doing the above twice for IPv6 connections, instead of using a transform which can process the entire input in one go. We could switch to the cbcmac(aes) shash, which would at least get rid of the duplicated overhead in *some* cases (i.e., today, only arm64 has an accelerated implementation of cbcmac(aes), while x86 will end up using the generic cbcmac template wrapping the AES-NI cipher, which basically ends up doing exactly the above). However, in the given context, it makes more sense to use a light-weight MAC algorithm that is more suitable for the purpose at hand, such as SipHash. Since the output size of SipHash already matches our chosen value for TCP_FASTOPEN_COOKIE_SIZE, and given that it accepts arbitrary input sizes, this greatly simplifies the code as well. NOTE: Server farms backing a single server IP for load balancing purposes and sharing a single fastopen key will be adversely affected by this change unless all systems in the pool receive their kernel upgrades at the same time. Signed-off-by: Ard Biesheuvel Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 7 +--- include/net/tcp.h | 10 ++--- net/Kconfig | 2 - net/ipv4/tcp_fastopen.c | 97 ++++++++++++++++--------------------------------- 4 files changed, 36 insertions(+), 80 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c23019a3b264..9ea0e71f5c6a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -58,12 +58,7 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) /* TCP Fast Open Cookie as stored in memory */ struct tcp_fastopen_cookie { - union { - u8 val[TCP_FASTOPEN_COOKIE_MAX]; -#if IS_ENABLED(CONFIG_IPV6) - struct in6_addr addr; -#endif - }; + u64 val[TCP_FASTOPEN_COOKIE_MAX / sizeof(u64)]; s8 len; bool exp; /* In RFC6994 experimental option format */ }; diff --git a/include/net/tcp.h b/include/net/tcp.h index 96e0e53ff440..184930b02779 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1628,9 +1628,9 @@ bool tcp_fastopen_defer_connect(struct sock *sk, int *err); /* Fastopen key context */ struct tcp_fastopen_context { - struct crypto_cipher *tfm[TCP_FASTOPEN_KEY_MAX]; - __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH]; - struct rcu_head rcu; + __u8 key[TCP_FASTOPEN_KEY_MAX][TCP_FASTOPEN_KEY_LENGTH]; + int num; + struct rcu_head rcu; }; extern unsigned int sysctl_tcp_fastopen_blackhole_timeout; @@ -1665,9 +1665,7 @@ bool tcp_fastopen_cookie_match(const struct tcp_fastopen_cookie *foc, static inline int tcp_fastopen_context_len(const struct tcp_fastopen_context *ctx) { - if (ctx->tfm[1]) - return 2; - return 1; + return ctx->num; } /* Latencies incurred by various limits for a sender. They are diff --git a/net/Kconfig b/net/Kconfig index d122f53c6fa2..57f51a279ad6 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -67,8 +67,6 @@ source "net/xdp/Kconfig" config INET bool "TCP/IP networking" - select CRYPTO - select CRYPTO_AES ---help--- These are the protocols used on the Internet and on most local Ethernets. It is highly recommended to say Y here (this will enlarge diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 7d19fa4c8121..46b67128e1ca 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -37,14 +38,8 @@ static void tcp_fastopen_ctx_free(struct rcu_head *head) { struct tcp_fastopen_context *ctx = container_of(head, struct tcp_fastopen_context, rcu); - int i; - /* We own ctx, thus no need to hold the Fastopen-lock */ - for (i = 0; i < TCP_FASTOPEN_KEY_MAX; i++) { - if (ctx->tfm[i]) - crypto_free_cipher(ctx->tfm[i]); - } - kfree(ctx); + kzfree(ctx); } void tcp_fastopen_destroy_cipher(struct sock *sk) @@ -72,41 +67,6 @@ void tcp_fastopen_ctx_destroy(struct net *net) call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free); } -static struct tcp_fastopen_context *tcp_fastopen_alloc_ctx(void *primary_key, - void *backup_key, - unsigned int len) -{ - struct tcp_fastopen_context *new_ctx; - void *key = primary_key; - int err, i; - - new_ctx = kmalloc(sizeof(*new_ctx), GFP_KERNEL); - if (!new_ctx) - return ERR_PTR(-ENOMEM); - for (i = 0; i < TCP_FASTOPEN_KEY_MAX; i++) - new_ctx->tfm[i] = NULL; - for (i = 0; i < (backup_key ? 2 : 1); i++) { - new_ctx->tfm[i] = crypto_alloc_cipher("aes", 0, 0); - if (IS_ERR(new_ctx->tfm[i])) { - err = PTR_ERR(new_ctx->tfm[i]); - new_ctx->tfm[i] = NULL; - pr_err("TCP: TFO aes cipher alloc error: %d\n", err); - goto out; - } - err = crypto_cipher_setkey(new_ctx->tfm[i], key, len); - if (err) { - pr_err("TCP: TFO cipher key error: %d\n", err); - goto out; - } - memcpy(&new_ctx->key[i * TCP_FASTOPEN_KEY_LENGTH], key, len); - key = backup_key; - } - return new_ctx; -out: - tcp_fastopen_ctx_free(&new_ctx->rcu); - return ERR_PTR(err); -} - int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, void *primary_key, void *backup_key, unsigned int len) @@ -115,11 +75,20 @@ int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, struct fastopen_queue *q; int err = 0; - ctx = tcp_fastopen_alloc_ctx(primary_key, backup_key, len); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + err = -ENOMEM; goto out; } + + memcpy(ctx->key[0], primary_key, len); + if (backup_key) { + memcpy(ctx->key[1], backup_key, len); + ctx->num = 2; + } else { + ctx->num = 1; + } + spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); if (sk) { q = &inet_csk(sk)->icsk_accept_queue.fastopenq; @@ -141,31 +110,30 @@ out: static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req, struct sk_buff *syn, - struct crypto_cipher *tfm, + const u8 *key, struct tcp_fastopen_cookie *foc) { + BUILD_BUG_ON(TCP_FASTOPEN_KEY_LENGTH != sizeof(siphash_key_t)); + BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u64)); + if (req->rsk_ops->family == AF_INET) { const struct iphdr *iph = ip_hdr(syn); - __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 }; - crypto_cipher_encrypt_one(tfm, foc->val, (void *)path); + foc->val[0] = siphash(&iph->saddr, + sizeof(iph->saddr) + + sizeof(iph->daddr), + (const siphash_key_t *)key); foc->len = TCP_FASTOPEN_COOKIE_SIZE; return true; } - #if IS_ENABLED(CONFIG_IPV6) if (req->rsk_ops->family == AF_INET6) { const struct ipv6hdr *ip6h = ipv6_hdr(syn); - struct tcp_fastopen_cookie tmp; - struct in6_addr *buf; - int i; - - crypto_cipher_encrypt_one(tfm, tmp.val, - (void *)&ip6h->saddr); - buf = &tmp.addr; - for (i = 0; i < 4; i++) - buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i]; - crypto_cipher_encrypt_one(tfm, foc->val, (void *)buf); + + foc->val[0] = siphash(&ip6h->saddr, + sizeof(ip6h->saddr) + + sizeof(ip6h->daddr), + (const siphash_key_t *)key); foc->len = TCP_FASTOPEN_COOKIE_SIZE; return true; } @@ -173,11 +141,8 @@ static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req, return false; } -/* Generate the fastopen cookie by doing aes128 encryption on both - * the source and destination addresses. Pad 0s for IPv4 or IPv4-mapped-IPv6 - * addresses. For the longer IPv6 addresses use CBC-MAC. - * - * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE. +/* Generate the fastopen cookie by applying SipHash to both the source and + * destination addresses. */ static void tcp_fastopen_cookie_gen(struct sock *sk, struct request_sock *req, @@ -189,7 +154,7 @@ static void tcp_fastopen_cookie_gen(struct sock *sk, rcu_read_lock(); ctx = tcp_fastopen_get_ctx(sk); if (ctx) - __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->tfm[0], foc); + __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->key[0], foc); rcu_read_unlock(); } @@ -253,7 +218,7 @@ static int tcp_fastopen_cookie_gen_check(struct sock *sk, if (!ctx) goto out; for (i = 0; i < tcp_fastopen_context_len(ctx); i++) { - __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->tfm[i], foc); + __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->key[i], foc); if (tcp_fastopen_cookie_match(foc, orig)) { ret = i + 1; goto out; -- cgit v1.2.3 From 4ed9890c4c44d2ead7b57ad65425e3fbe9b9d42a Mon Sep 17 00:00:00 2001 From: Anurag Kumar Vulisha Date: Fri, 10 May 2019 12:37:27 +0530 Subject: usb: gadget: send usb_gadget as an argument in get_config_params Passing struct usb_gadget * as an extra argument in get_config_params makes gadget drivers to easily update the U1DevExitLat & U2DevExitLat values based on the values passed from the device tree. This patch does the same Signed-off-by: Anurag Kumar Vulisha Signed-off-by: Felipe Balbi --- drivers/usb/gadget/composite.c | 2 +- include/linux/usb/gadget.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index b8a15840b4ff..9118b42c70b6 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -653,7 +653,7 @@ static int bos_desc(struct usb_composite_dev *cdev) /* Get Controller configuration */ if (cdev->gadget->ops->get_config_params) { - cdev->gadget->ops->get_config_params( + cdev->gadget->ops->get_config_params(cdev->gadget, &dcd_config_params); } else { dcd_config_params.bU1devExitLat = diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 7595056b96c1..fb19141151d8 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -310,7 +310,8 @@ struct usb_gadget_ops { int (*pullup) (struct usb_gadget *, int is_on); int (*ioctl)(struct usb_gadget *, unsigned code, unsigned long param); - void (*get_config_params)(struct usb_dcd_config_params *); + void (*get_config_params)(struct usb_gadget *, + struct usb_dcd_config_params *); int (*udc_start)(struct usb_gadget *, struct usb_gadget_driver *); int (*udc_stop)(struct usb_gadget *); -- cgit v1.2.3 From 99600fd47eafd20b9ba6e04562bb2fcc48475344 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 22 Apr 2019 07:15:05 +0800 Subject: clk: Add CLK_HW_INIT_* macros using .parent_hws With the new clk parenting code, struct clk_init_data was expanded to include .parent_hws, for clk drivers to directly list parents by pointing to their respective struct clk_hw's. Add macros that can take either one single struct clk_hw *, or an array of them, for drivers to use. A special CLK_HW_INIT_HWS macro is included, which takes an array of struct clk_hw *, but sets .num_parents to 1. This variant is to allow the reuse of the array, instead of having a compound literal allocated for each clk sharing the same parent. Signed-off-by: Chen-Yu Tsai --- include/linux/clk-provider.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index bb6118f79784..70aad5cefea7 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -904,6 +904,29 @@ extern struct of_device_id __clk_of_table; .ops = _ops, \ }) +#define CLK_HW_INIT_HW(_name, _parent, _ops, _flags) \ + (&(struct clk_init_data) { \ + .flags = _flags, \ + .name = _name, \ + .parent_hws = (const struct clk_hw*[]) { _parent }, \ + .num_parents = 1, \ + .ops = _ops, \ + }) + +/* + * This macro is intended for drivers to be able to share the otherwise + * individual struct clk_hw[] compound literals created by the compiler + * when using CLK_HW_INIT_HW. It does NOT support multiple parents. + */ +#define CLK_HW_INIT_HWS(_name, _parent, _ops, _flags) \ + (&(struct clk_init_data) { \ + .flags = _flags, \ + .name = _name, \ + .parent_hws = _parent, \ + .num_parents = 1, \ + .ops = _ops, \ + }) + #define CLK_HW_INIT_PARENTS(_name, _parents, _ops, _flags) \ (&(struct clk_init_data) { \ .flags = _flags, \ @@ -913,6 +936,15 @@ extern struct of_device_id __clk_of_table; .ops = _ops, \ }) +#define CLK_HW_INIT_PARENTS_HW(_name, _parents, _ops, _flags) \ + (&(struct clk_init_data) { \ + .flags = _flags, \ + .name = _name, \ + .parent_hws = _parents, \ + .num_parents = ARRAY_SIZE(_parents), \ + .ops = _ops, \ + }) + #define CLK_HW_INIT_NO_PARENT(_name, _ops, _flags) \ (&(struct clk_init_data) { \ .flags = _flags, \ -- cgit v1.2.3 From 2d6b4f33e637bf51c50c536966a19e94a59f3212 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 3 May 2019 11:49:03 +0800 Subject: clk: Add CLK_HW_INIT_FW_NAME macro using .fw_name in .parent_data With the new clk parenting code, clk_init_data was expanded to include .parent_data, for clk drivers that have parents referenced using a combination of device tree clock-names, clock indices, and/or clk_hw pointers. Add a CLK_HW_INIT macro for specifying a single parent from the device tree using .fw_name in struct clk_parent_data. Signed-off-by: Chen-Yu Tsai --- include/linux/clk-provider.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 70aad5cefea7..b19063512a29 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -927,6 +927,17 @@ extern struct of_device_id __clk_of_table; .ops = _ops, \ }) +#define CLK_HW_INIT_FW_NAME(_name, _parent, _ops, _flags) \ + (&(struct clk_init_data) { \ + .flags = _flags, \ + .name = _name, \ + .parent_data = (const struct clk_parent_data[]) { \ + { .fw_name = _parent }, \ + }, \ + .num_parents = 1, \ + .ops = _ops, \ + }) + #define CLK_HW_INIT_PARENTS(_name, _parents, _ops, _flags) \ (&(struct clk_init_data) { \ .flags = _flags, \ -- cgit v1.2.3 From 13933109dff0a5abbfc3980304c6c21c90829810 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 22 Apr 2019 07:17:50 +0800 Subject: clk: Add CLK_HW_INIT_PARENT_DATA macro using .parent_data With the new clk parenting code, struct clk_init_data was expanded to include .parent_data, for clk drivers that have parents referenced using a combination of device tree clock-names, clock indices, and/or struct clk_hw pointers. Add a new macro that can take a list of struct clk_parent_data for drivers to use. Signed-off-by: Chen-Yu Tsai --- include/linux/clk-provider.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index b19063512a29..0fd14c4874d6 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -956,6 +956,15 @@ extern struct of_device_id __clk_of_table; .ops = _ops, \ }) +#define CLK_HW_INIT_PARENTS_DATA(_name, _parents, _ops, _flags) \ + (&(struct clk_init_data) { \ + .flags = _flags, \ + .name = _name, \ + .parent_data = _parents, \ + .num_parents = ARRAY_SIZE(_parents), \ + .ops = _ops, \ + }) + #define CLK_HW_INIT_NO_PARENT(_name, _ops, _flags) \ (&(struct clk_init_data) { \ .flags = _flags, \ -- cgit v1.2.3 From d7b15114aba956ca395ec5cc28f68fe861ffc208 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 22 Apr 2019 07:19:46 +0800 Subject: clk: fixed-factor: Add CLK_FIXED_FACTOR_HW which takes clk_hw pointer as parent With the new clk parenting code, clk_init_data was expanded to include .parent_hws, for clk drivers to directly reference parents by clk_hw. Add a new macro, CLK_FIXED_FACTOR_HW, that can take a struct clk_hw pointer, instead of a string, as its parent. Signed-off-by: Chen-Yu Tsai --- include/linux/clk-provider.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 0fd14c4874d6..c85e9f3809f2 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -985,6 +985,17 @@ extern struct of_device_id __clk_of_table; _flags), \ } +#define CLK_FIXED_FACTOR_HW(_struct, _name, _parent, \ + _div, _mult, _flags) \ + struct clk_fixed_factor _struct = { \ + .div = _div, \ + .mult = _mult, \ + .hw.init = CLK_HW_INIT_HW(_name, \ + _parent, \ + &clk_fixed_factor_ops, \ + _flags), \ + } + #ifdef CONFIG_OF int of_clk_add_provider(struct device_node *np, struct clk *(*clk_src_get)(struct of_phandle_args *args, -- cgit v1.2.3 From 1bef004e2680511ecbb6b5db3954fba430501ecb Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 6 May 2019 10:43:16 +0800 Subject: clk: fixed-factor: Add CLK_FIXED_FACTOR_HWS which takes list of struct clk_hw * With the new clk parenting code, clk_init_data was expanded to include .parent_hws, for clk drivers to directly reference parents by clk_hw. Add a new macro, CLK_FIXED_FACTOR_HWS, that can take an array of pointers to struct clk_hw, instead of a string, as its parent. Taking an array instead of a direct pointer allows the reuse of the array for multiple clks, rather than having one compound literal with the same contents allocated for each clk declaration. Signed-off-by: Chen-Yu Tsai --- include/linux/clk-provider.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index c85e9f3809f2..146a6859969e 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -996,6 +996,21 @@ extern struct of_device_id __clk_of_table; _flags), \ } +/* + * This macro allows the driver to reuse the _parent array for multiple + * fixed factor clk declarations. + */ +#define CLK_FIXED_FACTOR_HWS(_struct, _name, _parent, \ + _div, _mult, _flags) \ + struct clk_fixed_factor _struct = { \ + .div = _div, \ + .mult = _mult, \ + .hw.init = CLK_HW_INIT_HWS(_name, \ + _parent, \ + &clk_fixed_factor_ops, \ + _flags), \ + } + #ifdef CONFIG_OF int of_clk_add_provider(struct device_node *np, struct clk *(*clk_src_get)(struct of_phandle_args *args, -- cgit v1.2.3 From 8b13a48b891c7c855e9f3a401d91391a946f4ca7 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 3 May 2019 11:58:20 +0800 Subject: clk: fixed-factor: Add CLK_FIXED_FACTOR_FW_NAME for DT clock-names parent With the new clk parenting code, clk_init_data was expanded to include .parent_data, for clk drivers to specify parents using a combination of device tree clock-names, pointers to struct clk_hw, device tree clocks, and/or fallback global clock names. Add a new macro, CLK_FIXED_FACTOR_FW_NAME, that takes a string to match a clock-names entry in the device tree to specify the clock parent. Signed-off-by: Chen-Yu Tsai --- include/linux/clk-provider.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 146a6859969e..e5c44f6dd897 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1011,6 +1011,17 @@ extern struct of_device_id __clk_of_table; _flags), \ } +#define CLK_FIXED_FACTOR_FW_NAME(_struct, _name, _parent, \ + _div, _mult, _flags) \ + struct clk_fixed_factor _struct = { \ + .div = _div, \ + .mult = _mult, \ + .hw.init = CLK_HW_INIT_FW_NAME(_name, \ + _parent, \ + &clk_fixed_factor_ops, \ + _flags), \ + } + #ifdef CONFIG_OF int of_clk_add_provider(struct device_node *np, struct clk *(*clk_src_get)(struct of_phandle_args *args, -- cgit v1.2.3 From 4eb293487d05a69862a4907ee944aa271ed49a4c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 13 Jun 2019 10:55:32 +0900 Subject: pinctrl: make pinconf.h self-contained This header uses 'bool', but it does not include any header by itself. So, it could cause unknown type name error, depending on the header include order, although probably has been included by someone else. Include to make it self-contained. Signed-off-by: Masahiro Yamada Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinconf.h b/include/linux/pinctrl/pinconf.h index 93c9dd133e9d..9bebc3554809 100644 --- a/include/linux/pinctrl/pinconf.h +++ b/include/linux/pinctrl/pinconf.h @@ -14,6 +14,8 @@ #ifdef CONFIG_PINCONF +#include + struct pinctrl_dev; struct seq_file; -- cgit v1.2.3 From 29875a52915e09abb9703722054f6443cb492ccc Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 12 Oct 2018 17:06:06 +0200 Subject: mm: Add an apply_to_pfn_range interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is basically apply_to_page_range with added functionality: Allocating missing parts of the page table becomes optional, which means that the function can be guaranteed not to error if allocation is disabled. Also passing of the closure struct and callback function becomes different and more in line with how things are done elsewhere. Finally we keep apply_to_page_range as a wrapper around apply_to_pfn_range The reason for not using the page-walk code is that we want to perform the page-walk on vmas pointing to an address space without requiring the mmap_sem to be held rather than on vmas belonging to a process with the mmap_sem held. Notable changes since RFC: Don't export apply_to_pfn range. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Souptick Joarder Cc: "Jérôme Glisse" Cc: linux-mm@kvack.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Thomas Hellstrom Reviewed-by: Ralph Campbell #v1 --- include/linux/mm.h | 10 ++++ mm/memory.c | 135 ++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 113 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0e8834ac32b7..3d06ce2a64af 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2675,6 +2675,16 @@ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); +struct pfn_range_apply; +typedef int (*pter_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, + struct pfn_range_apply *closure); +struct pfn_range_apply { + struct mm_struct *mm; + pter_fn_t ptefn; + unsigned int alloc; +}; +extern int apply_to_pfn_range(struct pfn_range_apply *closure, + unsigned long address, unsigned long size); #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); diff --git a/mm/memory.c b/mm/memory.c index 168f546af1ad..462aa47f8878 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2032,18 +2032,17 @@ int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long } EXPORT_SYMBOL(vm_iomap_memory); -static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, - unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) +static int apply_to_pte_range(struct pfn_range_apply *closure, pmd_t *pmd, + unsigned long addr, unsigned long end) { pte_t *pte; int err; pgtable_t token; spinlock_t *uninitialized_var(ptl); - pte = (mm == &init_mm) ? + pte = (closure->mm == &init_mm) ? pte_alloc_kernel(pmd, addr) : - pte_alloc_map_lock(mm, pmd, addr, &ptl); + pte_alloc_map_lock(closure->mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; @@ -2054,86 +2053,109 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, token = pmd_pgtable(*pmd); do { - err = fn(pte++, token, addr, data); + err = closure->ptefn(pte++, token, addr, closure); if (err) break; } while (addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); - if (mm != &init_mm) + if (closure->mm != &init_mm) pte_unmap_unlock(pte-1, ptl); return err; } -static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, - unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) +static int apply_to_pmd_range(struct pfn_range_apply *closure, pud_t *pud, + unsigned long addr, unsigned long end) { pmd_t *pmd; unsigned long next; - int err; + int err = 0; BUG_ON(pud_huge(*pud)); - pmd = pmd_alloc(mm, pud, addr); + pmd = pmd_alloc(closure->mm, pud, addr); if (!pmd) return -ENOMEM; + do { next = pmd_addr_end(addr, end); - err = apply_to_pte_range(mm, pmd, addr, next, fn, data); + if (!closure->alloc && pmd_none_or_clear_bad(pmd)) + continue; + err = apply_to_pte_range(closure, pmd, addr, next); if (err) break; } while (pmd++, addr = next, addr != end); return err; } -static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, - unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) +static int apply_to_pud_range(struct pfn_range_apply *closure, p4d_t *p4d, + unsigned long addr, unsigned long end) { pud_t *pud; unsigned long next; - int err; + int err = 0; - pud = pud_alloc(mm, p4d, addr); + pud = pud_alloc(closure->mm, p4d, addr); if (!pud) return -ENOMEM; + do { next = pud_addr_end(addr, end); - err = apply_to_pmd_range(mm, pud, addr, next, fn, data); + if (!closure->alloc && pud_none_or_clear_bad(pud)) + continue; + err = apply_to_pmd_range(closure, pud, addr, next); if (err) break; } while (pud++, addr = next, addr != end); return err; } -static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, - unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) +static int apply_to_p4d_range(struct pfn_range_apply *closure, pgd_t *pgd, + unsigned long addr, unsigned long end) { p4d_t *p4d; unsigned long next; - int err; + int err = 0; - p4d = p4d_alloc(mm, pgd, addr); + p4d = p4d_alloc(closure->mm, pgd, addr); if (!p4d) return -ENOMEM; + do { next = p4d_addr_end(addr, end); - err = apply_to_pud_range(mm, p4d, addr, next, fn, data); + if (!closure->alloc && p4d_none_or_clear_bad(p4d)) + continue; + err = apply_to_pud_range(closure, p4d, addr, next); if (err) break; } while (p4d++, addr = next, addr != end); return err; } -/* - * Scan a region of virtual memory, filling in page tables as necessary - * and calling a provided function on each leaf page table. +/** + * apply_to_pfn_range - Scan a region of virtual memory, calling a provided + * function on each leaf page table entry + * @closure: Details about how to scan and what function to apply + * @addr: Start virtual address + * @size: Size of the region + * + * If @closure->alloc is set to 1, the function will fill in the page table + * as necessary. Otherwise it will skip non-present parts. + * Note: The caller must ensure that the range does not contain huge pages. + * The caller must also assure that the proper mmu_notifier functions are + * called before and after the call to apply_to_pfn_range. + * + * WARNING: Do not use this function unless you know exactly what you are + * doing. It is lacking support for huge pages and transparent huge pages. + * + * Return: Zero on success. If the provided function returns a non-zero status, + * the page table walk will terminate and that status will be returned. + * If @closure->alloc is set to 1, then this function may also return memory + * allocation errors arising from allocating page table memory. */ -int apply_to_page_range(struct mm_struct *mm, unsigned long addr, - unsigned long size, pte_fn_t fn, void *data) +int apply_to_pfn_range(struct pfn_range_apply *closure, + unsigned long addr, unsigned long size) { pgd_t *pgd; unsigned long next; @@ -2143,16 +2165,65 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, if (WARN_ON(addr >= end)) return -EINVAL; - pgd = pgd_offset(mm, addr); + pgd = pgd_offset(closure->mm, addr); do { next = pgd_addr_end(addr, end); - err = apply_to_p4d_range(mm, pgd, addr, next, fn, data); + if (!closure->alloc && pgd_none_or_clear_bad(pgd)) + continue; + err = apply_to_p4d_range(closure, pgd, addr, next); if (err) break; } while (pgd++, addr = next, addr != end); return err; } + +/** + * struct page_range_apply - Closure structure for apply_to_page_range() + * @pter: The base closure structure we derive from + * @fn: The leaf pte function to call + * @data: The leaf pte function closure + */ +struct page_range_apply { + struct pfn_range_apply pter; + pte_fn_t fn; + void *data; +}; + +/* + * Callback wrapper to enable use of apply_to_pfn_range for + * the apply_to_page_range interface + */ +static int apply_to_page_range_wrapper(pte_t *pte, pgtable_t token, + unsigned long addr, + struct pfn_range_apply *pter) +{ + struct page_range_apply *pra = + container_of(pter, typeof(*pra), pter); + + return pra->fn(pte, token, addr, pra->data); +} + +/* + * Scan a region of virtual memory, filling in page tables as necessary + * and calling a provided function on each leaf page table. + * + * WARNING: Do not use this function unless you know exactly what you are + * doing. It is lacking support for huge pages and transparent huge pages. + */ +int apply_to_page_range(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, void *data) +{ + struct page_range_apply pra = { + .pter = {.mm = mm, + .alloc = 1, + .ptefn = apply_to_page_range_wrapper }, + .fn = fn, + .data = data + }; + + return apply_to_pfn_range(&pra.pter, addr, size); +} EXPORT_SYMBOL_GPL(apply_to_page_range); /* -- cgit v1.2.3 From 4fe51e9e7902b5724b618dadd9527b1bbf2b55cc Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 19 Mar 2019 13:12:30 +0100 Subject: mm: Add write-protect and clean utilities for address space ranges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add two utilities to a) write-protect and b) clean all ptes pointing into a range of an address space. The utilities are intended to aid in tracking dirty pages (either driver-allocated system memory or pci device memory). The write-protect utility should be used in conjunction with page_mkwrite() and pfn_mkwrite() to trigger write page-faults on page accesses. Typically one would want to use this on sparse accesses into large memory regions. The clean utility should be used to utilize hardware dirtying functionality and avoid the overhead of page-faults, typically on large accesses into small memory regions. The added file "as_dirty_helpers.c" is initially listed as maintained by VMware under our DRM driver. If somebody would like it elsewhere, that's of course no problem. Notable changes since RFC: - Added comments to help avoid the usage of these function for VMAs it's not intended for. We also do advisory checks on the vm_flags and warn on illegal usage. - Perform the pte modifications the same way softdirty does. - Add mmu_notifier range invalidation calls. - Add a config option so that this code is not unconditionally included. - Tell the mmu_gather code about pending tlb flushes. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Souptick Joarder Cc: "Jérôme Glisse" Cc: linux-mm@kvack.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Thomas Hellstrom Reviewed-by: Ralph Campbell #v1 --- MAINTAINERS | 1 + include/linux/mm.h | 9 +- mm/Kconfig | 3 + mm/Makefile | 1 + mm/as_dirty_helpers.c | 300 ++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 313 insertions(+), 1 deletion(-) create mode 100644 mm/as_dirty_helpers.c (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 7a2f487ea49a..a55d4ef91b0b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5179,6 +5179,7 @@ T: git git://people.freedesktop.org/~thomash/linux S: Supported F: drivers/gpu/drm/vmwgfx/ F: include/uapi/drm/vmwgfx_drm.h +F: mm/as_dirty_helpers.c DRM DRIVERS M: David Airlie diff --git a/include/linux/mm.h b/include/linux/mm.h index 3d06ce2a64af..a0bc2a82917e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2685,7 +2685,14 @@ struct pfn_range_apply { }; extern int apply_to_pfn_range(struct pfn_range_apply *closure, unsigned long address, unsigned long size); - +unsigned long apply_as_wrprotect(struct address_space *mapping, + pgoff_t first_index, pgoff_t nr); +unsigned long apply_as_clean(struct address_space *mapping, + pgoff_t first_index, pgoff_t nr, + pgoff_t bitmap_pgoff, + unsigned long *bitmap, + pgoff_t *start, + pgoff_t *end); #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); extern void kernel_poison_pages(struct page *page, int numpages, int enable); diff --git a/mm/Kconfig b/mm/Kconfig index f0c76ba47695..5006d0e6a5c7 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -765,4 +765,7 @@ config GUP_BENCHMARK config ARCH_HAS_PTE_SPECIAL bool +config AS_DIRTY_HELPERS + bool + endmenu diff --git a/mm/Makefile b/mm/Makefile index ac5e5ba78874..f5d412bbc2f7 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -104,3 +104,4 @@ obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o obj-$(CONFIG_HMM) += hmm.o obj-$(CONFIG_MEMFD_CREATE) += memfd.o +obj-$(CONFIG_AS_DIRTY_HELPERS) += as_dirty_helpers.o diff --git a/mm/as_dirty_helpers.c b/mm/as_dirty_helpers.c new file mode 100644 index 000000000000..f600e31534fb --- /dev/null +++ b/mm/as_dirty_helpers.c @@ -0,0 +1,300 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include + +/** + * struct apply_as - Closure structure for apply_as_range + * @base: struct pfn_range_apply we derive from + * @start: Address of first modified pte + * @end: Address of last modified pte + 1 + * @total: Total number of modified ptes + * @vma: Pointer to the struct vm_area_struct we're currently operating on + */ +struct apply_as { + struct pfn_range_apply base; + unsigned long start; + unsigned long end; + unsigned long total; + struct vm_area_struct *vma; +}; + +/** + * apply_pt_wrprotect - Leaf pte callback to write-protect a pte + * @pte: Pointer to the pte + * @token: Page table token, see apply_to_pfn_range() + * @addr: The virtual page address + * @closure: Pointer to a struct pfn_range_apply embedded in a + * struct apply_as + * + * The function write-protects a pte and records the range in + * virtual address space of touched ptes for efficient range TLB flushes. + * + * Return: Always zero. + */ +static int apply_pt_wrprotect(pte_t *pte, pgtable_t token, + unsigned long addr, + struct pfn_range_apply *closure) +{ + struct apply_as *aas = container_of(closure, typeof(*aas), base); + pte_t ptent = *pte; + + if (pte_write(ptent)) { + pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte); + + ptent = pte_wrprotect(old_pte); + ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent); + aas->total++; + aas->start = min(aas->start, addr); + aas->end = max(aas->end, addr + PAGE_SIZE); + } + + return 0; +} + +/** + * struct apply_as_clean - Closure structure for apply_as_clean + * @base: struct apply_as we derive from + * @bitmap_pgoff: Address_space Page offset of the first bit in @bitmap + * @bitmap: Bitmap with one bit for each page offset in the address_space range + * covered. + * @start: Address_space page offset of first modified pte relative + * to @bitmap_pgoff + * @end: Address_space page offset of last modified pte relative + * to @bitmap_pgoff + */ +struct apply_as_clean { + struct apply_as base; + pgoff_t bitmap_pgoff; + unsigned long *bitmap; + pgoff_t start; + pgoff_t end; +}; + +/** + * apply_pt_clean - Leaf pte callback to clean a pte + * @pte: Pointer to the pte + * @token: Page table token, see apply_to_pfn_range() + * @addr: The virtual page address + * @closure: Pointer to a struct pfn_range_apply embedded in a + * struct apply_as_clean + * + * The function cleans a pte and records the range in + * virtual address space of touched ptes for efficient TLB flushes. + * It also records dirty ptes in a bitmap representing page offsets + * in the address_space, as well as the first and last of the bits + * touched. + * + * Return: Always zero. + */ +static int apply_pt_clean(pte_t *pte, pgtable_t token, + unsigned long addr, + struct pfn_range_apply *closure) +{ + struct apply_as *aas = container_of(closure, typeof(*aas), base); + struct apply_as_clean *clean = container_of(aas, typeof(*clean), base); + pte_t ptent = *pte; + + if (pte_dirty(ptent)) { + pgoff_t pgoff = ((addr - aas->vma->vm_start) >> PAGE_SHIFT) + + aas->vma->vm_pgoff - clean->bitmap_pgoff; + pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte); + + ptent = pte_mkclean(old_pte); + ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent); + + aas->total++; + aas->start = min(aas->start, addr); + aas->end = max(aas->end, addr + PAGE_SIZE); + + __set_bit(pgoff, clean->bitmap); + clean->start = min(clean->start, pgoff); + clean->end = max(clean->end, pgoff + 1); + } + + return 0; +} + +/** + * apply_as_range - Apply a pte callback to all PTEs pointing into a range + * of an address_space. + * @mapping: Pointer to the struct address_space + * @aas: Closure structure + * @first_index: First page offset in the address_space + * @nr: Number of incremental page offsets to cover + * + * Return: Number of ptes touched. Note that this number might be larger + * than @nr if there are overlapping vmas + */ +static unsigned long apply_as_range(struct address_space *mapping, + struct apply_as *aas, + pgoff_t first_index, pgoff_t nr) +{ + struct vm_area_struct *vma; + pgoff_t vba, vea, cba, cea; + unsigned long start_addr, end_addr; + struct mmu_notifier_range range; + + i_mmap_lock_read(mapping); + vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index, + first_index + nr - 1) { + unsigned long vm_flags = READ_ONCE(vma->vm_flags); + + /* + * We can only do advisory flag tests below, since we can't + * require the vm's mmap_sem to be held to protect the flags. + * Therefore, callers that strictly depend on specific mmap + * flags to remain constant throughout the operation must + * either ensure those flags are immutable for all relevant + * vmas or can't use this function. Fixing this properly would + * require the vma::vm_flags to be protected by a separate + * lock taken after the i_mmap_lock + */ + + /* Skip non-applicable VMAs */ + if ((vm_flags & (VM_SHARED | VM_WRITE)) != + (VM_SHARED | VM_WRITE)) + continue; + + /* Warn on and skip VMAs whose flags indicate illegal usage */ + if (WARN_ON((vm_flags & (VM_HUGETLB | VM_IO)) != VM_IO)) + continue; + + /* Clip to the vma */ + vba = vma->vm_pgoff; + vea = vba + vma_pages(vma); + cba = first_index; + cba = max(cba, vba); + cea = first_index + nr; + cea = min(cea, vea); + + /* Translate to virtual address */ + start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start; + end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start; + if (start_addr >= end_addr) + continue; + + aas->base.mm = vma->vm_mm; + aas->vma = vma; + aas->start = end_addr; + aas->end = start_addr; + + mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, 0, + vma, vma->vm_mm, start_addr, end_addr); + mmu_notifier_invalidate_range_start(&range); + + /* Needed when we only change protection? */ + flush_cache_range(vma, start_addr, end_addr); + + /* + * We're not using tlb_gather_mmu() since typically + * only a small subrange of PTEs are affected. + */ + inc_tlb_flush_pending(vma->vm_mm); + + /* Should not error since aas->base.alloc == 0 */ + WARN_ON(apply_to_pfn_range(&aas->base, start_addr, + end_addr - start_addr)); + if (aas->end > aas->start) + flush_tlb_range(vma, aas->start, aas->end); + + mmu_notifier_invalidate_range_end(&range); + dec_tlb_flush_pending(vma->vm_mm); + } + i_mmap_unlock_read(mapping); + + return aas->total; +} + +/** + * apply_as_wrprotect - Write-protect all ptes in an address_space range + * @mapping: The address_space we want to write protect + * @first_index: The first page offset in the range + * @nr: Number of incremental page offsets to cover + * + * WARNING: This function should only be used for address spaces whose + * vmas are marked VM_IO and that do not contain huge pages. + * To avoid interference with COW'd pages, vmas not marked VM_SHARED are + * simply skipped. + * + * Return: The number of ptes actually write-protected. Note that + * already write-protected ptes are not counted. + */ +unsigned long apply_as_wrprotect(struct address_space *mapping, + pgoff_t first_index, pgoff_t nr) +{ + struct apply_as aas = { + .base = { + .alloc = 0, + .ptefn = apply_pt_wrprotect, + }, + .total = 0, + }; + + return apply_as_range(mapping, &aas, first_index, nr); +} +EXPORT_SYMBOL_GPL(apply_as_wrprotect); + +/** + * apply_as_clean - Clean all ptes in an address_space range + * @mapping: The address_space we want to clean + * @first_index: The first page offset in the range + * @nr: Number of incremental page offsets to cover + * @bitmap_pgoff: The page offset of the first bit in @bitmap + * @bitmap: Pointer to a bitmap of at least @nr bits. The bitmap needs to + * cover the whole range @first_index..@first_index + @nr. + * @start: Pointer to number of the first set bit in @bitmap. + * is modified as new bits are set by the function. + * @end: Pointer to the number of the last set bit in @bitmap. + * none set. The value is modified as new bits are set by the function. + * + * Note: When this function returns there is no guarantee that a CPU has + * not already dirtied new ptes. However it will not clean any ptes not + * reported in the bitmap. + * + * If a caller needs to make sure all dirty ptes are picked up and none + * additional are added, it first needs to write-protect the address-space + * range and make sure new writers are blocked in page_mkwrite() or + * pfn_mkwrite(). And then after a TLB flush following the write-protection + * pick up all dirty bits. + * + * WARNING: This function should only be used for address spaces whose + * vmas are marked VM_IO and that do not contain huge pages. + * To avoid interference with COW'd pages, vmas not marked VM_SHARED are + * simply skipped. + * + * Return: The number of dirty ptes actually cleaned. + */ +unsigned long apply_as_clean(struct address_space *mapping, + pgoff_t first_index, pgoff_t nr, + pgoff_t bitmap_pgoff, + unsigned long *bitmap, + pgoff_t *start, + pgoff_t *end) +{ + bool none_set = (*start >= *end); + struct apply_as_clean clean = { + .base = { + .base = { + .alloc = 0, + .ptefn = apply_pt_clean, + }, + .total = 0, + }, + .bitmap_pgoff = bitmap_pgoff, + .bitmap = bitmap, + .start = none_set ? nr : *start, + .end = none_set ? 0 : *end, + }; + unsigned long ret = apply_as_range(mapping, &clean.base, first_index, + nr); + + *start = clean.start; + *end = clean.end; + return ret; +} +EXPORT_SYMBOL_GPL(apply_as_clean); -- cgit v1.2.3 From 378a60406415bd20ec6e845a3d6883d460656537 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 23 May 2019 11:17:22 -0300 Subject: mm/hmm: Remove duplicate condition test before wait_event_timeout The wait_event_timeout macro already tests the condition as its first action, so there is no reason to open code another version of this, all that does is skip the might_sleep() debugging in common cases, which is not helpful. Further, based on prior patches, we can now simplify the required condition test: - If range is valid memory then so is range->hmm - If hmm_release() has run then range->valid is set to false at the same time as dead, so no reason to check both. - A valid hmm has a valid hmm->mm. Allowing the return value of wait_event_timeout() (along with its internal barriers) to compute the result of the function. Signed-off-by: Jason Gunthorpe Reviewed-by: Ralph Campbell Reviewed-by: John Hubbard Reviewed-by: Ira Weiny Reviewed-by: Christoph Hellwig Tested-by: Philip Yang --- include/linux/hmm.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 1d97b6d62c5b..26e7c477490c 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -209,17 +209,8 @@ static inline unsigned long hmm_range_page_size(const struct hmm_range *range) static inline bool hmm_range_wait_until_valid(struct hmm_range *range, unsigned long timeout) { - /* Check if mm is dead ? */ - if (range->hmm == NULL || range->hmm->dead || range->hmm->mm == NULL) { - range->valid = false; - return false; - } - if (range->valid) - return true; - wait_event_timeout(range->hmm->wq, range->valid || range->hmm->dead, - msecs_to_jiffies(timeout)); - /* Return current valid status just in case we get lucky */ - return range->valid; + return wait_event_timeout(range->hmm->wq, range->valid, + msecs_to_jiffies(timeout)) != 0; } /* -- cgit v1.2.3 From 47f245985a4f3e270b1e4f28aa49f4c939527981 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 23 May 2019 11:08:28 -0300 Subject: mm/hmm: Hold on to the mmget for the lifetime of the range Range functions like hmm_range_snapshot() and hmm_range_fault() call find_vma, which requires hodling the mmget() and the mmap_sem for the mm. Make this simpler for the callers by holding the mmget() inside the range for the lifetime of the range. Other functions that accept a range should only be called if the range is registered. This has the side effect of directly preventing hmm_release() from happening while a range is registered. That means range->dead cannot be false during the lifetime of the range, so remove dead and hmm_mirror_mm_is_alive() entirely. Signed-off-by: Jason Gunthorpe Reviewed-by: John Hubbard Reviewed-by: Ralph Campbell Reviewed-by: Christoph Hellwig Tested-by: Philip Yang --- include/linux/hmm.h | 26 -------------------------- mm/hmm.c | 32 +++++++++++--------------------- 2 files changed, 11 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 26e7c477490c..bf013e965257 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -82,7 +82,6 @@ * @mirrors_sem: read/write semaphore protecting the mirrors list * @wq: wait queue for user waiting on a range invalidation * @notifiers: count of active mmu notifiers - * @dead: is the mm dead ? */ struct hmm { struct mm_struct *mm; @@ -95,7 +94,6 @@ struct hmm { wait_queue_head_t wq; struct rcu_head rcu; long notifiers; - bool dead; }; /* @@ -459,30 +457,6 @@ struct hmm_mirror { int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm); void hmm_mirror_unregister(struct hmm_mirror *mirror); -/* - * hmm_mirror_mm_is_alive() - test if mm is still alive - * @mirror: the HMM mm mirror for which we want to lock the mmap_sem - * Return: false if the mm is dead, true otherwise - * - * This is an optimization, it will not always accurately return false if the - * mm is dead; i.e., there can be false negatives (process is being killed but - * HMM is not yet informed of that). It is only intended to be used to optimize - * out cases where the driver is about to do something time consuming and it - * would be better to skip it if the mm is dead. - */ -static inline bool hmm_mirror_mm_is_alive(struct hmm_mirror *mirror) -{ - struct mm_struct *mm; - - if (!mirror || !mirror->hmm) - return false; - mm = READ_ONCE(mirror->hmm->mm); - if (mirror->hmm->dead || !mm) - return false; - - return true; -} - /* * Please see Documentation/vm/hmm.rst for how to use the range API. */ diff --git a/mm/hmm.c b/mm/hmm.c index 73c8af4827fe..1eddda45cefa 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -67,7 +67,6 @@ static struct hmm *hmm_get_or_create(struct mm_struct *mm) mutex_init(&hmm->lock); kref_init(&hmm->kref); hmm->notifiers = 0; - hmm->dead = false; hmm->mm = mm; hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops; @@ -120,21 +119,16 @@ static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier); struct hmm_mirror *mirror; - struct hmm_range *range; /* Bail out if hmm is in the process of being freed */ if (!kref_get_unless_zero(&hmm->kref)) return; - /* Report this HMM as dying. */ - hmm->dead = true; - - /* Wake-up everyone waiting on any range. */ - mutex_lock(&hmm->lock); - list_for_each_entry(range, &hmm->ranges, list) - range->valid = false; - wake_up_all(&hmm->wq); - mutex_unlock(&hmm->lock); + /* + * Since hmm_range_register() holds the mmget() lock hmm_release() is + * prevented as long as a range exists. + */ + WARN_ON(!list_empty_careful(&hmm->ranges)); down_write(&hmm->mirrors_sem); mirror = list_first_entry_or_null(&hmm->mirrors, struct hmm_mirror, @@ -903,8 +897,8 @@ int hmm_range_register(struct hmm_range *range, range->start = start; range->end = end; - /* Check if hmm_mm_destroy() was call. */ - if (hmm->mm == NULL || hmm->dead) + /* Prevent hmm_release() from running while the range is valid */ + if (!mmget_not_zero(hmm->mm)) return -EFAULT; /* Initialize range to track CPU page table updates. */ @@ -942,11 +936,12 @@ void hmm_range_unregister(struct hmm_range *range) return; mutex_lock(&hmm->lock); - list_del(&range->list); + list_del_init(&range->list); mutex_unlock(&hmm->lock); /* Drop reference taken by hmm_range_register() */ range->valid = false; + mmput(hmm->mm); hmm_put(hmm); range->hmm = NULL; } @@ -974,10 +969,7 @@ long hmm_range_snapshot(struct hmm_range *range) struct vm_area_struct *vma; struct mm_walk mm_walk; - /* Check if hmm_mm_destroy() was call. */ - if (hmm->mm == NULL || hmm->dead) - return -EFAULT; - + lockdep_assert_held(&hmm->mm->mmap_sem); do { /* If range is no longer valid force retry. */ if (!range->valid) @@ -1072,9 +1064,7 @@ long hmm_range_fault(struct hmm_range *range, bool block) struct mm_walk mm_walk; int ret; - /* Check if hmm_mm_destroy() was call. */ - if (hmm->mm == NULL || hmm->dead) - return -EFAULT; + lockdep_assert_held(&hmm->mm->mmap_sem); do { /* If range is no longer valid force retry. */ -- cgit v1.2.3 From 90ec7a76cc4ba65bfedeb8621cba09cd5a317d8f Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Thu, 16 May 2019 15:00:20 +0530 Subject: iommu/io-pgtable-arm: Add support to use system cache Few Qualcomm platforms such as, sdm845 have an additional outer cache called as System cache, aka. Last level cache (LLC) that allows non-coherent devices to upgrade to using caching. This cache sits right before the DDR, and is tightly coupled with the memory controller. The clients using this cache request their slices from this system cache, make it active, and can then start using it. There is a fundamental assumption that non-coherent devices can't access caches. This change adds an exception where they *can* use some level of cache despite still being non-coherent overall. The coherent devices that use cacheable memory, and CPU make use of this system cache by default. Looking at memory types, we have following - a) Normal uncached :- MAIR 0x44, inner non-cacheable, outer non-cacheable; b) Normal cached :- MAIR 0xff, inner read write-back non-transient, outer read write-back non-transient; attribute setting for coherenet I/O devices. and, for non-coherent i/o devices that can allocate in system cache another type gets added - c) Normal sys-cached :- MAIR 0xf4, inner non-cacheable, outer read write-back non-transient Coherent I/O devices use system cache by marking the memory as normal cached. Non-coherent I/O devices should mark the memory as normal sys-cached in page tables to use system cache. Acked-by: Robin Murphy Signed-off-by: Vivek Gautam Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 9 ++++++++- include/linux/iommu.h | 6 ++++++ 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 4e21efbc4459..2454ac11aa97 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -167,10 +167,12 @@ #define ARM_LPAE_MAIR_ATTR_MASK 0xff #define ARM_LPAE_MAIR_ATTR_DEVICE 0x04 #define ARM_LPAE_MAIR_ATTR_NC 0x44 +#define ARM_LPAE_MAIR_ATTR_INC_OWBRWA 0xf4 #define ARM_LPAE_MAIR_ATTR_WBRWA 0xff #define ARM_LPAE_MAIR_ATTR_IDX_NC 0 #define ARM_LPAE_MAIR_ATTR_IDX_CACHE 1 #define ARM_LPAE_MAIR_ATTR_IDX_DEV 2 +#define ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE 3 #define ARM_MALI_LPAE_TTBR_ADRMODE_TABLE (3u << 0) #define ARM_MALI_LPAE_TTBR_READ_INNER BIT(2) @@ -470,6 +472,9 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, else if (prot & IOMMU_CACHE) pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE << ARM_LPAE_PTE_ATTRINDX_SHIFT); + else if (prot & IOMMU_QCOM_SYS_CACHE) + pte |= (ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE + << ARM_LPAE_PTE_ATTRINDX_SHIFT); } if (prot & IOMMU_NOEXEC) @@ -857,7 +862,9 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) (ARM_LPAE_MAIR_ATTR_WBRWA << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) | (ARM_LPAE_MAIR_ATTR_DEVICE - << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)); + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)) | + (ARM_LPAE_MAIR_ATTR_INC_OWBRWA + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE)); cfg->arm_lpae_s1_cfg.mair[0] = reg; cfg->arm_lpae_s1_cfg.mair[1] = 0; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a815cf6f6f47..8ee3fbaf5855 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -41,6 +41,12 @@ * if the IOMMU page table format is equivalent. */ #define IOMMU_PRIV (1 << 5) +/* + * Non-coherent masters on few Qualcomm SoCs can use this page protection flag + * to set correct cacheability attributes to use an outer level of cache - + * last level cache, aka system cache. + */ +#define IOMMU_QCOM_SYS_CACHE (1 << 6) struct iommu_ops; struct iommu_group; -- cgit v1.2.3 From 2589726d12a1b12eaaa93c7f1ea64287e383c7a5 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sat, 15 Jun 2019 12:12:20 -0700 Subject: bpf: introduce bounded loops Allow the verifier to validate the loops by simulating their execution. Exisiting programs have used '#pragma unroll' to unroll the loops by the compiler. Instead let the verifier simulate all iterations of the loop. In order to do that introduce parentage chain of bpf_verifier_state and 'branches' counter for the number of branches left to explore. See more detailed algorithm description in bpf_verifier.h This algorithm borrows the key idea from Edward Cree approach: https://patchwork.ozlabs.org/patch/877222/ Additional state pruning heuristics make such brute force loop walk practical even for large loops. Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 51 ++++++++++++++- kernel/bpf/verifier.c | 143 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 181 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 704ed7971472..03037373b447 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -194,6 +194,53 @@ struct bpf_func_state { struct bpf_verifier_state { /* call stack tracking */ struct bpf_func_state *frame[MAX_CALL_FRAMES]; + struct bpf_verifier_state *parent; + /* + * 'branches' field is the number of branches left to explore: + * 0 - all possible paths from this state reached bpf_exit or + * were safely pruned + * 1 - at least one path is being explored. + * This state hasn't reached bpf_exit + * 2 - at least two paths are being explored. + * This state is an immediate parent of two children. + * One is fallthrough branch with branches==1 and another + * state is pushed into stack (to be explored later) also with + * branches==1. The parent of this state has branches==1. + * The verifier state tree connected via 'parent' pointer looks like: + * 1 + * 1 + * 2 -> 1 (first 'if' pushed into stack) + * 1 + * 2 -> 1 (second 'if' pushed into stack) + * 1 + * 1 + * 1 bpf_exit. + * + * Once do_check() reaches bpf_exit, it calls update_branch_counts() + * and the verifier state tree will look: + * 1 + * 1 + * 2 -> 1 (first 'if' pushed into stack) + * 1 + * 1 -> 1 (second 'if' pushed into stack) + * 0 + * 0 + * 0 bpf_exit. + * After pop_stack() the do_check() will resume at second 'if'. + * + * If is_state_visited() sees a state with branches > 0 it means + * there is a loop. If such state is exactly equal to the current state + * it's an infinite loop. Note states_equal() checks for states + * equvalency, so two states being 'states_equal' does not mean + * infinite loop. The exact comparison is provided by + * states_maybe_looping() function. It's a stronger pre-check and + * much faster than states_equal(). + * + * This algorithm may not find all possible infinite loops or + * loop iteration count may be too high. + * In such cases BPF_COMPLEXITY_LIMIT_INSNS limit kicks in. + */ + u32 branches; u32 insn_idx; u32 curframe; u32 active_spin_lock; @@ -312,7 +359,9 @@ struct bpf_verifier_env { } cfg; u32 subprog_cnt; /* number of instructions analyzed by the verifier */ - u32 insn_processed; + u32 prev_insn_processed, insn_processed; + /* number of jmps, calls, exits analyzed so far */ + u32 prev_jmps_processed, jmps_processed; /* total verification time */ u64 verification_time; /* maximum number of verifier states kept in 'branching' instructions */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8d3a4ef1d969..25baa3c8cdd2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -721,6 +721,8 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, dst_state->speculative = src->speculative; dst_state->curframe = src->curframe; dst_state->active_spin_lock = src->active_spin_lock; + dst_state->branches = src->branches; + dst_state->parent = src->parent; for (i = 0; i <= src->curframe; i++) { dst = dst_state->frame[i]; if (!dst) { @@ -736,6 +738,23 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, return 0; } +static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) +{ + while (st) { + u32 br = --st->branches; + + /* WARN_ON(br > 1) technically makes sense here, + * but see comment in push_stack(), hence: + */ + WARN_ONCE((int)br < 0, + "BUG update_branch_counts:branches_to_explore=%d\n", + br); + if (br) + break; + st = st->parent; + } +} + static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, int *insn_idx) { @@ -789,6 +808,18 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, env->stack_size); goto err; } + if (elem->st.parent) { + ++elem->st.parent->branches; + /* WARN_ON(branches > 2) technically makes sense here, + * but + * 1. speculative states will bump 'branches' for non-branch + * instructions + * 2. is_state_visited() heuristics may decide not to create + * a new state for a sequence of branches and all such current + * and cloned states will be pointing to a single parent state + * which might have large 'branches' count. + */ + } return &elem->st; err: free_verifier_state(env->cur_state, true); @@ -5682,7 +5713,8 @@ static void init_explored_state(struct bpf_verifier_env *env, int idx) * w - next instruction * e - edge */ -static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) +static int push_insn(int t, int w, int e, struct bpf_verifier_env *env, + bool loop_ok) { int *insn_stack = env->cfg.insn_stack; int *insn_state = env->cfg.insn_state; @@ -5712,6 +5744,8 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) insn_stack[env->cfg.cur_stack++] = w; return 1; } else if ((insn_state[w] & 0xF0) == DISCOVERED) { + if (loop_ok && env->allow_ptr_leaks) + return 0; verbose_linfo(env, t, "%d: ", t); verbose_linfo(env, w, "%d: ", w); verbose(env, "back-edge from insn %d to %d\n", t, w); @@ -5763,7 +5797,7 @@ peek_stack: if (opcode == BPF_EXIT) { goto mark_explored; } else if (opcode == BPF_CALL) { - ret = push_insn(t, t + 1, FALLTHROUGH, env); + ret = push_insn(t, t + 1, FALLTHROUGH, env, false); if (ret == 1) goto peek_stack; else if (ret < 0) @@ -5772,7 +5806,8 @@ peek_stack: init_explored_state(env, t + 1); if (insns[t].src_reg == BPF_PSEUDO_CALL) { init_explored_state(env, t); - ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env); + ret = push_insn(t, t + insns[t].imm + 1, BRANCH, + env, false); if (ret == 1) goto peek_stack; else if (ret < 0) @@ -5785,7 +5820,7 @@ peek_stack: } /* unconditional jump with single edge */ ret = push_insn(t, t + insns[t].off + 1, - FALLTHROUGH, env); + FALLTHROUGH, env, true); if (ret == 1) goto peek_stack; else if (ret < 0) @@ -5798,13 +5833,13 @@ peek_stack: } else { /* conditional jump with two edges */ init_explored_state(env, t); - ret = push_insn(t, t + 1, FALLTHROUGH, env); + ret = push_insn(t, t + 1, FALLTHROUGH, env, true); if (ret == 1) goto peek_stack; else if (ret < 0) goto err_free; - ret = push_insn(t, t + insns[t].off + 1, BRANCH, env); + ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true); if (ret == 1) goto peek_stack; else if (ret < 0) @@ -5814,7 +5849,7 @@ peek_stack: /* all other non-branch instructions with single * fall-through edge */ - ret = push_insn(t, t + 1, FALLTHROUGH, env); + ret = push_insn(t, t + 1, FALLTHROUGH, env, false); if (ret == 1) goto peek_stack; else if (ret < 0) @@ -6247,6 +6282,8 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn, sl = *explored_state(env, insn); while (sl) { + if (sl->state.branches) + goto next; if (sl->state.insn_idx != insn || sl->state.curframe != cur->curframe) goto next; @@ -6611,12 +6648,32 @@ static int propagate_liveness(struct bpf_verifier_env *env, return 0; } +static bool states_maybe_looping(struct bpf_verifier_state *old, + struct bpf_verifier_state *cur) +{ + struct bpf_func_state *fold, *fcur; + int i, fr = cur->curframe; + + if (old->curframe != fr) + return false; + + fold = old->frame[fr]; + fcur = cur->frame[fr]; + for (i = 0; i < MAX_BPF_REG; i++) + if (memcmp(&fold->regs[i], &fcur->regs[i], + offsetof(struct bpf_reg_state, parent))) + return false; + return true; +} + + static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) { struct bpf_verifier_state_list *new_sl; struct bpf_verifier_state_list *sl, **pprev; struct bpf_verifier_state *cur = env->cur_state, *new; int i, j, err, states_cnt = 0; + bool add_new_state = false; if (!env->insn_aux_data[insn_idx].prune_point) /* this 'insn_idx' instruction wasn't marked, so we will not @@ -6624,6 +6681,18 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) */ return 0; + /* bpf progs typically have pruning point every 4 instructions + * http://vger.kernel.org/bpfconf2019.html#session-1 + * Do not add new state for future pruning if the verifier hasn't seen + * at least 2 jumps and at least 8 instructions. + * This heuristics helps decrease 'total_states' and 'peak_states' metric. + * In tests that amounts to up to 50% reduction into total verifier + * memory consumption and 20% verifier time speedup. + */ + if (env->jmps_processed - env->prev_jmps_processed >= 2 && + env->insn_processed - env->prev_insn_processed >= 8) + add_new_state = true; + pprev = explored_state(env, insn_idx); sl = *pprev; @@ -6633,6 +6702,30 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) states_cnt++; if (sl->state.insn_idx != insn_idx) goto next; + if (sl->state.branches) { + if (states_maybe_looping(&sl->state, cur) && + states_equal(env, &sl->state, cur)) { + verbose_linfo(env, insn_idx, "; "); + verbose(env, "infinite loop detected at insn %d\n", insn_idx); + return -EINVAL; + } + /* if the verifier is processing a loop, avoid adding new state + * too often, since different loop iterations have distinct + * states and may not help future pruning. + * This threshold shouldn't be too low to make sure that + * a loop with large bound will be rejected quickly. + * The most abusive loop will be: + * r1 += 1 + * if r1 < 1000000 goto pc-2 + * 1M insn_procssed limit / 100 == 10k peak states. + * This threshold shouldn't be too high either, since states + * at the end of the loop are likely to be useful in pruning. + */ + if (env->jmps_processed - env->prev_jmps_processed < 20 && + env->insn_processed - env->prev_insn_processed < 100) + add_new_state = false; + goto miss; + } if (states_equal(env, &sl->state, cur)) { sl->hit_cnt++; /* reached equivalent register/stack state, @@ -6650,7 +6743,15 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return err; return 1; } - sl->miss_cnt++; +miss: + /* when new state is not going to be added do not increase miss count. + * Otherwise several loop iterations will remove the state + * recorded earlier. The goal of these heuristics is to have + * states from some iterations of the loop (some in the beginning + * and some at the end) to help pruning. + */ + if (add_new_state) + sl->miss_cnt++; /* heuristic to determine whether this state is beneficial * to keep checking from state equivalence point of view. * Higher numbers increase max_states_per_insn and verification time, @@ -6662,6 +6763,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) */ *pprev = sl->next; if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) { + u32 br = sl->state.branches; + + WARN_ONCE(br, + "BUG live_done but branches_to_explore %d\n", + br); free_verifier_state(&sl->state, false); kfree(sl); env->peak_states--; @@ -6687,18 +6793,25 @@ next: if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) return 0; - /* there were no equivalent states, remember current one. - * technically the current state is not proven to be safe yet, + if (!add_new_state) + return 0; + + /* There were no equivalent states, remember the current one. + * Technically the current state is not proven to be safe yet, * but it will either reach outer most bpf_exit (which means it's safe) - * or it will be rejected. Since there are no loops, we won't be + * or it will be rejected. When there are no loops the verifier won't be * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx) - * again on the way to bpf_exit + * again on the way to bpf_exit. + * When looping the sl->state.branches will be > 0 and this state + * will not be considered for equivalence until branches == 0. */ new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); if (!new_sl) return -ENOMEM; env->total_states++; env->peak_states++; + env->prev_jmps_processed = env->jmps_processed; + env->prev_insn_processed = env->insn_processed; /* add new state to the head of linked list */ new = &new_sl->state; @@ -6709,6 +6822,9 @@ next: return err; } new->insn_idx = insn_idx; + WARN_ONCE(new->branches != 1, + "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx); + cur->parent = new; new_sl->next = *explored_state(env, insn_idx); *explored_state(env, insn_idx) = new_sl; /* connect new state to parentage chain. Current frame needs all @@ -6795,6 +6911,7 @@ static int do_check(struct bpf_verifier_env *env) return -ENOMEM; state->curframe = 0; state->speculative = false; + state->branches = 1; state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); if (!state->frame[0]) { kfree(state); @@ -7001,6 +7118,7 @@ static int do_check(struct bpf_verifier_env *env) } else if (class == BPF_JMP || class == BPF_JMP32) { u8 opcode = BPF_OP(insn->code); + env->jmps_processed++; if (opcode == BPF_CALL) { if (BPF_SRC(insn->code) != BPF_K || insn->off != 0 || @@ -7086,6 +7204,7 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; process_bpf_exit: + update_branch_counts(env, env->cur_state); err = pop_stack(env, &env->prev_insn_idx, &env->insn_idx); if (err < 0) { -- cgit v1.2.3 From b5dc0163d8fd78e64a7e21f309cf932fda34353e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sat, 15 Jun 2019 12:12:25 -0700 Subject: bpf: precise scalar_value tracking Introduce precision tracking logic that helps cilium programs the most: old clang old clang new clang new clang with all patches with all patches bpf_lb-DLB_L3.o 1838 2283 1923 1863 bpf_lb-DLB_L4.o 3218 2657 3077 2468 bpf_lb-DUNKNOWN.o 1064 545 1062 544 bpf_lxc-DDROP_ALL.o 26935 23045 166729 22629 bpf_lxc-DUNKNOWN.o 34439 35240 174607 28805 bpf_netdev.o 9721 8753 8407 6801 bpf_overlay.o 6184 7901 5420 4754 bpf_lxc_jit.o 39389 50925 39389 50925 Consider code: 654: (85) call bpf_get_hash_recalc#34 655: (bf) r7 = r0 656: (15) if r8 == 0x0 goto pc+29 657: (bf) r2 = r10 658: (07) r2 += -48 659: (18) r1 = 0xffff8881e41e1b00 661: (85) call bpf_map_lookup_elem#1 662: (15) if r0 == 0x0 goto pc+23 663: (69) r1 = *(u16 *)(r0 +0) 664: (15) if r1 == 0x0 goto pc+21 665: (bf) r8 = r7 666: (57) r8 &= 65535 667: (bf) r2 = r8 668: (3f) r2 /= r1 669: (2f) r2 *= r1 670: (bf) r1 = r8 671: (1f) r1 -= r2 672: (57) r1 &= 255 673: (25) if r1 > 0x1e goto pc+12 R0=map_value(id=0,off=0,ks=20,vs=64,imm=0) R1_w=inv(id=0,umax_value=30,var_off=(0x0; 0x1f)) 674: (67) r1 <<= 1 675: (0f) r0 += r1 At this point the verifier will notice that scalar R1 is used in map pointer adjustment. R1 has to be precise for later operations on R0 to be validated properly. The verifier will backtrack the above code in the following way: last_idx 675 first_idx 664 regs=2 stack=0 before 675: (0f) r0 += r1 // started backtracking R1 regs=2 is a bitmask regs=2 stack=0 before 674: (67) r1 <<= 1 regs=2 stack=0 before 673: (25) if r1 > 0x1e goto pc+12 regs=2 stack=0 before 672: (57) r1 &= 255 regs=2 stack=0 before 671: (1f) r1 -= r2 // now both R1 and R2 has to be precise -> regs=6 mask regs=6 stack=0 before 670: (bf) r1 = r8 // after this insn R8 and R2 has to be precise regs=104 stack=0 before 669: (2f) r2 *= r1 // after this one R8, R2, and R1 regs=106 stack=0 before 668: (3f) r2 /= r1 regs=106 stack=0 before 667: (bf) r2 = r8 regs=102 stack=0 before 666: (57) r8 &= 65535 regs=102 stack=0 before 665: (bf) r8 = r7 regs=82 stack=0 before 664: (15) if r1 == 0x0 goto pc+21 // this is the end of verifier state. The following regs will be marked precised: R1_rw=invP(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R7_rw=invP(id=0) parent didn't have regs=82 stack=0 marks // so backtracking continues into parent state last_idx 663 first_idx 655 regs=82 stack=0 before 663: (69) r1 = *(u16 *)(r0 +0) // R1 was assigned no need to track it further regs=80 stack=0 before 662: (15) if r0 == 0x0 goto pc+23 // keep tracking R7 regs=80 stack=0 before 661: (85) call bpf_map_lookup_elem#1 // keep tracking R7 regs=80 stack=0 before 659: (18) r1 = 0xffff8881e41e1b00 regs=80 stack=0 before 658: (07) r2 += -48 regs=80 stack=0 before 657: (bf) r2 = r10 regs=80 stack=0 before 656: (15) if r8 == 0x0 goto pc+29 regs=80 stack=0 before 655: (bf) r7 = r0 // here the assignment into R7 // mark R0 to be precise: R0_rw=invP(id=0) parent didn't have regs=1 stack=0 marks // regs=1 -> tracking R0 last_idx 654 first_idx 644 regs=1 stack=0 before 654: (85) call bpf_get_hash_recalc#34 // and in the parent frame it was a return value // nothing further to backtrack Two scalar registers not marked precise are equivalent from state pruning point of view. More details in the patch comments. It doesn't support bpf2bpf calls yet and enabled for root only. Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 18 ++ kernel/bpf/verifier.c | 491 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 498 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 03037373b447..19393b0964a8 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -139,6 +139,8 @@ struct bpf_reg_state { */ s32 subreg_def; enum bpf_reg_liveness live; + /* if (!precise && SCALAR_VALUE) min/max/tnum don't affect safety */ + bool precise; }; enum bpf_stack_slot_type { @@ -190,6 +192,11 @@ struct bpf_func_state { struct bpf_stack_state *stack; }; +struct bpf_idx_pair { + u32 prev_idx; + u32 idx; +}; + #define MAX_CALL_FRAMES 8 struct bpf_verifier_state { /* call stack tracking */ @@ -245,6 +252,17 @@ struct bpf_verifier_state { u32 curframe; u32 active_spin_lock; bool speculative; + + /* first and last insn idx of this verifier state */ + u32 first_insn_idx; + u32 last_insn_idx; + /* jmp history recorded from first to last. + * backtracking is using it to go from last to first. + * For most states jmp_history_cnt is [0-3]. + * For loops can go up to ~40. + */ + struct bpf_idx_pair *jmp_history; + u32 jmp_history_cnt; }; #define bpf_get_spilled_reg(slot, frame) \ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 870c8f19ce80..709ce4cef8ba 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -455,12 +455,12 @@ static void print_verifier_state(struct bpf_verifier_env *env, verbose(env, " R%d", i); print_liveness(env, reg->live); verbose(env, "=%s", reg_type_str[t]); + if (t == SCALAR_VALUE && reg->precise) + verbose(env, "P"); if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && tnum_is_const(reg->var_off)) { /* reg->off should be 0 for SCALAR_VALUE */ verbose(env, "%lld", reg->var_off.value + reg->off); - if (t == PTR_TO_STACK) - verbose(env, ",call_%d", func(env, reg)->callsite); } else { verbose(env, "(id=%d", reg->id); if (reg_type_may_be_refcounted_or_null(t)) @@ -522,11 +522,17 @@ static void print_verifier_state(struct bpf_verifier_env *env, continue; verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); print_liveness(env, state->stack[i].spilled_ptr.live); - if (state->stack[i].slot_type[0] == STACK_SPILL) - verbose(env, "=%s", - reg_type_str[state->stack[i].spilled_ptr.type]); - else + if (state->stack[i].slot_type[0] == STACK_SPILL) { + reg = &state->stack[i].spilled_ptr; + t = reg->type; + verbose(env, "=%s", reg_type_str[t]); + if (t == SCALAR_VALUE && reg->precise) + verbose(env, "P"); + if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) + verbose(env, "%lld", reg->var_off.value + reg->off); + } else { verbose(env, "=%s", types_buf); + } } if (state->acquired_refs && state->refs[0].id) { verbose(env, " refs=%d", state->refs[0].id); @@ -675,6 +681,13 @@ static void free_func_state(struct bpf_func_state *state) kfree(state); } +static void clear_jmp_history(struct bpf_verifier_state *state) +{ + kfree(state->jmp_history); + state->jmp_history = NULL; + state->jmp_history_cnt = 0; +} + static void free_verifier_state(struct bpf_verifier_state *state, bool free_self) { @@ -684,6 +697,7 @@ static void free_verifier_state(struct bpf_verifier_state *state, free_func_state(state->frame[i]); state->frame[i] = NULL; } + clear_jmp_history(state); if (free_self) kfree(state); } @@ -711,8 +725,18 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, const struct bpf_verifier_state *src) { struct bpf_func_state *dst; + u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt; int i, err; + if (dst_state->jmp_history_cnt < src->jmp_history_cnt) { + kfree(dst_state->jmp_history); + dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER); + if (!dst_state->jmp_history) + return -ENOMEM; + } + memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz); + dst_state->jmp_history_cnt = src->jmp_history_cnt; + /* if dst has more stack frames then src frame, free them */ for (i = src->curframe + 1; i <= dst_state->curframe; i++) { free_func_state(dst_state->frame[i]); @@ -723,6 +747,8 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, dst_state->active_spin_lock = src->active_spin_lock; dst_state->branches = src->branches; dst_state->parent = src->parent; + dst_state->first_insn_idx = src->first_insn_idx; + dst_state->last_insn_idx = src->last_insn_idx; for (i = 0; i <= src->curframe; i++) { dst = dst_state->frame[i]; if (!dst) { @@ -967,6 +993,9 @@ static void __mark_reg_unbounded(struct bpf_reg_state *reg) reg->smax_value = S64_MAX; reg->umin_value = 0; reg->umax_value = U64_MAX; + + /* constant backtracking is enabled for root only for now */ + reg->precise = capable(CAP_SYS_ADMIN) ? false : true; } /* Mark a register as having a completely unknown (scalar) value. */ @@ -1378,6 +1407,389 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, return 0; } +/* for any branch, call, exit record the history of jmps in the given state */ +static int push_jmp_history(struct bpf_verifier_env *env, + struct bpf_verifier_state *cur) +{ + u32 cnt = cur->jmp_history_cnt; + struct bpf_idx_pair *p; + + cnt++; + p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER); + if (!p) + return -ENOMEM; + p[cnt - 1].idx = env->insn_idx; + p[cnt - 1].prev_idx = env->prev_insn_idx; + cur->jmp_history = p; + cur->jmp_history_cnt = cnt; + return 0; +} + +/* Backtrack one insn at a time. If idx is not at the top of recorded + * history then previous instruction came from straight line execution. + */ +static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, + u32 *history) +{ + u32 cnt = *history; + + if (cnt && st->jmp_history[cnt - 1].idx == i) { + i = st->jmp_history[cnt - 1].prev_idx; + (*history)--; + } else { + i--; + } + return i; +} + +/* For given verifier state backtrack_insn() is called from the last insn to + * the first insn. Its purpose is to compute a bitmask of registers and + * stack slots that needs precision in the parent verifier state. + */ +static int backtrack_insn(struct bpf_verifier_env *env, int idx, + u32 *reg_mask, u64 *stack_mask) +{ + const struct bpf_insn_cbs cbs = { + .cb_print = verbose, + .private_data = env, + }; + struct bpf_insn *insn = env->prog->insnsi + idx; + u8 class = BPF_CLASS(insn->code); + u8 opcode = BPF_OP(insn->code); + u8 mode = BPF_MODE(insn->code); + u32 dreg = 1u << insn->dst_reg; + u32 sreg = 1u << insn->src_reg; + u32 spi; + + if (insn->code == 0) + return 0; + if (env->log.level & BPF_LOG_LEVEL) { + verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask); + verbose(env, "%d: ", idx); + print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); + } + + if (class == BPF_ALU || class == BPF_ALU64) { + if (!(*reg_mask & dreg)) + return 0; + if (opcode == BPF_MOV) { + if (BPF_SRC(insn->code) == BPF_X) { + /* dreg = sreg + * dreg needs precision after this insn + * sreg needs precision before this insn + */ + *reg_mask &= ~dreg; + *reg_mask |= sreg; + } else { + /* dreg = K + * dreg needs precision after this insn. + * Corresponding register is already marked + * as precise=true in this verifier state. + * No further markings in parent are necessary + */ + *reg_mask &= ~dreg; + } + } else { + if (BPF_SRC(insn->code) == BPF_X) { + /* dreg += sreg + * both dreg and sreg need precision + * before this insn + */ + *reg_mask |= sreg; + } /* else dreg += K + * dreg still needs precision before this insn + */ + } + } else if (class == BPF_LDX) { + if (!(*reg_mask & dreg)) + return 0; + *reg_mask &= ~dreg; + + /* scalars can only be spilled into stack w/o losing precision. + * Load from any other memory can be zero extended. + * The desire to keep that precision is already indicated + * by 'precise' mark in corresponding register of this state. + * No further tracking necessary. + */ + if (insn->src_reg != BPF_REG_FP) + return 0; + if (BPF_SIZE(insn->code) != BPF_DW) + return 0; + + /* dreg = *(u64 *)[fp - off] was a fill from the stack. + * that [fp - off] slot contains scalar that needs to be + * tracked with precision + */ + spi = (-insn->off - 1) / BPF_REG_SIZE; + if (spi >= 64) { + verbose(env, "BUG spi %d\n", spi); + WARN_ONCE(1, "verifier backtracking bug"); + return -EFAULT; + } + *stack_mask |= 1ull << spi; + } else if (class == BPF_STX) { + if (*reg_mask & dreg) + /* stx shouldn't be using _scalar_ dst_reg + * to access memory. It means backtracking + * encountered a case of pointer subtraction. + */ + return -ENOTSUPP; + /* scalars can only be spilled into stack */ + if (insn->dst_reg != BPF_REG_FP) + return 0; + if (BPF_SIZE(insn->code) != BPF_DW) + return 0; + spi = (-insn->off - 1) / BPF_REG_SIZE; + if (spi >= 64) { + verbose(env, "BUG spi %d\n", spi); + WARN_ONCE(1, "verifier backtracking bug"); + return -EFAULT; + } + if (!(*stack_mask & (1ull << spi))) + return 0; + *stack_mask &= ~(1ull << spi); + *reg_mask |= sreg; + } else if (class == BPF_JMP || class == BPF_JMP32) { + if (opcode == BPF_CALL) { + if (insn->src_reg == BPF_PSEUDO_CALL) + return -ENOTSUPP; + /* regular helper call sets R0 */ + *reg_mask &= ~1; + if (*reg_mask & 0x3f) { + /* if backtracing was looking for registers R1-R5 + * they should have been found already. + */ + verbose(env, "BUG regs %x\n", *reg_mask); + WARN_ONCE(1, "verifier backtracking bug"); + return -EFAULT; + } + } else if (opcode == BPF_EXIT) { + return -ENOTSUPP; + } + } else if (class == BPF_LD) { + if (!(*reg_mask & dreg)) + return 0; + *reg_mask &= ~dreg; + /* It's ld_imm64 or ld_abs or ld_ind. + * For ld_imm64 no further tracking of precision + * into parent is necessary + */ + if (mode == BPF_IND || mode == BPF_ABS) + /* to be analyzed */ + return -ENOTSUPP; + } else if (class == BPF_ST) { + if (*reg_mask & dreg) + /* likely pointer subtraction */ + return -ENOTSUPP; + } + return 0; +} + +/* the scalar precision tracking algorithm: + * . at the start all registers have precise=false. + * . scalar ranges are tracked as normal through alu and jmp insns. + * . once precise value of the scalar register is used in: + * . ptr + scalar alu + * . if (scalar cond K|scalar) + * . helper_call(.., scalar, ...) where ARG_CONST is expected + * backtrack through the verifier states and mark all registers and + * stack slots with spilled constants that these scalar regisers + * should be precise. + * . during state pruning two registers (or spilled stack slots) + * are equivalent if both are not precise. + * + * Note the verifier cannot simply walk register parentage chain, + * since many different registers and stack slots could have been + * used to compute single precise scalar. + * + * The approach of starting with precise=true for all registers and then + * backtrack to mark a register as not precise when the verifier detects + * that program doesn't care about specific value (e.g., when helper + * takes register as ARG_ANYTHING parameter) is not safe. + * + * It's ok to walk single parentage chain of the verifier states. + * It's possible that this backtracking will go all the way till 1st insn. + * All other branches will be explored for needing precision later. + * + * The backtracking needs to deal with cases like: + * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0) + * r9 -= r8 + * r5 = r9 + * if r5 > 0x79f goto pc+7 + * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff)) + * r5 += 1 + * ... + * call bpf_perf_event_output#25 + * where .arg5_type = ARG_CONST_SIZE_OR_ZERO + * + * and this case: + * r6 = 1 + * call foo // uses callee's r6 inside to compute r0 + * r0 += r6 + * if r0 == 0 goto + * + * to track above reg_mask/stack_mask needs to be independent for each frame. + * + * Also if parent's curframe > frame where backtracking started, + * the verifier need to mark registers in both frames, otherwise callees + * may incorrectly prune callers. This is similar to + * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences") + * + * For now backtracking falls back into conservative marking. + */ +static void mark_all_scalars_precise(struct bpf_verifier_env *env, + struct bpf_verifier_state *st) +{ + struct bpf_func_state *func; + struct bpf_reg_state *reg; + int i, j; + + /* big hammer: mark all scalars precise in this path. + * pop_stack may still get !precise scalars. + */ + for (; st; st = st->parent) + for (i = 0; i <= st->curframe; i++) { + func = st->frame[i]; + for (j = 0; j < BPF_REG_FP; j++) { + reg = &func->regs[j]; + if (reg->type != SCALAR_VALUE) + continue; + reg->precise = true; + } + for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { + if (func->stack[j].slot_type[0] != STACK_SPILL) + continue; + reg = &func->stack[j].spilled_ptr; + if (reg->type != SCALAR_VALUE) + continue; + reg->precise = true; + } + } +} + +static int mark_chain_precision(struct bpf_verifier_env *env, int regno) +{ + struct bpf_verifier_state *st = env->cur_state; + int first_idx = st->first_insn_idx; + int last_idx = env->insn_idx; + struct bpf_func_state *func; + struct bpf_reg_state *reg; + u32 reg_mask = 1u << regno; + u64 stack_mask = 0; + bool skip_first = true; + int i, err; + + if (!env->allow_ptr_leaks) + /* backtracking is root only for now */ + return 0; + + func = st->frame[st->curframe]; + reg = &func->regs[regno]; + if (reg->type != SCALAR_VALUE) { + WARN_ONCE(1, "backtracing misuse"); + return -EFAULT; + } + if (reg->precise) + return 0; + func->regs[regno].precise = true; + + for (;;) { + DECLARE_BITMAP(mask, 64); + bool new_marks = false; + u32 history = st->jmp_history_cnt; + + if (env->log.level & BPF_LOG_LEVEL) + verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx); + for (i = last_idx;;) { + if (skip_first) { + err = 0; + skip_first = false; + } else { + err = backtrack_insn(env, i, ®_mask, &stack_mask); + } + if (err == -ENOTSUPP) { + mark_all_scalars_precise(env, st); + return 0; + } else if (err) { + return err; + } + if (!reg_mask && !stack_mask) + /* Found assignment(s) into tracked register in this state. + * Since this state is already marked, just return. + * Nothing to be tracked further in the parent state. + */ + return 0; + if (i == first_idx) + break; + i = get_prev_insn_idx(st, i, &history); + if (i >= env->prog->len) { + /* This can happen if backtracking reached insn 0 + * and there are still reg_mask or stack_mask + * to backtrack. + * It means the backtracking missed the spot where + * particular register was initialized with a constant. + */ + verbose(env, "BUG backtracking idx %d\n", i); + WARN_ONCE(1, "verifier backtracking bug"); + return -EFAULT; + } + } + st = st->parent; + if (!st) + break; + + func = st->frame[st->curframe]; + bitmap_from_u64(mask, reg_mask); + for_each_set_bit(i, mask, 32) { + reg = &func->regs[i]; + if (reg->type != SCALAR_VALUE) + continue; + if (!reg->precise) + new_marks = true; + reg->precise = true; + } + + bitmap_from_u64(mask, stack_mask); + for_each_set_bit(i, mask, 64) { + if (i >= func->allocated_stack / BPF_REG_SIZE) { + /* This can happen if backtracking + * is propagating stack precision where + * caller has larger stack frame + * than callee, but backtrack_insn() should + * have returned -ENOTSUPP. + */ + verbose(env, "BUG spi %d stack_size %d\n", + i, func->allocated_stack); + WARN_ONCE(1, "verifier backtracking bug"); + return -EFAULT; + } + + if (func->stack[i].slot_type[0] != STACK_SPILL) + continue; + reg = &func->stack[i].spilled_ptr; + if (reg->type != SCALAR_VALUE) + continue; + if (!reg->precise) + new_marks = true; + reg->precise = true; + } + if (env->log.level & BPF_LOG_LEVEL) { + print_verifier_state(env, func); + verbose(env, "parent %s regs=%x stack=%llx marks\n", + new_marks ? "didn't have" : "already had", + reg_mask, stack_mask); + } + + if (!new_marks) + break; + + last_idx = st->last_insn_idx; + first_idx = st->first_insn_idx; + } + return 0; +} + + static bool is_spillable_regtype(enum bpf_reg_type type) { switch (type) { @@ -1435,6 +1847,7 @@ static int check_stack_write(struct bpf_verifier_env *env, { struct bpf_func_state *cur; /* state of the current function */ int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; + u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg; struct bpf_reg_state *reg = NULL; err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE), @@ -1457,6 +1870,17 @@ static int check_stack_write(struct bpf_verifier_env *env, if (reg && size == BPF_REG_SIZE && register_is_const(reg) && !register_is_null(reg) && env->allow_ptr_leaks) { + if (dst_reg != BPF_REG_FP) { + /* The backtracking logic can only recognize explicit + * stack slot address like [fp - 8]. Other spill of + * scalar via different register has to be conervative. + * Backtrack from here and mark all registers as precise + * that contributed into 'reg' being a constant. + */ + err = mark_chain_precision(env, value_regno); + if (err) + return err; + } save_register_state(state, spi, reg); } else if (reg && is_spillable_regtype(reg->type)) { /* register containing pointer is being spilled into stack */ @@ -1529,8 +1953,13 @@ static int check_stack_write(struct bpf_verifier_env *env, state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; /* when we zero initialize stack slots mark them as such */ - if (reg && register_is_null(reg)) + if (reg && register_is_null(reg)) { + /* backtracking doesn't work for STACK_ZERO yet. */ + err = mark_chain_precision(env, value_regno); + if (err) + return err; type = STACK_ZERO; + } /* Mark slots affected by this stack write. */ for (i = 0; i < size; i++) @@ -1610,6 +2039,17 @@ static int check_stack_read(struct bpf_verifier_env *env, * so the whole register == const_zero */ __mark_reg_const_zero(&state->regs[value_regno]); + /* backtracking doesn't support STACK_ZERO yet, + * so mark it precise here, so that later + * backtracking can stop here. + * Backtracking may not need this if this register + * doesn't participate in pointer adjustment. + * Forward propagation of precise flag is not + * necessary either. This mark is only to stop + * backtracking. Any register that contributed + * to const 0 was marked precise before spill. + */ + state->regs[value_regno].precise = true; } else { /* have read misc data from the stack */ mark_reg_unknown(env, state->regs, value_regno); @@ -2925,6 +3365,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, err = check_helper_mem_access(env, regno - 1, reg->umax_value, zero_size_allowed, meta); + if (!err) + err = mark_chain_precision(env, regno); } else if (arg_type_is_int_ptr(arg_type)) { int size = int_ptr_type_to_size(arg_type); @@ -4361,6 +4803,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg; struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; u8 opcode = BPF_OP(insn->code); + int err; dst_reg = ®s[insn->dst_reg]; src_reg = NULL; @@ -4387,11 +4830,17 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, * This is legal, but we have to reverse our * src/dest handling in computing the range */ + err = mark_chain_precision(env, insn->dst_reg); + if (err) + return err; return adjust_ptr_min_max_vals(env, insn, src_reg, dst_reg); } } else if (ptr_reg) { /* pointer += scalar */ + err = mark_chain_precision(env, insn->src_reg); + if (err) + return err; return adjust_ptr_min_max_vals(env, insn, dst_reg, src_reg); } @@ -5348,6 +5797,13 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, tnum_is_const(src_reg->var_off)) pred = is_branch_taken(dst_reg, src_reg->var_off.value, opcode, is_jmp32); + if (pred >= 0) { + err = mark_chain_precision(env, insn->dst_reg); + if (BPF_SRC(insn->code) == BPF_X && !err) + err = mark_chain_precision(env, insn->src_reg); + if (err) + return err; + } if (pred == 1) { /* only follow the goto, ignore fall-through */ *insn_idx += insn->off; @@ -5825,6 +6281,11 @@ peek_stack: goto peek_stack; else if (ret < 0) goto err_free; + /* unconditional jmp is not a good pruning point, + * but it's marked, since backtracking needs + * to record jmp history in is_state_visited(). + */ + init_explored_state(env, t + insns[t].off + 1); /* tell verifier to check for equivalent states * after every call and jump */ @@ -6325,6 +6786,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, switch (rold->type) { case SCALAR_VALUE: if (rcur->type == SCALAR_VALUE) { + if (!rold->precise && !rcur->precise) + return true; /* new val must satisfy old val knowledge */ return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off); @@ -6675,6 +7138,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) int i, j, err, states_cnt = 0; bool add_new_state = false; + cur->last_insn_idx = env->prev_insn_idx; if (!env->insn_aux_data[insn_idx].prune_point) /* this 'insn_idx' instruction wasn't marked, so we will not * be doing state search here @@ -6791,10 +7255,10 @@ next: env->max_states_per_insn = states_cnt; if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) - return 0; + return push_jmp_history(env, cur); if (!add_new_state) - return 0; + return push_jmp_history(env, cur); /* There were no equivalent states, remember the current one. * Technically the current state is not proven to be safe yet, @@ -6824,7 +7288,10 @@ next: new->insn_idx = insn_idx; WARN_ONCE(new->branches != 1, "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx); + cur->parent = new; + cur->first_insn_idx = insn_idx; + clear_jmp_history(cur); new_sl->next = *explored_state(env, insn_idx); *explored_state(env, insn_idx) = new_sl; /* connect new state to parentage chain. Current frame needs all @@ -6904,6 +7371,7 @@ static int do_check(struct bpf_verifier_env *env) struct bpf_reg_state *regs; int insn_cnt = env->prog->len; bool do_print_state = false; + int prev_insn_idx = -1; env->prev_linfo = NULL; @@ -6929,6 +7397,7 @@ static int do_check(struct bpf_verifier_env *env) u8 class; int err; + env->prev_insn_idx = prev_insn_idx; if (env->insn_idx >= insn_cnt) { verbose(env, "invalid insn idx %d insn_cnt %d\n", env->insn_idx, insn_cnt); @@ -7001,6 +7470,7 @@ static int do_check(struct bpf_verifier_env *env) regs = cur_regs(env); env->insn_aux_data[env->insn_idx].seen = true; + prev_insn_idx = env->insn_idx; if (class == BPF_ALU || class == BPF_ALU64) { err = check_alu_op(env, insn); @@ -7174,7 +7644,6 @@ static int do_check(struct bpf_verifier_env *env) if (state->curframe) { /* exit from nested function */ - env->prev_insn_idx = env->insn_idx; err = prepare_func_exit(env, &env->insn_idx); if (err) return err; @@ -7206,7 +7675,7 @@ static int do_check(struct bpf_verifier_env *env) return err; process_bpf_exit: update_branch_counts(env, env->cur_state); - err = pop_stack(env, &env->prev_insn_idx, + err = pop_stack(env, &prev_insn_idx, &env->insn_idx); if (err < 0) { if (err != -ENOENT) -- cgit v1.2.3 From 0b385a0c3bd3f6d1044728b732bfc7dfb01c9fb5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 18 Jun 2019 10:18:28 +0200 Subject: PM: suspend: Rename pm_suspend_via_s2idle() The name of pm_suspend_via_s2idle() is confusing, as it doesn't reflect the purpose of the function precisely enough and it is very similar to pm_suspend_via_firmware(), which has a different purpose, so rename it as pm_suspend_default_s2idle() and update its only caller, i8042_register_ports(), accordingly. Signed-off-by: Rafael J. Wysocki Acked-by: Dmitry Torokhov --- drivers/input/serio/i8042.c | 2 +- include/linux/suspend.h | 4 ++-- kernel/power/suspend.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 6462f1798fbb..8384abc41d7f 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -1410,7 +1410,7 @@ static void __init i8042_register_ports(void) * behavior on many platforms using suspend-to-RAM (ACPI S3) * by default. */ - if (pm_suspend_via_s2idle() && i == I8042_KBD_PORT_NO) + if (pm_suspend_default_s2idle() && i == I8042_KBD_PORT_NO) device_set_wakeup_enable(&serio->dev, true); } } diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 05645f726815..d07ae7fb9315 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -282,7 +282,7 @@ static inline bool idle_should_enter_s2idle(void) return unlikely(s2idle_state == S2IDLE_STATE_ENTER); } -extern bool pm_suspend_via_s2idle(void); +extern bool pm_suspend_default_s2idle(void); extern void __init pm_states_init(void); extern void s2idle_set_ops(const struct platform_s2idle_ops *ops); extern void s2idle_wake(void); @@ -314,7 +314,7 @@ static inline void pm_set_suspend_via_firmware(void) {} static inline void pm_set_resume_via_firmware(void) {} static inline bool pm_suspend_via_firmware(void) { return false; } static inline bool pm_resume_via_firmware(void) { return false; } -static inline bool pm_suspend_via_s2idle(void) { return false; } +static inline bool pm_suspend_default_s2idle(void) { return false; } static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 9505101ed2bc..8703b0ca4986 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -62,16 +62,16 @@ enum s2idle_states __read_mostly s2idle_state; static DEFINE_RAW_SPINLOCK(s2idle_lock); /** - * pm_suspend_via_s2idle - Check if suspend-to-idle is the default suspend. + * pm_suspend_default_s2idle - Check if suspend-to-idle is the default suspend. * * Return 'true' if suspend-to-idle has been selected as the default system * suspend method. */ -bool pm_suspend_via_s2idle(void) +bool pm_suspend_default_s2idle(void) { return mem_sleep_current == PM_SUSPEND_TO_IDLE; } -EXPORT_SYMBOL_GPL(pm_suspend_via_s2idle); +EXPORT_SYMBOL_GPL(pm_suspend_default_s2idle); void s2idle_set_ops(const struct platform_s2idle_ops *ops) { -- cgit v1.2.3 From 387e3746d01c34457d6a73688acd90428725070b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 7 Jun 2019 17:24:38 +0300 Subject: locks: eliminate false positive conflicts for write lease check_conflicting_open() is checking for existing fd's open for read or for write before allowing to take a write lease. The check that was implemented using i_count and d_count is an approximation that has several false positives. For example, overlayfs since v4.19, takes an extra reference on the dentry; An open with O_PATH takes a reference on the dentry although the file cannot be read nor written. Change the implementation to use i_readcount and i_writecount to eliminate the false positive conflicts and allow a write lease to be taken on an overlayfs file. The change of behavior with existing fd's open with O_PATH is symmetric w.r.t. current behavior of lease breakers - an open with O_PATH currently does not break a write lease. This increases the size of struct inode by 4 bytes on 32bit archs when CONFIG_FILE_LOCKING is defined and CONFIG_IMA was not already defined. Signed-off-by: Amir Goldstein Signed-off-by: Jeff Layton --- fs/locks.c | 42 +++++++++++++++++++++++++++--------------- include/linux/fs.h | 4 ++-- 2 files changed, 29 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/locks.c b/fs/locks.c index 0cc2b9f30e22..de87a3231789 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1763,10 +1763,10 @@ int fcntl_getlease(struct file *filp) } /** - * check_conflicting_open - see if the given dentry points to a file that has + * check_conflicting_open - see if the given file points to an inode that has * an existing open that would conflict with the * desired lease. - * @dentry: dentry to check + * @filp: file to check * @arg: type of lease that we're trying to acquire * @flags: current lock flags * @@ -1774,30 +1774,42 @@ int fcntl_getlease(struct file *filp) * conflict with the lease we're trying to set. */ static int -check_conflicting_open(const struct dentry *dentry, const long arg, int flags) +check_conflicting_open(struct file *filp, const long arg, int flags) { - int ret = 0; - struct inode *inode = dentry->d_inode; + struct inode *inode = locks_inode(filp); + int self_wcount = 0, self_rcount = 0; if (flags & FL_LAYOUT) return 0; - if ((arg == F_RDLCK) && inode_is_open_for_write(inode)) - return -EAGAIN; + if (arg == F_RDLCK) + return inode_is_open_for_write(inode) ? -EAGAIN : 0; + else if (arg != F_WRLCK) + return 0; + + /* + * Make sure that only read/write count is from lease requestor. + * Note that this will result in denying write leases when i_writecount + * is negative, which is what we want. (We shouldn't grant write leases + * on files open for execution.) + */ + if (filp->f_mode & FMODE_WRITE) + self_wcount = 1; + else if (filp->f_mode & FMODE_READ) + self_rcount = 1; - if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || - (atomic_read(&inode->i_count) > 1))) - ret = -EAGAIN; + if (atomic_read(&inode->i_writecount) != self_wcount || + atomic_read(&inode->i_readcount) != self_rcount) + return -EAGAIN; - return ret; + return 0; } static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv) { struct file_lock *fl, *my_fl = NULL, *lease; - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = locks_inode(filp); struct file_lock_context *ctx; bool is_deleg = (*flp)->fl_flags & FL_DELEG; int error; @@ -1832,7 +1844,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); - error = check_conflicting_open(dentry, arg, lease->fl_flags); + error = check_conflicting_open(filp, arg, lease->fl_flags); if (error) goto out; @@ -1889,7 +1901,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr * precedes these checks. */ smp_mb(); - error = check_conflicting_open(dentry, arg, lease->fl_flags); + error = check_conflicting_open(filp, arg, lease->fl_flags); if (error) { locks_unlink_lock_ctx(lease); goto out; diff --git a/include/linux/fs.h b/include/linux/fs.h index f7fdfe93e25d..419e327022de 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -694,7 +694,7 @@ struct inode { atomic_t i_count; atomic_t i_dio_count; atomic_t i_writecount; -#ifdef CONFIG_IMA +#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING) atomic_t i_readcount; /* struct files open RO */ #endif union { @@ -2890,7 +2890,7 @@ static inline bool inode_is_open_for_write(const struct inode *inode) return atomic_read(&inode->i_writecount) > 0; } -#ifdef CONFIG_IMA +#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING) static inline void i_readcount_dec(struct inode *inode) { BUG_ON(!atomic_read(&inode->i_readcount)); -- cgit v1.2.3 From 82828b88f081a0084cd65f90a4a1d3652f5adb66 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 19 Jun 2019 09:41:02 +0300 Subject: flow_dissector: add support for ingress ifindex dissection Add new key meta that contains ingress ifindex value and add a function to dissect this from skb. The key and function is prepared to cover other potential skb metadata values dissection. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++++ include/net/flow_dissector.h | 9 +++++++++ net/core/flow_dissector.c | 16 ++++++++++++++++ 3 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 28bdaf978e72..b5d427b149c9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1320,6 +1320,10 @@ skb_flow_dissect_flow_keys_basic(const struct net *net, data, proto, nhoff, hlen, flags); } +void skb_flow_dissect_meta(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container); + void skb_flow_dissect_tunnel_info(const struct sk_buff *skb, struct flow_dissector *flow_dissector, diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index d7ce647a8ca9..02478e48fae4 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -200,6 +200,14 @@ struct flow_dissector_key_ip { __u8 ttl; }; +/** + * struct flow_dissector_key_meta: + * @ingress_ifindex: ingress ifindex + */ +struct flow_dissector_key_meta { + int ingress_ifindex; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -225,6 +233,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_vlan */ FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */ FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */ + FLOW_DISSECTOR_KEY_META, /* struct flow_dissector_key_meta */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index c0559af9e5e5..01ad60b5aa75 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -199,6 +199,22 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, } EXPORT_SYMBOL(__skb_flow_get_ports); +void skb_flow_dissect_meta(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container) +{ + struct flow_dissector_key_meta *meta; + + if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_META)) + return; + + meta = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_META, + target_container); + meta->ingress_ifindex = skb->skb_iif; +} +EXPORT_SYMBOL(skb_flow_dissect_meta); + static void skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type, struct flow_dissector *flow_dissector, -- cgit v1.2.3 From 896f1950e5944532b971d880a6bae7fba3b6a8d3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 19 Jun 2019 16:10:15 +0100 Subject: keys: Provide request_key_rcu() Provide a request_key_rcu() function that can be used to request a key under RCU conditions. It can only search and check permissions; it cannot allocate a new key, upcall or wait for an upcall to complete. It may return a partially constructed key. Signed-off-by: David Howells --- Documentation/security/keys/core.rst | 10 +++++++ Documentation/security/keys/request-key.rst | 9 ++++++ include/linux/key.h | 3 ++ security/keys/request_key.c | 44 +++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+) (limited to 'include/linux') diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst index 82dd457ff78d..003f1452a5b7 100644 --- a/Documentation/security/keys/core.rst +++ b/Documentation/security/keys/core.rst @@ -1147,6 +1147,16 @@ payload contents" for more information. case error ERESTARTSYS will be returned. + * To search for a key under RCU conditions, call:: + + struct key *request_key_rcu(const struct key_type *type, + const char *description); + + which is similar to request_key() except that it does not check for keys + that are under construction and it will not call out to userspace to + construct a key if it can't find a match. + + * When it is no longer required, the key should be released using:: void key_put(struct key *key); diff --git a/Documentation/security/keys/request-key.rst b/Documentation/security/keys/request-key.rst index 07af991463b5..7caedc4d29f1 100644 --- a/Documentation/security/keys/request-key.rst +++ b/Documentation/security/keys/request-key.rst @@ -36,6 +36,11 @@ or:: size_t callout_len, void *aux); +or:: + + struct key *request_key_rcu(const struct key_type *type, + const char *description); + Or by userspace invoking the request_key system call:: key_serial_t request_key(const char *type, @@ -57,6 +62,10 @@ The two async in-kernel calls may return keys that are still in the process of being constructed. The two non-async ones will wait for construction to complete first. +The request_key_rcu() call is like the in-kernel request_key() call, except +that it doesn't check for keys that are under construction and doesn't attempt +to construct missing keys. + The userspace interface links the key to a keyring associated with the process to prevent the key from going away, and returns the serial number of the key to the caller. diff --git a/include/linux/key.h b/include/linux/key.h index 612e1cf84049..3604a554df99 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -274,6 +274,9 @@ extern struct key *request_key(struct key_type *type, const char *description, const char *callout_info); +extern struct key *request_key_rcu(struct key_type *type, + const char *description); + extern struct key *request_key_with_auxdata(struct key_type *type, const char *description, const void *callout_info, diff --git a/security/keys/request_key.c b/security/keys/request_key.c index bf1d223ec21c..b4b3677657d6 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -756,3 +756,47 @@ struct key *request_key_async_with_auxdata(struct key_type *type, callout_len, aux, NULL, KEY_ALLOC_IN_QUOTA); } EXPORT_SYMBOL(request_key_async_with_auxdata); + +/** + * request_key_rcu - Request key from RCU-read-locked context + * @type: The type of key we want. + * @description: The name of the key we want. + * + * Request a key from a context that we may not sleep in (such as RCU-mode + * pathwalk). Keys under construction are ignored. + * + * Return a pointer to the found key if successful, -ENOKEY if we couldn't find + * a key or some other error if the key found was unsuitable or inaccessible. + */ +struct key *request_key_rcu(struct key_type *type, const char *description) +{ + struct keyring_search_context ctx = { + .index_key.type = type, + .index_key.description = description, + .index_key.desc_len = strlen(description), + .cred = current_cred(), + .match_data.cmp = key_default_cmp, + .match_data.raw_data = description, + .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, + .flags = (KEYRING_SEARCH_DO_STATE_CHECK | + KEYRING_SEARCH_SKIP_EXPIRED), + }; + struct key *key; + key_ref_t key_ref; + + kenter("%s,%s", type->name, description); + + /* search all the process keyrings for a key */ + key_ref = search_process_keyrings_rcu(&ctx); + if (IS_ERR(key_ref)) { + key = ERR_CAST(key_ref); + if (PTR_ERR(key_ref) == -EAGAIN) + key = ERR_PTR(-ENOKEY); + } else { + key = key_ref_to_ptr(key_ref); + } + + kleave(" = %p", key); + return key; +} +EXPORT_SYMBOL(request_key_rcu); -- cgit v1.2.3 From 7743c48e54ee9be9c799cbf3b8e3e9f2b8d19e72 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 19 Jun 2019 16:10:15 +0100 Subject: keys: Cache result of request_key*() temporarily in task_struct If a filesystem uses keys to hold authentication tokens, then it needs a token for each VFS operation that might perform an authentication check - either by passing it to the server, or using to perform a check based on authentication data cached locally. For open files this isn't a problem, since the key should be cached in the file struct since it represents the subject performing operations on that file descriptor. During pathwalk, however, there isn't anywhere to cache the key, except perhaps in the nameidata struct - but that isn't exposed to the filesystems. Further, a pathwalk can incur a lot of operations, calling one or more of the following, for instance: ->lookup() ->permission() ->d_revalidate() ->d_automount() ->get_acl() ->getxattr() on each dentry/inode it encounters - and each one may need to call request_key(). And then, at the end of pathwalk, it will call the actual operation: ->mkdir() ->mknod() ->getattr() ->open() ... which may need to go and get the token again. However, it is very likely that all of the operations on a single dentry/inode - and quite possibly a sequence of them - will all want to use the same authentication token, which suggests that caching it would be a good idea. To this end: (1) Make it so that a positive result of request_key() and co. that didn't require upcalling to userspace is cached temporarily in task_struct. (2) The cache is 1 deep, so a new result displaces the old one. (3) The key is released by exit and by notify-resume. (4) The cache is cleared in a newly forked process. Signed-off-by: David Howells --- Documentation/security/keys/request-key.rst | 7 +++++- include/linux/sched.h | 5 ++++ include/linux/tracehook.h | 7 ++++++ kernel/cred.c | 9 +++++++ security/keys/Kconfig | 18 ++++++++++++++ security/keys/request_key.c | 37 +++++++++++++++++++++++++++++ 6 files changed, 82 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/security/keys/request-key.rst b/Documentation/security/keys/request-key.rst index 7caedc4d29f1..45049abdf290 100644 --- a/Documentation/security/keys/request-key.rst +++ b/Documentation/security/keys/request-key.rst @@ -176,6 +176,9 @@ The process stops immediately a valid key is found with permission granted to use it. Any error from a previous match attempt is discarded and the key is returned. +When request_key() is invoked, if CONFIG_KEYS_REQUEST_CACHE=y, a per-task +one-key cache is first checked for a match. + When search_process_keyrings() is invoked, it performs the following searches until one succeeds: @@ -195,7 +198,9 @@ until one succeeds: c) The calling process's session keyring is searched. The moment one succeeds, all pending errors are discarded and the found key is -returned. +returned. If CONFIG_KEYS_REQUEST_CACHE=y, then that key is placed in the +per-task cache, displacing the previous key. The cache is cleared on exit or +just prior to resumption of userspace. Only if all these fail does the whole thing fail with the highest priority error. Note that several errors may have come from LSM. diff --git a/include/linux/sched.h b/include/linux/sched.h index 11837410690f..e5f18857dd53 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -831,6 +831,11 @@ struct task_struct { /* Effective (overridable) subjective task credentials (COW): */ const struct cred __rcu *cred; +#ifdef CONFIG_KEYS + /* Cached requested key. */ + struct key *cached_requested_key; +#endif + /* * executable name, excluding path. * diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index df20f8bdbfa3..81824467e6a6 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -187,6 +187,13 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) if (unlikely(current->task_works)) task_work_run(); +#ifdef CONFIG_KEYS_REQUEST_CACHE + if (unlikely(current->cached_requested_key)) { + key_put(current->cached_requested_key); + current->cached_requested_key = NULL; + } +#endif + mem_cgroup_handle_over_high(); blkcg_maybe_throttle_current(); } diff --git a/kernel/cred.c b/kernel/cred.c index 3bd40de9e192..26da7e77098f 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -174,6 +174,11 @@ void exit_creds(struct task_struct *tsk) validate_creds(cred); alter_cred_subscribers(cred, -1); put_cred(cred); + +#ifdef CONFIG_KEYS_REQUEST_CACHE + key_put(current->cached_requested_key); + current->cached_requested_key = NULL; +#endif } /** @@ -327,6 +332,10 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) struct cred *new; int ret; +#ifdef CONFIG_KEYS_REQUEST_CACHE + p->cached_requested_key = NULL; +#endif + if ( #ifdef CONFIG_KEYS !p->cred->thread_keyring && diff --git a/security/keys/Kconfig b/security/keys/Kconfig index 6462e6654ccf..12f70b556d09 100644 --- a/security/keys/Kconfig +++ b/security/keys/Kconfig @@ -24,6 +24,24 @@ config KEYS_COMPAT def_bool y depends on COMPAT && KEYS +config KEYS_REQUEST_CACHE + bool "Enable temporary caching of the last request_key() result" + depends on KEYS + help + This option causes the result of the last successful request_key() + call that didn't upcall to the kernel to be cached temporarily in the + task_struct. The cache is cleared by exit and just prior to the + resumption of userspace. + + This allows the key used for multiple step processes where each step + wants to request a key that is likely the same as the one requested + by the last step to save on the searching. + + An example of such a process is a pathwalk through a network + filesystem in which each method needs to request an authentication + key. Pathwalk will call multiple methods for each dentry traversed + (permission, d_revalidate, lookup, getxattr, getacl, ...). + config PERSISTENT_KEYRINGS bool "Enable register of persistent per-UID keyrings" depends on KEYS diff --git a/security/keys/request_key.c b/security/keys/request_key.c index b4b3677657d6..f289982cb5db 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -22,6 +22,31 @@ #define key_negative_timeout 60 /* default timeout on a negative key's existence */ +static struct key *check_cached_key(struct keyring_search_context *ctx) +{ +#ifdef CONFIG_KEYS_REQUEST_CACHE + struct key *key = current->cached_requested_key; + + if (key && + ctx->match_data.cmp(key, &ctx->match_data) && + !(key->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED)))) + return key_get(key); +#endif + return NULL; +} + +static void cache_requested_key(struct key *key) +{ +#ifdef CONFIG_KEYS_REQUEST_CACHE + struct task_struct *t = current; + + key_put(t->cached_requested_key); + t->cached_requested_key = key_get(key); + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); +#endif +} + /** * complete_request_key - Complete the construction of a key. * @authkey: The authorisation key. @@ -562,6 +587,10 @@ struct key *request_key_and_link(struct key_type *type, } } + key = check_cached_key(&ctx); + if (key) + return key; + /* search all the process keyrings for a key */ rcu_read_lock(); key_ref = search_process_keyrings_rcu(&ctx); @@ -587,6 +616,9 @@ struct key *request_key_and_link(struct key_type *type, goto error_free; } } + + /* Only cache the key on immediate success */ + cache_requested_key(key); } else if (PTR_ERR(key_ref) != -EAGAIN) { key = ERR_CAST(key_ref); } else { @@ -786,6 +818,10 @@ struct key *request_key_rcu(struct key_type *type, const char *description) kenter("%s,%s", type->name, description); + key = check_cached_key(&ctx); + if (key) + return key; + /* search all the process keyrings for a key */ key_ref = search_process_keyrings_rcu(&ctx); if (IS_ERR(key_ref)) { @@ -794,6 +830,7 @@ struct key *request_key_rcu(struct key_type *type, const char *description) key = ERR_PTR(-ENOKEY); } else { key = key_ref_to_ptr(key_ref); + cache_requested_key(key); } kleave(" = %p", key); -- cgit v1.2.3 From 3ae72562ad917df36a1b1247d749240e3b4865db Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Wed, 19 Jun 2019 23:45:09 -0400 Subject: ext4: optimize case-insensitive lookups Temporarily cache a casefolded version of the file name under lookup in ext4_filename, to avoid repeatedly casefolding it. I got up to 30% speedup on lookups of large directories (>100k entries), depending on the length of the string under lookup. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Theodore Ts'o --- fs/ext4/dir.c | 2 +- fs/ext4/ext4.h | 39 ++++++++++++++++++++++++++++++++++++--- fs/ext4/namei.c | 43 ++++++++++++++++++++++++++++++++++++++----- fs/unicode/utf8-core.c | 28 ++++++++++++++++++++++++++++ include/linux/unicode.h | 3 +++ 5 files changed, 106 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 1f7784bee42a..770a1e6d4672 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -677,7 +677,7 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len, return memcmp(str, name->name, len); } - return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr); + return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr, false); } static int ext4_d_hash(const struct dentry *dentry, struct qstr *str) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ceb74093e138..7215a2a2a0de 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2078,6 +2078,9 @@ struct ext4_filename { #ifdef CONFIG_FS_ENCRYPTION struct fscrypt_str crypto_buf; #endif +#ifdef CONFIG_UNICODE + struct fscrypt_str cf_name; +#endif }; #define fname_name(p) ((p)->disk_name.name) @@ -2303,6 +2306,12 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb, struct ext4_group_desc *gdp); ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); +#ifdef CONFIG_UNICODE +extern void ext4_fname_setup_ci_filename(struct inode *dir, + const struct qstr *iname, + struct fscrypt_str *fname); +#endif + #ifdef CONFIG_FS_ENCRYPTION static inline void ext4_fname_from_fscrypt_name(struct ext4_filename *dst, const struct fscrypt_name *src) @@ -2329,6 +2338,10 @@ static inline int ext4_fname_setup_filename(struct inode *dir, return err; ext4_fname_from_fscrypt_name(fname, &name); + +#ifdef CONFIG_UNICODE + ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name); +#endif return 0; } @@ -2344,6 +2357,10 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir, return err; ext4_fname_from_fscrypt_name(fname, &name); + +#ifdef CONFIG_UNICODE + ext4_fname_setup_ci_filename(dir, &dentry->d_name, &fname->cf_name); +#endif return 0; } @@ -2357,6 +2374,11 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) fname->crypto_buf.name = NULL; fname->usr_fname = NULL; fname->disk_name.name = NULL; + +#ifdef CONFIG_UNICODE + kfree(fname->cf_name.name); + fname->cf_name.name = NULL; +#endif } #else /* !CONFIG_FS_ENCRYPTION */ static inline int ext4_fname_setup_filename(struct inode *dir, @@ -2367,6 +2389,11 @@ static inline int ext4_fname_setup_filename(struct inode *dir, fname->usr_fname = iname; fname->disk_name.name = (unsigned char *) iname->name; fname->disk_name.len = iname->len; + +#ifdef CONFIG_UNICODE + ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name); +#endif + return 0; } @@ -2377,7 +2404,13 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir, return ext4_fname_setup_filename(dir, &dentry->d_name, 1, fname); } -static inline void ext4_fname_free_filename(struct ext4_filename *fname) { } +static inline void ext4_fname_free_filename(struct ext4_filename *fname) +{ +#ifdef CONFIG_UNICODE + kfree(fname->cf_name.name); + fname->cf_name.name = NULL; +#endif +} #endif /* !CONFIG_FS_ENCRYPTION */ /* dir.c */ @@ -3120,8 +3153,8 @@ extern int ext4_handle_dirty_dirent_node(handle_t *handle, struct inode *inode, struct buffer_head *bh); extern int ext4_ci_compare(const struct inode *parent, - const struct qstr *name, - const struct qstr *entry); + const struct qstr *fname, + const struct qstr *entry, bool quick); #define S_SHIFT 12 static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = { diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index cd01c4a67ffb..4909ced4e672 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1259,19 +1259,24 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) #ifdef CONFIG_UNICODE /* * Test whether a case-insensitive directory entry matches the filename - * being searched for. + * being searched for. If quick is set, assume the name being looked up + * is already in the casefolded form. * * Returns: 0 if the directory entry matches, more than 0 if it * doesn't match or less than zero on error. */ int ext4_ci_compare(const struct inode *parent, const struct qstr *name, - const struct qstr *entry) + const struct qstr *entry, bool quick) { const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb); const struct unicode_map *um = sbi->s_encoding; int ret; - ret = utf8_strncasecmp(um, name, entry); + if (quick) + ret = utf8_strncasecmp_folded(um, name, entry); + else + ret = utf8_strncasecmp(um, name, entry); + if (ret < 0) { /* Handle invalid character sequence as either an error * or as an opaque byte sequence. @@ -1287,6 +1292,27 @@ int ext4_ci_compare(const struct inode *parent, const struct qstr *name, return ret; } + +void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, + struct fscrypt_str *cf_name) +{ + if (!IS_CASEFOLDED(dir)) { + cf_name->name = NULL; + return; + } + + cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS); + if (!cf_name->name) + return; + + cf_name->len = utf8_casefold(EXT4_SB(dir->i_sb)->s_encoding, + iname, cf_name->name, + EXT4_NAME_LEN); + if (cf_name->len <= 0) { + kfree(cf_name->name); + cf_name->name = NULL; + } +} #endif /* @@ -1313,8 +1339,15 @@ static inline bool ext4_match(const struct inode *parent, #endif #ifdef CONFIG_UNICODE - if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) - return (ext4_ci_compare(parent, fname->usr_fname, &entry) == 0); + if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) { + if (fname->cf_name.name) { + struct qstr cf = {.name = fname->cf_name.name, + .len = fname->cf_name.len}; + return !ext4_ci_compare(parent, &cf, &entry, true); + } + return !ext4_ci_compare(parent, fname->usr_fname, &entry, + false); + } #endif return fscrypt_match_name(&f, de->name, de->name_len); diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c index 6afab4fdce90..71ca4d047d65 100644 --- a/fs/unicode/utf8-core.c +++ b/fs/unicode/utf8-core.c @@ -73,6 +73,34 @@ int utf8_strncasecmp(const struct unicode_map *um, } EXPORT_SYMBOL(utf8_strncasecmp); +/* String cf is expected to be a valid UTF-8 casefolded + * string. + */ +int utf8_strncasecmp_folded(const struct unicode_map *um, + const struct qstr *cf, + const struct qstr *s1) +{ + const struct utf8data *data = utf8nfdicf(um->version); + struct utf8cursor cur1; + int c1, c2; + int i = 0; + + if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) + return -EINVAL; + + do { + c1 = utf8byte(&cur1); + c2 = cf->name[i++]; + if (c1 < 0) + return -EINVAL; + if (c1 != c2) + return 1; + } while (c1); + + return 0; +} +EXPORT_SYMBOL(utf8_strncasecmp_folded); + int utf8_casefold(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen) { diff --git a/include/linux/unicode.h b/include/linux/unicode.h index aec2c6d800aa..990aa97d8049 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -17,6 +17,9 @@ int utf8_strncmp(const struct unicode_map *um, int utf8_strncasecmp(const struct unicode_map *um, const struct qstr *s1, const struct qstr *s2); +int utf8_strncasecmp_folded(const struct unicode_map *um, + const struct qstr *cf, + const struct qstr *s1); int utf8_normalize(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen); -- cgit v1.2.3 From f03631da4be33219021323630a8cf788fd066267 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 19 Jun 2019 13:52:54 -0600 Subject: coresight: Introduce generic platform data helper So far we have hard coded the DT platform parsing code in every driver. Introduce generic helper to parse the information provided by the firmware in a platform agnostic manner, in preparation for the ACPI support. Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-catu.c | 13 ++--- drivers/hwtracing/coresight/coresight-etb10.c | 11 ++-- drivers/hwtracing/coresight/coresight-etm3x.c | 12 ++--- drivers/hwtracing/coresight/coresight-etm4x.c | 11 ++-- drivers/hwtracing/coresight/coresight-funnel.c | 11 ++-- drivers/hwtracing/coresight/coresight-platform.c | 58 ++++++++++++++++------ drivers/hwtracing/coresight/coresight-replicator.c | 11 ++-- drivers/hwtracing/coresight/coresight-stm.c | 11 ++-- drivers/hwtracing/coresight/coresight-tmc.c | 13 ++--- drivers/hwtracing/coresight/coresight-tpiu.c | 11 ++-- include/linux/coresight.h | 7 +-- 11 files changed, 83 insertions(+), 86 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c index 63109c98765c..799ba1dd877e 100644 --- a/drivers/hwtracing/coresight/coresight-catu.c +++ b/drivers/hwtracing/coresight/coresight-catu.c @@ -503,17 +503,14 @@ static int catu_probe(struct amba_device *adev, const struct amba_id *id) struct coresight_desc catu_desc; struct coresight_platform_data *pdata = NULL; struct device *dev = &adev->dev; - struct device_node *np = dev->of_node; void __iomem *base; - if (np) { - pdata = of_get_coresight_platform_data(dev, np); - if (IS_ERR(pdata)) { - ret = PTR_ERR(pdata); - goto out; - } - dev->platform_data = pdata; + pdata = coresight_get_platform_data(dev); + if (IS_ERR(pdata)) { + ret = PTR_ERR(pdata); + goto out; } + dev->platform_data = pdata; drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) { diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index 0c9161fe4233..cb8cb03e0cdb 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -732,14 +732,11 @@ static int etb_probe(struct amba_device *adev, const struct amba_id *id) struct etb_drvdata *drvdata; struct resource *res = &adev->res; struct coresight_desc desc = { 0 }; - struct device_node *np = adev->dev.of_node; - if (np) { - pdata = of_get_coresight_platform_data(dev, np); - if (IS_ERR(pdata)) - return PTR_ERR(pdata); - adev->dev.platform_data = pdata; - } + pdata = coresight_get_platform_data(dev); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); + adev->dev.platform_data = pdata; drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c index fa2f1417cafb..fa2164ff07c2 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x.c +++ b/drivers/hwtracing/coresight/coresight-etm3x.c @@ -790,20 +790,16 @@ static int etm_probe(struct amba_device *adev, const struct amba_id *id) struct etm_drvdata *drvdata; struct resource *res = &adev->res; struct coresight_desc desc = { 0 }; - struct device_node *np = adev->dev.of_node; drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) return -ENOMEM; - if (np) { - pdata = of_get_coresight_platform_data(dev, np); - if (IS_ERR(pdata)) - return PTR_ERR(pdata); - - adev->dev.platform_data = pdata; - } + pdata = coresight_get_platform_data(dev); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); + adev->dev.platform_data = pdata; drvdata->use_cp14 = fwnode_property_read_bool(dev->fwnode, "arm,cp14"); dev_set_drvdata(dev, drvdata); diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index 77d1d837da52..4355b2e8c308 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -1084,18 +1084,15 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) struct etmv4_drvdata *drvdata; struct resource *res = &adev->res; struct coresight_desc desc = { 0 }; - struct device_node *np = adev->dev.of_node; drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) return -ENOMEM; - if (np) { - pdata = of_get_coresight_platform_data(dev, np); - if (IS_ERR(pdata)) - return PTR_ERR(pdata); - adev->dev.platform_data = pdata; - } + pdata = coresight_get_platform_data(dev); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); + adev->dev.platform_data = pdata; dev_set_drvdata(dev, drvdata); diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index 3423042e7a52..fc033fdb6cd5 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -187,14 +187,11 @@ static int funnel_probe(struct device *dev, struct resource *res) struct coresight_platform_data *pdata = NULL; struct funnel_drvdata *drvdata; struct coresight_desc desc = { 0 }; - struct device_node *np = dev->of_node; - if (np) { - pdata = of_get_coresight_platform_data(dev, np); - if (IS_ERR(pdata)) - return PTR_ERR(pdata); - dev->platform_data = pdata; - } + pdata = coresight_get_platform_data(dev); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); + dev->platform_data = pdata; if (is_of_node(dev_fwnode(dev)) && of_device_is_compatible(dev->of_node, "arm,coresight-funnel")) diff --git a/drivers/hwtracing/coresight/coresight-platform.c b/drivers/hwtracing/coresight/coresight-platform.c index 4c31299607cf..5d78f4fbd97d 100644 --- a/drivers/hwtracing/coresight/coresight-platform.c +++ b/drivers/hwtracing/coresight/coresight-platform.c @@ -230,23 +230,16 @@ static int of_coresight_parse_endpoint(struct device *dev, return ret; } -struct coresight_platform_data * -of_get_coresight_platform_data(struct device *dev, - const struct device_node *node) +static int of_get_coresight_platform_data(struct device *dev, + struct coresight_platform_data *pdata) { int ret = 0; - struct coresight_platform_data *pdata; struct coresight_connection *conn; struct device_node *ep = NULL; const struct device_node *parent = NULL; bool legacy_binding = false; + struct device_node *node = dev->of_node; - pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) - return ERR_PTR(-ENOMEM); - - /* Use device name as sysfs handle */ - pdata->name = dev_name(dev); pdata->cpu = of_coresight_get_cpu(node); /* Get the number of input and output port for this component */ @@ -254,11 +247,11 @@ of_get_coresight_platform_data(struct device *dev, /* If there are no output connections, we are done */ if (!pdata->nr_outport) - return pdata; + return 0; ret = coresight_alloc_conns(dev, pdata); if (ret) - return ERR_PTR(ret); + return ret; parent = of_coresight_get_output_ports_node(node); /* @@ -292,11 +285,46 @@ of_get_coresight_platform_data(struct device *dev, case 0: break; default: - return ERR_PTR(ret); + return ret; } } - return pdata; + return 0; +} +#else +static inline int +of_get_coresight_platform_data(struct device *dev, + struct coresight_platform_data *pdata) +{ + return -ENOENT; } -EXPORT_SYMBOL_GPL(of_get_coresight_platform_data); #endif + +struct coresight_platform_data * +coresight_get_platform_data(struct device *dev) +{ + int ret = -ENOENT; + struct coresight_platform_data *pdata; + struct fwnode_handle *fwnode = dev_fwnode(dev); + + if (IS_ERR_OR_NULL(fwnode)) + goto error; + + pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) { + ret = -ENOMEM; + goto error; + } + + /* Use device name as sysfs handle */ + pdata->name = dev_name(dev); + + if (is_of_node(fwnode)) + ret = of_get_coresight_platform_data(dev, pdata); + + if (!ret) + return pdata; +error: + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(coresight_get_platform_data); diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c index 7e0514557920..054b33554695 100644 --- a/drivers/hwtracing/coresight/coresight-replicator.c +++ b/drivers/hwtracing/coresight/coresight-replicator.c @@ -177,15 +177,12 @@ static int replicator_probe(struct device *dev, struct resource *res) struct coresight_platform_data *pdata = NULL; struct replicator_drvdata *drvdata; struct coresight_desc desc = { 0 }; - struct device_node *np = dev->of_node; void __iomem *base; - if (np) { - pdata = of_get_coresight_platform_data(dev, np); - if (IS_ERR(pdata)) - return PTR_ERR(pdata); - dev->platform_data = pdata; - } + pdata = coresight_get_platform_data(dev); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); + dev->platform_data = pdata; if (is_of_node(dev_fwnode(dev)) && of_device_is_compatible(dev->of_node, "arm,coresight-replicator")) diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c index 3992a35e34e4..9faa1ed01500 100644 --- a/drivers/hwtracing/coresight/coresight-stm.c +++ b/drivers/hwtracing/coresight/coresight-stm.c @@ -809,14 +809,11 @@ static int stm_probe(struct amba_device *adev, const struct amba_id *id) struct resource ch_res; size_t bitmap_size; struct coresight_desc desc = { 0 }; - struct device_node *np = adev->dev.of_node; - if (np) { - pdata = of_get_coresight_platform_data(dev, np); - if (IS_ERR(pdata)) - return PTR_ERR(pdata); - adev->dev.platform_data = pdata; - } + pdata = coresight_get_platform_data(dev); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); + adev->dev.platform_data = pdata; drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) return -ENOMEM; diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c index 9c5e615c1486..be0bd98ca8c1 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.c +++ b/drivers/hwtracing/coresight/coresight-tmc.c @@ -397,16 +397,13 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) struct tmc_drvdata *drvdata; struct resource *res = &adev->res; struct coresight_desc desc = { 0 }; - struct device_node *np = adev->dev.of_node; - if (np) { - pdata = of_get_coresight_platform_data(dev, np); - if (IS_ERR(pdata)) { - ret = PTR_ERR(pdata); - goto out; - } - adev->dev.platform_data = pdata; + pdata = coresight_get_platform_data(dev); + if (IS_ERR(pdata)) { + ret = PTR_ERR(pdata); + goto out; } + adev->dev.platform_data = pdata; ret = -ENOMEM; drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index 4dd3e7f63050..aec0ed7bf924 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -124,14 +124,11 @@ static int tpiu_probe(struct amba_device *adev, const struct amba_id *id) struct tpiu_drvdata *drvdata; struct resource *res = &adev->res; struct coresight_desc desc = { 0 }; - struct device_node *np = adev->dev.of_node; - if (np) { - pdata = of_get_coresight_platform_data(dev, np); - if (IS_ERR(pdata)) - return PTR_ERR(pdata); - adev->dev.platform_data = pdata; - } + pdata = coresight_get_platform_data(dev); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); + dev->platform_data = pdata; drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 62a520df8add..e2b95e05e0be 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -294,14 +294,11 @@ static inline void coresight_disclaim_device_unlocked(void __iomem *base) {} #ifdef CONFIG_OF extern int of_coresight_get_cpu(const struct device_node *node); -extern struct coresight_platform_data * -of_get_coresight_platform_data(struct device *dev, - const struct device_node *node); #else static inline int of_coresight_get_cpu(const struct device_node *node) { return 0; } -static inline struct coresight_platform_data *of_get_coresight_platform_data( - struct device *dev, const struct device_node *node) { return NULL; } #endif +struct coresight_platform_data *coresight_get_platform_data(struct device *dev); + #endif -- cgit v1.2.3 From 91824db2ea2d2bacacd54de55a7faba10c63b166 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 19 Jun 2019 13:52:55 -0600 Subject: coresight: Make device to CPU mapping generic The CoreSight components ETM and CPU-Debug are always associated with CPUs. Replace the of_coresight_get_cpu() with a platform agnostic helper, in preparation to add ACPI support. Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-cpu-debug.c | 3 +-- drivers/hwtracing/coresight/coresight-platform.c | 18 +++++++++++++----- include/linux/coresight.h | 7 +------ 3 files changed, 15 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/coresight-cpu-debug.c b/drivers/hwtracing/coresight/coresight-cpu-debug.c index e8819d750938..07a1367c733f 100644 --- a/drivers/hwtracing/coresight/coresight-cpu-debug.c +++ b/drivers/hwtracing/coresight/coresight-cpu-debug.c @@ -572,14 +572,13 @@ static int debug_probe(struct amba_device *adev, const struct amba_id *id) struct device *dev = &adev->dev; struct debug_drvdata *drvdata; struct resource *res = &adev->res; - struct device_node *np = adev->dev.of_node; int ret; drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) return -ENOMEM; - drvdata->cpu = np ? of_coresight_get_cpu(np) : 0; + drvdata->cpu = coresight_get_cpu(dev); if (per_cpu(debug_drvdata, drvdata->cpu)) { dev_err(dev, "CPU%d drvdata has already been initialized\n", drvdata->cpu); diff --git a/drivers/hwtracing/coresight/coresight-platform.c b/drivers/hwtracing/coresight/coresight-platform.c index 5d78f4fbd97d..ba8c14635c6b 100644 --- a/drivers/hwtracing/coresight/coresight-platform.c +++ b/drivers/hwtracing/coresight/coresight-platform.c @@ -151,12 +151,14 @@ static void of_coresight_get_ports(const struct device_node *node, } } -int of_coresight_get_cpu(const struct device_node *node) +static int of_coresight_get_cpu(struct device *dev) { int cpu; struct device_node *dn; - dn = of_parse_phandle(node, "cpu", 0); + if (!dev->of_node) + return 0; + dn = of_parse_phandle(dev->of_node, "cpu", 0); /* Affinity defaults to CPU0 */ if (!dn) return 0; @@ -166,7 +168,6 @@ int of_coresight_get_cpu(const struct device_node *node) /* Affinity to CPU0 if no cpu nodes are found */ return (cpu < 0) ? 0 : cpu; } -EXPORT_SYMBOL_GPL(of_coresight_get_cpu); /* * of_coresight_parse_endpoint : Parse the given output endpoint @ep @@ -240,8 +241,6 @@ static int of_get_coresight_platform_data(struct device *dev, bool legacy_binding = false; struct device_node *node = dev->of_node; - pdata->cpu = of_coresight_get_cpu(node); - /* Get the number of input and output port for this component */ of_coresight_get_ports(node, &pdata->nr_inport, &pdata->nr_outport); @@ -300,6 +299,14 @@ of_get_coresight_platform_data(struct device *dev, } #endif +int coresight_get_cpu(struct device *dev) +{ + if (is_of_node(dev->fwnode)) + return of_coresight_get_cpu(dev); + return 0; +} +EXPORT_SYMBOL_GPL(coresight_get_cpu); + struct coresight_platform_data * coresight_get_platform_data(struct device *dev) { @@ -318,6 +325,7 @@ coresight_get_platform_data(struct device *dev) /* Use device name as sysfs handle */ pdata->name = dev_name(dev); + pdata->cpu = coresight_get_cpu(dev); if (is_of_node(fwnode)) ret = of_get_coresight_platform_data(dev, pdata); diff --git a/include/linux/coresight.h b/include/linux/coresight.h index e2b95e05e0be..98a4440dea3e 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -292,12 +292,7 @@ static inline void coresight_disclaim_device_unlocked(void __iomem *base) {} #endif -#ifdef CONFIG_OF -extern int of_coresight_get_cpu(const struct device_node *node); -#else -static inline int of_coresight_get_cpu(const struct device_node *node) -{ return 0; } -#endif +extern int coresight_get_cpu(struct device *dev); struct coresight_platform_data *coresight_get_platform_data(struct device *dev); -- cgit v1.2.3 From aff70a45fe3120b08ae459a6e3996346d2766b1f Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 19 Jun 2019 13:52:56 -0600 Subject: coresight: Remove cpu field from platform data CPU field is only used by ETMs and there is a separate API for fetching the same. So, let us use that instead of using the common platform probing helper. Also, remove it from the platform_data. Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-etm3x.c | 2 +- drivers/hwtracing/coresight/coresight-etm4x.c | 2 +- drivers/hwtracing/coresight/coresight-platform.c | 1 - include/linux/coresight.h | 2 -- 4 files changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c index fa2164ff07c2..722fab9632ef 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x.c +++ b/drivers/hwtracing/coresight/coresight-etm3x.c @@ -819,7 +819,7 @@ static int etm_probe(struct amba_device *adev, const struct amba_id *id) return ret; } - drvdata->cpu = pdata ? pdata->cpu : 0; + drvdata->cpu = coresight_get_cpu(dev); cpus_read_lock(); etmdrvdata[drvdata->cpu] = drvdata; diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index 4355b2e8c308..03576f3ed22d 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -1105,7 +1105,7 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) spin_lock_init(&drvdata->spinlock); - drvdata->cpu = pdata ? pdata->cpu : 0; + drvdata->cpu = coresight_get_cpu(dev); cpus_read_lock(); etmdrvdata[drvdata->cpu] = drvdata; diff --git a/drivers/hwtracing/coresight/coresight-platform.c b/drivers/hwtracing/coresight/coresight-platform.c index ba8c14635c6b..541e500a83c2 100644 --- a/drivers/hwtracing/coresight/coresight-platform.c +++ b/drivers/hwtracing/coresight/coresight-platform.c @@ -325,7 +325,6 @@ coresight_get_platform_data(struct device *dev) /* Use device name as sysfs handle */ pdata->name = dev_name(dev); - pdata->cpu = coresight_get_cpu(dev); if (is_of_node(fwnode)) ret = of_get_coresight_platform_data(dev, pdata); diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 98a4440dea3e..bf241dbf99c5 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -91,14 +91,12 @@ union coresight_dev_subtype { /** * struct coresight_platform_data - data harvested from the DT specification - * @cpu: the CPU a source belongs to. Only applicable for ETM/PTMs. * @name: name of the component as shown under sysfs. * @nr_inport: number of input ports for this component. * @nr_outport: number of output ports for this component. * @conns: Array of nr_outport connections from this component */ struct coresight_platform_data { - int cpu; const char *name; int nr_inport; int nr_outport; -- cgit v1.2.3 From 2ede79a6e8a541d1bc7c033b1198f05088e7cefb Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 19 Jun 2019 13:52:57 -0600 Subject: coresight: Remove name from platform description We are about to use a name independent of the parent AMBA device name. As such, there is no need to have it in the platform description. Let us move this to coresight description instead. Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-catu.c | 2 ++ drivers/hwtracing/coresight/coresight-etb10.c | 3 ++- drivers/hwtracing/coresight/coresight-etm3x.c | 1 + drivers/hwtracing/coresight/coresight-etm4x.c | 1 + drivers/hwtracing/coresight/coresight-funnel.c | 1 + drivers/hwtracing/coresight/coresight-platform.c | 3 --- drivers/hwtracing/coresight/coresight-replicator.c | 2 ++ drivers/hwtracing/coresight/coresight-stm.c | 1 + drivers/hwtracing/coresight/coresight-tmc.c | 5 +++-- drivers/hwtracing/coresight/coresight-tpiu.c | 1 + drivers/hwtracing/coresight/coresight.c | 2 +- include/linux/coresight.h | 8 ++++---- 12 files changed, 19 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c index 799ba1dd877e..05c73045532a 100644 --- a/drivers/hwtracing/coresight/coresight-catu.c +++ b/drivers/hwtracing/coresight/coresight-catu.c @@ -551,6 +551,8 @@ static int catu_probe(struct amba_device *adev, const struct amba_id *id) catu_desc.type = CORESIGHT_DEV_TYPE_HELPER; catu_desc.subtype.helper_subtype = CORESIGHT_DEV_SUBTYPE_HELPER_CATU; catu_desc.ops = &catu_ops; + catu_desc.name = dev_name(dev); + drvdata->csdev = coresight_register(&catu_desc); if (IS_ERR(drvdata->csdev)) ret = PTR_ERR(drvdata->csdev); diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index cb8cb03e0cdb..8726d6c7663c 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -777,12 +777,13 @@ static int etb_probe(struct amba_device *adev, const struct amba_id *id) desc.ops = &etb_cs_ops; desc.pdata = pdata; desc.dev = dev; + desc.name = dev_name(dev); desc.groups = coresight_etb_groups; drvdata->csdev = coresight_register(&desc); if (IS_ERR(drvdata->csdev)) return PTR_ERR(drvdata->csdev); - drvdata->miscdev.name = pdata->name; + drvdata->miscdev.name = desc.name; drvdata->miscdev.minor = MISC_DYNAMIC_MINOR; drvdata->miscdev.fops = &etb_fops; ret = misc_register(&drvdata->miscdev); diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c index 722fab9632ef..101fb01e20de 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x.c +++ b/drivers/hwtracing/coresight/coresight-etm3x.c @@ -854,6 +854,7 @@ static int etm_probe(struct amba_device *adev, const struct amba_id *id) desc.ops = &etm_cs_ops; desc.pdata = pdata; desc.dev = dev; + desc.name = dev_name(dev); desc.groups = coresight_etm_groups; drvdata->csdev = coresight_register(&desc); if (IS_ERR(drvdata->csdev)) { diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index 03576f3ed22d..8adc1485cd89 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -1142,6 +1142,7 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) desc.pdata = pdata; desc.dev = dev; desc.groups = coresight_etmv4_groups; + desc.name = dev_name(dev); drvdata->csdev = coresight_register(&desc); if (IS_ERR(drvdata->csdev)) { ret = PTR_ERR(drvdata->csdev); diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index fc033fdb6cd5..ded33f5e7d43 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -229,6 +229,7 @@ static int funnel_probe(struct device *dev, struct resource *res) desc.ops = &funnel_cs_ops; desc.pdata = pdata; desc.dev = dev; + desc.name = dev_name(dev); drvdata->csdev = coresight_register(&desc); if (IS_ERR(drvdata->csdev)) { ret = PTR_ERR(drvdata->csdev); diff --git a/drivers/hwtracing/coresight/coresight-platform.c b/drivers/hwtracing/coresight/coresight-platform.c index 541e500a83c2..f500de61e7f9 100644 --- a/drivers/hwtracing/coresight/coresight-platform.c +++ b/drivers/hwtracing/coresight/coresight-platform.c @@ -323,9 +323,6 @@ coresight_get_platform_data(struct device *dev) goto error; } - /* Use device name as sysfs handle */ - pdata->name = dev_name(dev); - if (is_of_node(fwnode)) ret = of_get_coresight_platform_data(dev, pdata); diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c index 054b33554695..f28bafd98995 100644 --- a/drivers/hwtracing/coresight/coresight-replicator.c +++ b/drivers/hwtracing/coresight/coresight-replicator.c @@ -220,6 +220,8 @@ static int replicator_probe(struct device *dev, struct resource *res) desc.ops = &replicator_cs_ops; desc.pdata = dev->platform_data; desc.dev = dev; + desc.name = dev_name(dev); + drvdata->csdev = coresight_register(&desc); if (IS_ERR(drvdata->csdev)) { ret = PTR_ERR(drvdata->csdev); diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c index 9faa1ed01500..02031d93fb8b 100644 --- a/drivers/hwtracing/coresight/coresight-stm.c +++ b/drivers/hwtracing/coresight/coresight-stm.c @@ -871,6 +871,7 @@ static int stm_probe(struct amba_device *adev, const struct amba_id *id) desc.ops = &stm_cs_ops; desc.pdata = pdata; desc.dev = dev; + desc.name = dev_name(dev); desc.groups = coresight_stm_groups; drvdata->csdev = coresight_register(&desc); if (IS_ERR(drvdata->csdev)) { diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c index be0bd98ca8c1..44a571988219 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.c +++ b/drivers/hwtracing/coresight/coresight-tmc.c @@ -437,6 +437,7 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) desc.pdata = pdata; desc.dev = dev; desc.groups = coresight_tmc_groups; + desc.name = dev_name(dev); switch (drvdata->config_type) { case TMC_CONFIG_TYPE_ETB: @@ -461,7 +462,7 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) desc.ops = &tmc_etf_cs_ops; break; default: - pr_err("%s: Unsupported TMC config\n", pdata->name); + pr_err("%s: Unsupported TMC config\n", desc.name); ret = -EINVAL; goto out; } @@ -472,7 +473,7 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) goto out; } - drvdata->miscdev.name = pdata->name; + drvdata->miscdev.name = desc.name; drvdata->miscdev.minor = MISC_DYNAMIC_MINOR; drvdata->miscdev.fops = &tmc_fops; ret = misc_register(&drvdata->miscdev); diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index aec0ed7bf924..d8a2e3991c7e 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -157,6 +157,7 @@ static int tpiu_probe(struct amba_device *adev, const struct amba_id *id) desc.ops = &tpiu_cs_ops; desc.pdata = pdata; desc.dev = dev; + desc.name = dev_name(dev); drvdata->csdev = coresight_register(&desc); if (!IS_ERR(drvdata->csdev)) { diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 4b130281236a..04b5d3c2bb3a 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -1199,7 +1199,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) csdev->dev.parent = desc->dev; csdev->dev.release = coresight_device_release; csdev->dev.bus = &coresight_bustype; - dev_set_name(&csdev->dev, "%s", desc->pdata->name); + dev_set_name(&csdev->dev, "%s", desc->name); ret = device_register(&csdev->dev); if (ret) { diff --git a/include/linux/coresight.h b/include/linux/coresight.h index bf241dbf99c5..298db20ba8ce 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -91,13 +91,11 @@ union coresight_dev_subtype { /** * struct coresight_platform_data - data harvested from the DT specification - * @name: name of the component as shown under sysfs. * @nr_inport: number of input ports for this component. * @nr_outport: number of output ports for this component. * @conns: Array of nr_outport connections from this component */ struct coresight_platform_data { - const char *name; int nr_inport; int nr_outport; struct coresight_connection *conns; @@ -108,11 +106,12 @@ struct coresight_platform_data { * @type: as defined by @coresight_dev_type. * @subtype: as defined by @coresight_dev_subtype. * @ops: generic operations for this component, as defined - by @coresight_ops. + * by @coresight_ops. * @pdata: platform data collected from DT. * @dev: The device entity associated to this component. * @groups: operations specific to this component. These will end up - in the component's sysfs sub-directory. + * in the component's sysfs sub-directory. + * @name: name for the coresight device, also shown under sysfs. */ struct coresight_desc { enum coresight_dev_type type; @@ -121,6 +120,7 @@ struct coresight_desc { struct coresight_platform_data *pdata; struct device *dev; const struct attribute_group **groups; + const char *name; }; /** -- cgit v1.2.3 From b77e3ed038c0d877f6f3b6ad278b931048a48e34 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 19 Jun 2019 13:52:59 -0600 Subject: coresight: Reuse platform data structure for connection tracking The platform specific information describes the connections and the ports of a given coresigh device. This information is also recorded in the coresight device as separate fields. Let us reuse the original platform description to streamline the handling of the data. Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 4 +-- drivers/hwtracing/coresight/coresight.c | 46 ++++++++++++------------- include/linux/coresight.h | 8 ++--- 3 files changed, 27 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 709448cf2dff..ce0114a5435c 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -756,8 +756,8 @@ tmc_etr_get_catu_device(struct tmc_drvdata *drvdata) if (!IS_ENABLED(CONFIG_CORESIGHT_CATU)) return NULL; - for (i = 0; i < etr->nr_outport; i++) { - tmp = etr->conns[i].child_dev; + for (i = 0; i < etr->pdata->nr_outport; i++) { + tmp = etr->pdata->conns[i].child_dev; if (tmp && coresight_is_catu_device(tmp)) return tmp; } diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 068bd2fc0985..96e15154a566 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -100,8 +100,8 @@ static int coresight_find_link_inport(struct coresight_device *csdev, int i; struct coresight_connection *conn; - for (i = 0; i < parent->nr_outport; i++) { - conn = &parent->conns[i]; + for (i = 0; i < parent->pdata->nr_outport; i++) { + conn = &parent->pdata->conns[i]; if (conn->child_dev == csdev) return conn->child_port; } @@ -118,8 +118,8 @@ static int coresight_find_link_outport(struct coresight_device *csdev, int i; struct coresight_connection *conn; - for (i = 0; i < csdev->nr_outport; i++) { - conn = &csdev->conns[i]; + for (i = 0; i < csdev->pdata->nr_outport; i++) { + conn = &csdev->pdata->conns[i]; if (conn->child_dev == child) return conn->outport; } @@ -306,10 +306,10 @@ static void coresight_disable_link(struct coresight_device *csdev, if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG) { refport = inport; - nr_conns = csdev->nr_inport; + nr_conns = csdev->pdata->nr_inport; } else if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_SPLIT) { refport = outport; - nr_conns = csdev->nr_outport; + nr_conns = csdev->pdata->nr_outport; } else { refport = 0; nr_conns = 1; @@ -595,9 +595,10 @@ static void coresight_grab_device(struct coresight_device *csdev) { int i; - for (i = 0; i < csdev->nr_outport; i++) { - struct coresight_device *child = csdev->conns[i].child_dev; + for (i = 0; i < csdev->pdata->nr_outport; i++) { + struct coresight_device *child; + child = csdev->pdata->conns[i].child_dev; if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) pm_runtime_get_sync(child->dev.parent); } @@ -613,9 +614,10 @@ static void coresight_drop_device(struct coresight_device *csdev) int i; pm_runtime_put(csdev->dev.parent); - for (i = 0; i < csdev->nr_outport; i++) { - struct coresight_device *child = csdev->conns[i].child_dev; + for (i = 0; i < csdev->pdata->nr_outport; i++) { + struct coresight_device *child; + child = csdev->pdata->conns[i].child_dev; if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) pm_runtime_put(child->dev.parent); } @@ -645,9 +647,10 @@ static int _coresight_build_path(struct coresight_device *csdev, goto out; /* Not a sink - recursively explore each port found on this element */ - for (i = 0; i < csdev->nr_outport; i++) { - struct coresight_device *child_dev = csdev->conns[i].child_dev; + for (i = 0; i < csdev->pdata->nr_outport; i++) { + struct coresight_device *child_dev; + child_dev = csdev->pdata->conns[i].child_dev; if (child_dev && _coresight_build_path(child_dev, sink, path) == 0) { found = true; @@ -1000,8 +1003,8 @@ static int coresight_orphan_match(struct device *dev, void *data) * Circle throuch all the connection of that component. If we find * an orphan connection whose name matches @csdev, link it. */ - for (i = 0; i < i_csdev->nr_outport; i++) { - conn = &i_csdev->conns[i]; + for (i = 0; i < i_csdev->pdata->nr_outport; i++) { + conn = &i_csdev->pdata->conns[i]; /* We have found at least one orphan connection */ if (conn->child_dev == NULL) { @@ -1040,8 +1043,8 @@ static void coresight_fixup_device_conns(struct coresight_device *csdev) { int i; - for (i = 0; i < csdev->nr_outport; i++) { - struct coresight_connection *conn = &csdev->conns[i]; + for (i = 0; i < csdev->pdata->nr_outport; i++) { + struct coresight_connection *conn = &csdev->pdata->conns[i]; struct device *dev = NULL; if (conn->child_name) @@ -1075,8 +1078,8 @@ static int coresight_remove_match(struct device *dev, void *data) * Circle throuch all the connection of that component. If we find * a connection whose name matches @csdev, remove it. */ - for (i = 0; i < iterator->nr_outport; i++) { - conn = &iterator->conns[i]; + for (i = 0; i < iterator->pdata->nr_outport; i++) { + conn = &iterator->pdata->conns[i]; if (conn->child_dev == NULL) continue; @@ -1108,7 +1111,7 @@ static void coresight_remove_conns(struct coresight_device *csdev) * doesn't have at least one input port, there is no point * in searching all the devices. */ - if (csdev->nr_inport) + if (csdev->pdata->nr_inport) bus_for_each_dev(&coresight_bustype, NULL, csdev, coresight_remove_match); } @@ -1195,10 +1198,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) csdev->refcnt = refcnts; - csdev->nr_inport = desc->pdata->nr_inport; - csdev->nr_outport = desc->pdata->nr_outport; - - csdev->conns = desc->pdata->conns; + csdev->pdata = desc->pdata; csdev->type = desc->type; csdev->subtype = desc->subtype; diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 298db20ba8ce..b67d5074ece0 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -140,9 +140,7 @@ struct coresight_connection { /** * struct coresight_device - representation of a device as used by the framework - * @conns: array of coresight_connections associated to this component. - * @nr_inport: number of input port associated to this component. - * @nr_outport: number of output port associated to this component. + * @pdata: Platform data with device connections associated to this device. * @type: as defined by @coresight_dev_type. * @subtype: as defined by @coresight_dev_subtype. * @ops: generic operations for this component, as defined @@ -157,9 +155,7 @@ struct coresight_connection { * @ea: Device attribute for sink representation under PMU directory. */ struct coresight_device { - struct coresight_connection *conns; - int nr_inport; - int nr_outport; + struct coresight_platform_data *pdata; enum coresight_dev_type type; union coresight_dev_subtype subtype; const struct coresight_ops *ops; -- cgit v1.2.3 From 37ea1ffddffa63c920ce826786fe610c78f57842 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 19 Jun 2019 13:53:03 -0600 Subject: coresight: Use fwnode handle instead of device names We rely on the device names to find a CoreSight device on the coresight bus. The device name however is obtained from the platform, which is bound to the real platform/amba device. As we are about to use different naming scheme for the coresight devices, we can't rely on the platform device name to find the corresponding coresight device. Instead we use the platform agnostic "fwnode handle" of the parent device to find the devices. We also reuse the same fwnode as the parent for the Coresight device we create. Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-platform.c | 14 +++++--- drivers/hwtracing/coresight/coresight-priv.h | 6 ++-- drivers/hwtracing/coresight/coresight.c | 42 +++++++++++++++++++----- include/linux/coresight.h | 4 +-- 4 files changed, 49 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/coresight-platform.c b/drivers/hwtracing/coresight/coresight-platform.c index 4394095ad224..49112a58478e 100644 --- a/drivers/hwtracing/coresight/coresight-platform.c +++ b/drivers/hwtracing/coresight/coresight-platform.c @@ -36,7 +36,7 @@ static int coresight_alloc_conns(struct device *dev, return 0; } -static int coresight_device_fwnode_match(struct device *dev, void *fwnode) +int coresight_device_fwnode_match(struct device *dev, void *fwnode) { return dev_fwnode(dev) == fwnode; } @@ -219,9 +219,15 @@ static int of_coresight_parse_endpoint(struct device *dev, } conn->outport = endpoint.port; - conn->child_name = devm_kstrdup(dev, - dev_name(rdev), - GFP_KERNEL); + /* + * Hold the refcount to the target device. This could be + * released via: + * 1) coresight_release_platform_data() if the probe fails or + * this device is unregistered. + * 2) While removing the target device via + * coresight_remove_match() + */ + conn->child_fwnode = fwnode_handle_get(rdev_fwnode); conn->child_port = rendpoint.port; /* Connection record updated */ ret = 1; diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index c21642114fc3..8b07fe55395a 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -200,8 +200,8 @@ static inline void *coresight_get_uci_data(const struct amba_id *id) return 0; } -static inline void -coresight_release_platform_data(struct coresight_platform_data *pdata) -{} +void coresight_release_platform_data(struct coresight_platform_data *pdata); + +int coresight_device_fwnode_match(struct device *dev, void *fwnode); #endif diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 526141c2f876..1287778c3be5 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -978,6 +978,7 @@ static void coresight_device_release(struct device *dev) { struct coresight_device *csdev = to_coresight_device(dev); + fwnode_handle_put(csdev->dev.fwnode); kfree(csdev->refcnt); kfree(csdev); } @@ -1009,13 +1010,11 @@ static int coresight_orphan_match(struct device *dev, void *data) /* We have found at least one orphan connection */ if (conn->child_dev == NULL) { /* Does it match this newly added device? */ - if (conn->child_name && - !strcmp(dev_name(&csdev->dev), conn->child_name)) { + if (conn->child_fwnode == csdev->dev.fwnode) conn->child_dev = csdev; - } else { + else /* This component still has an orphan */ still_orphan = true; - } } } @@ -1047,9 +1046,9 @@ static void coresight_fixup_device_conns(struct coresight_device *csdev) struct coresight_connection *conn = &csdev->pdata->conns[i]; struct device *dev = NULL; - if (conn->child_name) - dev = bus_find_device_by_name(&coresight_bustype, NULL, - conn->child_name); + dev = bus_find_device(&coresight_bustype, NULL, + (void *)conn->child_fwnode, + coresight_device_fwnode_match); if (dev) { conn->child_dev = to_coresight_device(dev); /* and put reference from 'bus_find_device()' */ @@ -1084,9 +1083,15 @@ static int coresight_remove_match(struct device *dev, void *data) if (conn->child_dev == NULL) continue; - if (!strcmp(dev_name(&csdev->dev), conn->child_name)) { + if (csdev->dev.fwnode == conn->child_fwnode) { iterator->orphan = true; conn->child_dev = NULL; + /* + * Drop the reference to the handle for the remote + * device acquired in parsing the connections from + * platform data. + */ + fwnode_handle_put(conn->child_fwnode); /* No need to continue */ break; } @@ -1166,6 +1171,22 @@ static int __init coresight_init(void) } postcore_initcall(coresight_init); +/* + * coresight_release_platform_data: Release references to the devices connected + * to the output port of this device. + */ +void coresight_release_platform_data(struct coresight_platform_data *pdata) +{ + int i; + + for (i = 0; i < pdata->nr_outport; i++) { + if (pdata->conns[i].child_fwnode) { + fwnode_handle_put(pdata->conns[i].child_fwnode); + pdata->conns[i].child_fwnode = NULL; + } + } +} + struct coresight_device *coresight_register(struct coresight_desc *desc) { int ret; @@ -1210,6 +1231,11 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) csdev->dev.parent = desc->dev; csdev->dev.release = coresight_device_release; csdev->dev.bus = &coresight_bustype; + /* + * Hold the reference to our parent device. This will be + * dropped only in coresight_device_release(). + */ + csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev)); dev_set_name(&csdev->dev, "%s", desc->name); ret = device_register(&csdev->dev); diff --git a/include/linux/coresight.h b/include/linux/coresight.h index b67d5074ece0..b40544bc06fe 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -126,15 +126,15 @@ struct coresight_desc { /** * struct coresight_connection - representation of a single connection * @outport: a connection's output port number. - * @chid_name: remote component's name. * @child_port: remote component's port number @output is connected to. + * @chid_fwnode: remote component's fwnode handle. * @child_dev: a @coresight_device representation of the component connected to @outport. */ struct coresight_connection { int outport; - const char *child_name; int child_port; + struct fwnode_handle *child_fwnode; struct coresight_device *child_dev; }; -- cgit v1.2.3 From 0f5f9b6ba9e1a706f5a3b1bd467e9242ab31b352 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 19 Jun 2019 13:53:04 -0600 Subject: coresight: Use platform agnostic names So far we have reused the name of the "platform" device for the CoreSight device. But this is not very intuitive when we move to ACPI. Also, the ACPI device names have ":" in them (e.g, ARMHC97C:01), which the perf tool doesn't like very much. This patch introduces a generic naming scheme, givin more intuitive names for the devices that appear on the CoreSight bus. The names follow the pattern "prefix" followed by "index" (e.g, etm5). We maintain a list of allocated devices per "prefix" to make sure we don't allocate a new name when it is reprobed (e.g, due to unsatisifed device dependencies). So, we maintain the list of "fwnodes" of the parent devices to allocate a consistent name. All devices except the ETMs get an index allocated in the order of probing. ETMs get an index based on the CPU they are attached to. TMC devices are named using "tmc_etf", "tmc_etb", and "tmc_etr" prefixes depending on the configuration of the device. The replicators and funnels are not classified as dynamic/static anymore. One could easily figure that out by checking the presence of "mgmt" registers under sysfs. Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-catu.c | 7 ++- drivers/hwtracing/coresight/coresight-etb10.c | 7 ++- drivers/hwtracing/coresight/coresight-etm3x.c | 4 +- drivers/hwtracing/coresight/coresight-etm4x.c | 4 +- drivers/hwtracing/coresight/coresight-funnel.c | 7 ++- drivers/hwtracing/coresight/coresight-replicator.c | 7 ++- drivers/hwtracing/coresight/coresight-stm.c | 12 +++-- drivers/hwtracing/coresight/coresight-tmc.c | 15 +++++- drivers/hwtracing/coresight/coresight-tpiu.c | 7 ++- drivers/hwtracing/coresight/coresight.c | 58 ++++++++++++++++++++++ include/linux/coresight.h | 25 +++++++++- 11 files changed, 141 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c index 1c1ad1268b9d..16ebf38a9f66 100644 --- a/drivers/hwtracing/coresight/coresight-catu.c +++ b/drivers/hwtracing/coresight/coresight-catu.c @@ -28,6 +28,8 @@ #define catu_dbg(x, ...) do {} while (0) #endif +DEFINE_CORESIGHT_DEVLIST(catu_devs, "catu"); + struct catu_etr_buf { struct tmc_sg_table *catu_table; dma_addr_t sladdr; @@ -505,6 +507,10 @@ static int catu_probe(struct amba_device *adev, const struct amba_id *id) struct device *dev = &adev->dev; void __iomem *base; + catu_desc.name = coresight_alloc_device_name(&catu_devs, dev); + if (!catu_desc.name) + return -ENOMEM; + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) { ret = -ENOMEM; @@ -551,7 +557,6 @@ static int catu_probe(struct amba_device *adev, const struct amba_id *id) catu_desc.type = CORESIGHT_DEV_TYPE_HELPER; catu_desc.subtype.helper_subtype = CORESIGHT_DEV_SUBTYPE_HELPER_CATU; catu_desc.ops = &catu_ops; - catu_desc.name = dev_name(dev); drvdata->csdev = coresight_register(&catu_desc); if (IS_ERR(drvdata->csdev)) diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index fffaac3e3677..d5b9edecf76e 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -63,6 +63,8 @@ #define ETB_FFSR_BIT 1 #define ETB_FRAME_SIZE_WORDS 4 +DEFINE_CORESIGHT_DEVLIST(etb_devs, "etb"); + /** * struct etb_drvdata - specifics associated to an ETB component * @base: memory mapped base address for this component. @@ -733,6 +735,10 @@ static int etb_probe(struct amba_device *adev, const struct amba_id *id) struct resource *res = &adev->res; struct coresight_desc desc = { 0 }; + desc.name = coresight_alloc_device_name(&etb_devs, dev); + if (!desc.name) + return -ENOMEM; + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) return -ENOMEM; @@ -777,7 +783,6 @@ static int etb_probe(struct amba_device *adev, const struct amba_id *id) desc.ops = &etb_cs_ops; desc.pdata = pdata; desc.dev = dev; - desc.name = dev_name(dev); desc.groups = coresight_etb_groups; drvdata->csdev = coresight_register(&desc); if (IS_ERR(drvdata->csdev)) diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c index f2d461610a2f..bed729140718 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x.c +++ b/drivers/hwtracing/coresight/coresight-etm3x.c @@ -815,6 +815,9 @@ static int etm_probe(struct amba_device *adev, const struct amba_id *id) } drvdata->cpu = coresight_get_cpu(dev); + desc.name = devm_kasprintf(dev, GFP_KERNEL, "etm%d", drvdata->cpu); + if (!desc.name) + return -ENOMEM; cpus_read_lock(); etmdrvdata[drvdata->cpu] = drvdata; @@ -856,7 +859,6 @@ static int etm_probe(struct amba_device *adev, const struct amba_id *id) desc.ops = &etm_cs_ops; desc.pdata = pdata; desc.dev = dev; - desc.name = dev_name(dev); desc.groups = coresight_etm_groups; drvdata->csdev = coresight_register(&desc); if (IS_ERR(drvdata->csdev)) { diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index 1609da1eaf83..7fe266194ab5 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -1101,6 +1101,9 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) spin_lock_init(&drvdata->spinlock); drvdata->cpu = coresight_get_cpu(dev); + desc.name = devm_kasprintf(dev, GFP_KERNEL, "etm%d", drvdata->cpu); + if (!desc.name) + return -ENOMEM; cpus_read_lock(); etmdrvdata[drvdata->cpu] = drvdata; @@ -1144,7 +1147,6 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) desc.pdata = pdata; desc.dev = dev; desc.groups = coresight_etmv4_groups; - desc.name = dev_name(dev); drvdata->csdev = coresight_register(&desc); if (IS_ERR(drvdata->csdev)) { ret = PTR_ERR(drvdata->csdev); diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index 75fa2d3ad9b4..5867fcb4503b 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -29,6 +29,8 @@ #define FUNNEL_HOLDTIME (0x7 << FUNNEL_HOLDTIME_SHFT) #define FUNNEL_ENSx_MASK 0xff +DEFINE_CORESIGHT_DEVLIST(funnel_devs, "funnel"); + /** * struct funnel_drvdata - specifics associated to a funnel component * @base: memory mapped base address for this component. @@ -192,6 +194,10 @@ static int funnel_probe(struct device *dev, struct resource *res) of_device_is_compatible(dev->of_node, "arm,coresight-funnel")) pr_warn_once("Uses OBSOLETE CoreSight funnel binding\n"); + desc.name = coresight_alloc_device_name(&funnel_devs, dev); + if (!desc.name) + return -ENOMEM; + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) return -ENOMEM; @@ -231,7 +237,6 @@ static int funnel_probe(struct device *dev, struct resource *res) desc.ops = &funnel_cs_ops; desc.pdata = pdata; desc.dev = dev; - desc.name = dev_name(dev); drvdata->csdev = coresight_register(&desc); if (IS_ERR(drvdata->csdev)) { ret = PTR_ERR(drvdata->csdev); diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c index 64dfde7241c1..c0e42253dfe7 100644 --- a/drivers/hwtracing/coresight/coresight-replicator.c +++ b/drivers/hwtracing/coresight/coresight-replicator.c @@ -22,6 +22,8 @@ #define REPLICATOR_IDFILTER0 0x000 #define REPLICATOR_IDFILTER1 0x004 +DEFINE_CORESIGHT_DEVLIST(replicator_devs, "replicator"); + /** * struct replicator_drvdata - specifics associated to a replicator component * @base: memory mapped base address for this component. Also indicates @@ -183,6 +185,10 @@ static int replicator_probe(struct device *dev, struct resource *res) of_device_is_compatible(dev->of_node, "arm,coresight-replicator")) pr_warn_once("Uses OBSOLETE CoreSight replicator binding\n"); + desc.name = coresight_alloc_device_name(&replicator_devs, dev); + if (!desc.name) + return -ENOMEM; + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) return -ENOMEM; @@ -222,7 +228,6 @@ static int replicator_probe(struct device *dev, struct resource *res) desc.ops = &replicator_cs_ops; desc.pdata = dev->platform_data; desc.dev = dev; - desc.name = dev_name(dev); drvdata->csdev = coresight_register(&desc); if (IS_ERR(drvdata->csdev)) { diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c index 03528f3fa9ff..e3e2b000cfb7 100644 --- a/drivers/hwtracing/coresight/coresight-stm.c +++ b/drivers/hwtracing/coresight/coresight-stm.c @@ -107,6 +107,8 @@ struct channel_space { unsigned long *guaranteed; }; +DEFINE_CORESIGHT_DEVLIST(stm_devs, "stm"); + /** * struct stm_drvdata - specifics associated to an STM component * @base: memory mapped base address for this component. @@ -810,6 +812,10 @@ static int stm_probe(struct amba_device *adev, const struct amba_id *id) size_t bitmap_size; struct coresight_desc desc = { 0 }; + desc.name = coresight_alloc_device_name(&stm_devs, dev); + if (!desc.name) + return -ENOMEM; + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) return -ENOMEM; @@ -854,11 +860,12 @@ static int stm_probe(struct amba_device *adev, const struct amba_id *id) spin_lock_init(&drvdata->spinlock); stm_init_default_data(drvdata); - stm_init_generic_data(drvdata, dev_name(dev)); + stm_init_generic_data(drvdata, desc.name); if (stm_register_device(dev, &drvdata->stm, THIS_MODULE)) { dev_info(dev, - "stm_register_device failed, probing deferred\n"); + "%s : stm_register_device failed, probing deferred\n", + desc.name); return -EPROBE_DEFER; } @@ -874,7 +881,6 @@ static int stm_probe(struct amba_device *adev, const struct amba_id *id) desc.ops = &stm_cs_ops; desc.pdata = pdata; desc.dev = dev; - desc.name = dev_name(dev); desc.groups = coresight_stm_groups; drvdata->csdev = coresight_register(&desc); if (IS_ERR(drvdata->csdev)) { diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c index 212630e65cca..be37aff573b4 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.c +++ b/drivers/hwtracing/coresight/coresight-tmc.c @@ -27,6 +27,10 @@ #include "coresight-priv.h" #include "coresight-tmc.h" +DEFINE_CORESIGHT_DEVLIST(etb_devs, "tmc_etb"); +DEFINE_CORESIGHT_DEVLIST(etf_devs, "tmc_etf"); +DEFINE_CORESIGHT_DEVLIST(etr_devs, "tmc_etr"); + void tmc_wait_for_tmcready(struct tmc_drvdata *drvdata) { /* Ensure formatter, unformatter and hardware fifo are empty */ @@ -397,6 +401,7 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) struct tmc_drvdata *drvdata; struct resource *res = &adev->res; struct coresight_desc desc = { 0 }; + struct coresight_dev_list *dev_list = NULL; ret = -ENOMEM; drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); @@ -429,13 +434,13 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) desc.dev = dev; desc.groups = coresight_tmc_groups; - desc.name = dev_name(dev); switch (drvdata->config_type) { case TMC_CONFIG_TYPE_ETB: desc.type = CORESIGHT_DEV_TYPE_SINK; desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER; desc.ops = &tmc_etb_cs_ops; + dev_list = &etb_devs; break; case TMC_CONFIG_TYPE_ETR: desc.type = CORESIGHT_DEV_TYPE_SINK; @@ -447,11 +452,13 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) goto out; idr_init(&drvdata->idr); mutex_init(&drvdata->idr_mutex); + dev_list = &etr_devs; break; case TMC_CONFIG_TYPE_ETF: desc.type = CORESIGHT_DEV_TYPE_LINKSINK; desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_FIFO; desc.ops = &tmc_etf_cs_ops; + dev_list = &etf_devs; break; default: pr_err("%s: Unsupported TMC config\n", desc.name); @@ -459,6 +466,12 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) goto out; } + desc.name = coresight_alloc_device_name(dev_list, dev); + if (!desc.name) { + ret = -ENOMEM; + goto out; + } + pdata = coresight_get_platform_data(dev); if (IS_ERR(pdata)) { ret = PTR_ERR(pdata); diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index b699d613425d..f8583e4032a6 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -47,6 +47,8 @@ #define FFCR_FON_MAN BIT(6) #define FFCR_STOP_FI BIT(12) +DEFINE_CORESIGHT_DEVLIST(tpiu_devs, "tpiu"); + /** * @base: memory mapped base address for this component. * @atclk: optional clock for the core parts of the TPIU. @@ -125,6 +127,10 @@ static int tpiu_probe(struct amba_device *adev, const struct amba_id *id) struct resource *res = &adev->res; struct coresight_desc desc = { 0 }; + desc.name = coresight_alloc_device_name(&tpiu_devs, dev); + if (!desc.name) + return -ENOMEM; + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) return -ENOMEM; @@ -157,7 +163,6 @@ static int tpiu_probe(struct amba_device *adev, const struct amba_id *id) desc.ops = &tpiu_cs_ops; desc.pdata = pdata; desc.dev = dev; - desc.name = dev_name(dev); drvdata->csdev = coresight_register(&desc); if (!IS_ERR(drvdata->csdev)) { diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 1287778c3be5..86d1fc2c1bd4 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -1291,3 +1291,61 @@ void coresight_unregister(struct coresight_device *csdev) device_unregister(&csdev->dev); } EXPORT_SYMBOL_GPL(coresight_unregister); + + +/* + * coresight_search_device_idx - Search the fwnode handle of a device + * in the given dev_idx list. Must be called with the coresight_mutex held. + * + * Returns the index of the entry, when found. Otherwise, -ENOENT. + */ +static inline int coresight_search_device_idx(struct coresight_dev_list *dict, + struct fwnode_handle *fwnode) +{ + int i; + + for (i = 0; i < dict->nr_idx; i++) + if (dict->fwnode_list[i] == fwnode) + return i; + return -ENOENT; +} + +/* + * coresight_alloc_device_name - Get an index for a given device in the + * device index list specific to a driver. An index is allocated for a + * device and is tracked with the fwnode_handle to prevent allocating + * duplicate indices for the same device (e.g, if we defer probing of + * a device due to dependencies), in case the index is requested again. + */ +char *coresight_alloc_device_name(struct coresight_dev_list *dict, + struct device *dev) +{ + int idx; + char *name = NULL; + struct fwnode_handle **list; + + mutex_lock(&coresight_mutex); + + idx = coresight_search_device_idx(dict, dev_fwnode(dev)); + if (idx < 0) { + /* Make space for the new entry */ + idx = dict->nr_idx; + list = krealloc(dict->fwnode_list, + (idx + 1) * sizeof(*dict->fwnode_list), + GFP_KERNEL); + if (ZERO_OR_NULL_PTR(list)) { + idx = -ENOMEM; + goto done; + } + + list[idx] = dev_fwnode(dev); + dict->fwnode_list = list; + dict->nr_idx = idx + 1; + } + + name = devm_kasprintf(dev, GFP_KERNEL, "%s%d", dict->pfx, idx); +done: + mutex_unlock(&coresight_mutex); + return name; +} +EXPORT_SYMBOL_GPL(coresight_alloc_device_name); diff --git a/include/linux/coresight.h b/include/linux/coresight.h index b40544bc06fe..a2b68823717b 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -168,6 +168,28 @@ struct coresight_device { struct dev_ext_attribute *ea; }; +/* + * coresight_dev_list - Mapping for devices to "name" index for device + * names. + * + * @nr_idx: Number of entries already allocated. + * @pfx: Prefix pattern for device name. + * @fwnode_list: Array of fwnode_handles associated with each allocated + * index, upto nr_idx entries. + */ +struct coresight_dev_list { + int nr_idx; + const char *pfx; + struct fwnode_handle **fwnode_list; +}; + +#define DEFINE_CORESIGHT_DEVLIST(var, dev_pfx) \ +static struct coresight_dev_list (var) = { \ + .pfx = dev_pfx, \ + .nr_idx = 0, \ + .fwnode_list = NULL, \ +} + #define to_coresight_device(d) container_of(d, struct coresight_device, dev) #define source_ops(csdev) csdev->ops->source_ops @@ -261,7 +283,8 @@ extern int coresight_claim_device_unlocked(void __iomem *base); extern void coresight_disclaim_device(void __iomem *base); extern void coresight_disclaim_device_unlocked(void __iomem *base); - +extern char *coresight_alloc_device_name(struct coresight_dev_list *devs, + struct device *dev); #else static inline struct coresight_device * coresight_register(struct coresight_desc *desc) { return NULL; } -- cgit v1.2.3 From cbf4f7325a638ced1d815580dfed44ea3b76163c Mon Sep 17 00:00:00 2001 From: Vitor Soares Date: Wed, 19 Jun 2019 20:36:32 +0200 Subject: i3c: add mixed limited bus mode The i3c bus spec defines a bus configuration where i2c devices don't have a 50ns filter but support SCL running at SDR max rate (12.5MHz). This patch introduces the limited bus mode so that users can use a higher speed in presence of i2c devices index 1. Signed-off-by: Vitor Soares Cc: Boris Brezillon Cc: Signed-off-by: Boris Brezillon --- drivers/i3c/master.c | 5 +++++ include/linux/i3c/master.h | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index 54026be03998..1318adfe0216 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -470,6 +470,7 @@ static int i3c_bus_init(struct i3c_bus *i3cbus) static const char * const i3c_bus_mode_strings[] = { [I3C_BUS_MODE_PURE] = "pure", [I3C_BUS_MODE_MIXED_FAST] = "mixed-fast", + [I3C_BUS_MODE_MIXED_LIMITED] = "mixed-limited", [I3C_BUS_MODE_MIXED_SLOW] = "mixed-slow", }; @@ -584,6 +585,7 @@ int i3c_bus_set_mode(struct i3c_bus *i3cbus, enum i3c_bus_mode mode, i3cbus->scl_rate.i3c = I3C_BUS_TYP_I3C_SCL_RATE; break; case I3C_BUS_MODE_MIXED_FAST: + case I3C_BUS_MODE_MIXED_LIMITED: if (!i3cbus->scl_rate.i3c) i3cbus->scl_rate.i3c = I3C_BUS_TYP_I3C_SCL_RATE; if (!i3cbus->scl_rate.i2c) @@ -2487,6 +2489,9 @@ int i3c_master_register(struct i3c_master_controller *master, mode = I3C_BUS_MODE_MIXED_FAST; break; case I3C_LVR_I2C_INDEX(1): + if (mode < I3C_BUS_MODE_MIXED_LIMITED) + mode = I3C_BUS_MODE_MIXED_LIMITED; + break; case I3C_LVR_I2C_INDEX(2): if (mode < I3C_BUS_MODE_MIXED_SLOW) mode = I3C_BUS_MODE_MIXED_SLOW; diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index eca8337bdaa5..1f08fa8d69d2 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -250,12 +250,17 @@ struct i3c_device { * the bus. The only impact in this mode is that the * high SCL pulse has to stay below 50ns to trick I2C * devices when transmitting I3C frames + * @I3C_BUS_MODE_MIXED_LIMITED: I2C devices without 50ns spike filter are + * present on the bus. However they allow + * compliance up to the maximum SDR SCL clock + * frequency. * @I3C_BUS_MODE_MIXED_SLOW: I2C devices without 50ns spike filter are present * on the bus */ enum i3c_bus_mode { I3C_BUS_MODE_PURE, I3C_BUS_MODE_MIXED_FAST, + I3C_BUS_MODE_MIXED_LIMITED, I3C_BUS_MODE_MIXED_SLOW, }; -- cgit v1.2.3 From ae9e13d621d6795ec1ad6bf10bd2549c6c3feca4 Mon Sep 17 00:00:00 2001 From: Lianbo Jiang Date: Tue, 23 Apr 2019 09:30:05 +0800 Subject: x86/e820, ioport: Add a new I/O resource descriptor IORES_DESC_RESERVED When executing the kexec_file_load() syscall, the first kernel needs to pass the e820 reserved ranges to the second kernel because some devices (PCI, for example) need them present in the kdump kernel for proper initialization. But the kernel can not exactly match the e820 reserved ranges when walking through the iomem resources using the default IORES_DESC_NONE descriptor, because there are several types of e820 ranges which are marked IORES_DESC_NONE, see e820_type_to_iores_desc(). Therefore, add a new I/O resource descriptor called IORES_DESC_RESERVED to mark exactly those ranges. It will be used to match the reserved resource ranges when walking through iomem resources. [ bp: Massage commit message. ] Suggested-by: Borislav Petkov Signed-off-by: Lianbo Jiang Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: bhe@redhat.com Cc: dave.hansen@linux.intel.com Cc: dyoung@redhat.com Cc: "H. Peter Anvin" Cc: Huang Zijiang Cc: Ingo Molnar Cc: Joe Perches Cc: Juergen Gross Cc: kexec@lists.infradead.org Cc: Masayoshi Mizuma Cc: Michal Hocko Cc: Mike Rapoport Cc: Naoya Horiguchi Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tom Lendacky Cc: x86-ml Link: https://lkml.kernel.org/r/20190423013007.17838-2-lijiang@redhat.com --- arch/x86/kernel/e820.c | 2 +- include/linux/ioport.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 8f32e705a980..e69408bf664b 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1063,10 +1063,10 @@ static unsigned long __init e820_type_to_iores_desc(struct e820_entry *entry) case E820_TYPE_NVS: return IORES_DESC_ACPI_NV_STORAGE; case E820_TYPE_PMEM: return IORES_DESC_PERSISTENT_MEMORY; case E820_TYPE_PRAM: return IORES_DESC_PERSISTENT_MEMORY_LEGACY; + case E820_TYPE_RESERVED: return IORES_DESC_RESERVED; case E820_TYPE_RESERVED_KERN: /* Fall-through: */ case E820_TYPE_RAM: /* Fall-through: */ case E820_TYPE_UNUSABLE: /* Fall-through: */ - case E820_TYPE_RESERVED: /* Fall-through: */ default: return IORES_DESC_NONE; } } diff --git a/include/linux/ioport.h b/include/linux/ioport.h index da0ebaec25f0..6ed59de48bd5 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -133,6 +133,7 @@ enum { IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5, IORES_DESC_DEVICE_PRIVATE_MEMORY = 6, IORES_DESC_DEVICE_PUBLIC_MEMORY = 7, + IORES_DESC_RESERVED = 8, }; /* helpers to define resources */ -- cgit v1.2.3 From 5da04cc86d1215fd9fe0e5c88ead6e8428a75e56 Mon Sep 17 00:00:00 2001 From: Lianbo Jiang Date: Tue, 23 Apr 2019 09:30:06 +0800 Subject: x86/mm: Rework ioremap resource mapping determination On ioremap(), __ioremap_check_mem() does a couple of checks on the supplied memory range to determine how the range should be mapped and in particular what protection flags should be used. Generalize the procedure by introducing IORES_MAP_* flags which control different aspects of the ioremapping and use them in the respective helpers which determine which descriptor flags should be set per range. [ bp: - Rewrite commit message. - Add/improve comments. - Reflow __ioremap_caller()'s args. - s/__ioremap_check_desc/__ioremap_check_encrypted/g; - s/__ioremap_res_check/__ioremap_collect_map_flags/g; - clarify __ioremap_check_ram()'s purpose. ] Signed-off-by: Lianbo Jiang Co-developed-by: Borislav Petkov Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: bhe@redhat.com Cc: Dave Hansen Cc: dyoung@redhat.com Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: kexec@lists.infradead.org Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tom Lendacky Cc: x86-ml Link: https://lkml.kernel.org/r/20190423013007.17838-3-lijiang@redhat.com --- arch/x86/mm/ioremap.c | 71 ++++++++++++++++++++++++++++++++------------------ include/linux/ioport.h | 9 +++++++ 2 files changed, 54 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 4b6423e7bd21..e500f1df1140 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -28,9 +28,11 @@ #include "physaddr.h" -struct ioremap_mem_flags { - bool system_ram; - bool desc_other; +/* + * Descriptor controlling ioremap() behavior. + */ +struct ioremap_desc { + unsigned int flags; }; /* @@ -62,13 +64,14 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size, return err; } -static bool __ioremap_check_ram(struct resource *res) +/* Does the range (or a subset of) contain normal RAM? */ +static unsigned int __ioremap_check_ram(struct resource *res) { unsigned long start_pfn, stop_pfn; unsigned long i; if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM) - return false; + return 0; start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT; stop_pfn = (res->end + 1) >> PAGE_SHIFT; @@ -76,28 +79,44 @@ static bool __ioremap_check_ram(struct resource *res) for (i = 0; i < (stop_pfn - start_pfn); ++i) if (pfn_valid(start_pfn + i) && !PageReserved(pfn_to_page(start_pfn + i))) - return true; + return IORES_MAP_SYSTEM_RAM; } - return false; + return 0; } -static int __ioremap_check_desc_other(struct resource *res) +/* + * In a SEV guest, NONE and RESERVED should not be mapped encrypted because + * there the whole memory is already encrypted. + */ +static unsigned int __ioremap_check_encrypted(struct resource *res) { - return (res->desc != IORES_DESC_NONE); + if (!sev_active()) + return 0; + + switch (res->desc) { + case IORES_DESC_NONE: + case IORES_DESC_RESERVED: + break; + default: + return IORES_MAP_ENCRYPTED; + } + + return 0; } -static int __ioremap_res_check(struct resource *res, void *arg) +static int __ioremap_collect_map_flags(struct resource *res, void *arg) { - struct ioremap_mem_flags *flags = arg; + struct ioremap_desc *desc = arg; - if (!flags->system_ram) - flags->system_ram = __ioremap_check_ram(res); + if (!(desc->flags & IORES_MAP_SYSTEM_RAM)) + desc->flags |= __ioremap_check_ram(res); - if (!flags->desc_other) - flags->desc_other = __ioremap_check_desc_other(res); + if (!(desc->flags & IORES_MAP_ENCRYPTED)) + desc->flags |= __ioremap_check_encrypted(res); - return flags->system_ram && flags->desc_other; + return ((desc->flags & (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)) == + (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)); } /* @@ -106,15 +125,15 @@ static int __ioremap_res_check(struct resource *res, void *arg) * resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES). */ static void __ioremap_check_mem(resource_size_t addr, unsigned long size, - struct ioremap_mem_flags *flags) + struct ioremap_desc *desc) { u64 start, end; start = (u64)addr; end = start + size - 1; - memset(flags, 0, sizeof(*flags)); + memset(desc, 0, sizeof(struct ioremap_desc)); - walk_mem_res(start, end, flags, __ioremap_res_check); + walk_mem_res(start, end, desc, __ioremap_collect_map_flags); } /* @@ -131,15 +150,15 @@ static void __ioremap_check_mem(resource_size_t addr, unsigned long size, * have to convert them into an offset in a page-aligned mapping, but the * caller shouldn't need to know that small detail. */ -static void __iomem *__ioremap_caller(resource_size_t phys_addr, - unsigned long size, enum page_cache_mode pcm, - void *caller, bool encrypted) +static void __iomem * +__ioremap_caller(resource_size_t phys_addr, unsigned long size, + enum page_cache_mode pcm, void *caller, bool encrypted) { unsigned long offset, vaddr; resource_size_t last_addr; const resource_size_t unaligned_phys_addr = phys_addr; const unsigned long unaligned_size = size; - struct ioremap_mem_flags mem_flags; + struct ioremap_desc io_desc; struct vm_struct *area; enum page_cache_mode new_pcm; pgprot_t prot; @@ -158,12 +177,12 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, return NULL; } - __ioremap_check_mem(phys_addr, size, &mem_flags); + __ioremap_check_mem(phys_addr, size, &io_desc); /* * Don't allow anybody to remap normal RAM that we're using.. */ - if (mem_flags.system_ram) { + if (io_desc.flags & IORES_MAP_SYSTEM_RAM) { WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n", &phys_addr, &last_addr); return NULL; @@ -201,7 +220,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, * resulting mapping. */ prot = PAGE_KERNEL_IO; - if ((sev_active() && mem_flags.desc_other) || encrypted) + if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted) prot = pgprot_encrypted(prot); switch (pcm) { diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 6ed59de48bd5..5db386cfc2d4 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -12,6 +12,7 @@ #ifndef __ASSEMBLY__ #include #include +#include /* * Resources are tree-like, allowing * nesting etc.. @@ -136,6 +137,14 @@ enum { IORES_DESC_RESERVED = 8, }; +/* + * Flags controlling ioremap() behavior. + */ +enum { + IORES_MAP_SYSTEM_RAM = BIT(0), + IORES_MAP_ENCRYPTED = BIT(1), +}; + /* helpers to define resources */ #define DEFINE_RES_NAMED(_start, _size, _name, _flags) \ { \ -- cgit v1.2.3 From 2f578aaf51624aa6fcff041fc7dc5c2d4dfa447f Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sun, 9 Jun 2019 05:15:51 +0900 Subject: block: move tag field position in struct request __data_len and __sector are internal fields which should not be accessed directly in driver-level like the comment above it. But, tag field can be accessed by driver level directly so that we need to make the comment right by moving it to some other place. Cc: Jens Axboe Cc: linux-block@vger.kernel.org Signed-off-by: Minwoo Im Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 592669bcc536..90e6914bea0c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -137,11 +137,11 @@ struct request { unsigned int cmd_flags; /* op and common flags */ req_flags_t rq_flags; + int tag; int internal_tag; /* the following two fields are internal, NEVER access directly */ unsigned int __data_len; /* total data len */ - int tag; sector_t __sector; /* sector cursor */ struct bio *bio; -- cgit v1.2.3 From 3a211b71529fdd0a89095b18fb19155db0c8fb5d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 23 May 2019 18:43:11 +0300 Subject: blk-core: Remove blk_end_request*() declarations Commit a1ce35fa49852db60fc6e268 ("block: remove dead elevator code") deleted blk_end_request() and friends, but some declaration are still left. Purge them. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- include/linux/blkdev.h | 12 ------------ 2 files changed, 1 insertion(+), 13 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 8340f69670d8..94c6520bc786 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1348,7 +1348,7 @@ EXPORT_SYMBOL_GPL(blk_steal_bios); * * This special helper function is only for request stacking drivers * (e.g. request-based dm) so that they can handle partial completion. - * Actual device drivers should use blk_end_request instead. + * Actual device drivers should use blk_mq_end_request instead. * * Passing the result of blk_rq_bytes() as @nr_bytes guarantees * %false return from this function. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 90e6914bea0c..ad49a775c54f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1026,21 +1026,9 @@ void blk_steal_bios(struct bio_list *list, struct request *rq); * * blk_update_request() completes given number of bytes and updates * the request without completing it. - * - * blk_end_request() and friends. __blk_end_request() must be called - * with the request queue spinlock acquired. - * - * Several drivers define their own end_request and call - * blk_end_request() for parts of the original function. - * This prevents code duplication in drivers. */ extern bool blk_update_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); -extern void blk_end_request_all(struct request *rq, blk_status_t error); -extern bool __blk_end_request(struct request *rq, blk_status_t error, - unsigned int nr_bytes); -extern void __blk_end_request_all(struct request *rq, blk_status_t error); -extern bool __blk_end_request_cur(struct request *rq, blk_status_t error); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); -- cgit v1.2.3 From 8527fa6cc68a489f735823e61b31ec6cb266274a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Jun 2019 14:54:36 +0200 Subject: netfilter: synproxy: fix building syncookie calls When either CONFIG_IPV6 or CONFIG_SYN_COOKIES are disabled, the kernel fails to build: include/linux/netfilter_ipv6.h:180:9: error: implicit declaration of function '__cookie_v6_init_sequence' [-Werror,-Wimplicit-function-declaration] return __cookie_v6_init_sequence(iph, th, mssp); include/linux/netfilter_ipv6.h:194:9: error: implicit declaration of function '__cookie_v6_check' [-Werror,-Wimplicit-function-declaration] return __cookie_v6_check(iph, th, cookie); net/ipv6/netfilter.c:237:26: error: use of undeclared identifier '__cookie_v6_init_sequence'; did you mean 'cookie_init_sequence'? net/ipv6/netfilter.c:238:21: error: use of undeclared identifier '__cookie_v6_check'; did you mean '__cookie_v4_check'? Fix the IS_ENABLED() checks to match the function declaration and definitions for these. Fixes: 3006a5224f15 ("netfilter: synproxy: remove module dependency on IPv6 SYNPROXY") Signed-off-by: Arnd Bergmann Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 14 ++++++++------ net/ipv6/netfilter.c | 2 ++ 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 35b12525ee45..22e6398bc482 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -163,31 +163,33 @@ static inline u32 nf_ipv6_cookie_init_sequence(const struct ipv6hdr *iph, const struct tcphdr *th, u16 *mssp) { +#if IS_ENABLED(CONFIG_SYN_COOKIES) #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); if (v6_ops) return v6_ops->cookie_init_sequence(iph, th, mssp); - - return 0; -#else +#elif IS_BUILTIN(CONFIG_IPV6) return __cookie_v6_init_sequence(iph, th, mssp); #endif +#endif + return 0; } static inline int nf_cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th, __u32 cookie) { +#if IS_ENABLED(CONFIG_SYN_COOKIES) #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); if (v6_ops) return v6_ops->cookie_v6_check(iph, th, cookie); - - return 0; -#else +#elif IS_BUILTIN(CONFIG_IPV6) return __cookie_v6_check(iph, th, cookie); #endif +#endif + return 0; } __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index dffb10fdc3e8..61819ed858b1 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -234,8 +234,10 @@ static const struct nf_ipv6_ops ipv6ops = { .route_me_harder = ip6_route_me_harder, .dev_get_saddr = ipv6_dev_get_saddr, .route = __nf_ip6_route, +#if IS_ENABLED(CONFIG_SYN_COOKIES) .cookie_init_sequence = __cookie_v6_init_sequence, .cookie_v6_check = __cookie_v6_check, +#endif #endif .route_input = ip6_route_input, .fragment = ip6_fragment, -- cgit v1.2.3 From 2735b683e1f284560f7e8e1d1ebf385ab111312d Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 19 Jun 2019 14:41:56 +0100 Subject: ASoC: madera: Add common support for Cirrus Logic Madera codecs The Cirrus Logic Madera codecs are a family of related codecs with extensive digital and analogue I/O, digital mixing and routing, signal processing and programmable DSPs. This patch adds common support code shared by all Madera codecs. This patch also adds the pdata to the parent mfd pdata struct. Since there is a circular build dependency it's convenient to patch them both atomically. Signed-off-by: Nariman Poushin Signed-off-by: Nikesh Oswal Signed-off-by: Piotr Stankiewicz Signed-off-by: Ajit Pandey Signed-off-by: Richard Fitzgerald Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- MAINTAINERS | 4 + include/linux/mfd/madera/pdata.h | 4 + include/sound/madera-pdata.h | 63 + sound/soc/codecs/Kconfig | 5 + sound/soc/codecs/Makefile | 2 + sound/soc/codecs/madera.c | 4181 ++++++++++++++++++++++++++++++++++++++ sound/soc/codecs/madera.h | 446 ++++ 7 files changed, 4705 insertions(+) create mode 100644 include/sound/madera-pdata.h create mode 100644 sound/soc/codecs/madera.c create mode 100644 sound/soc/codecs/madera.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index c35d1f72bc73..9ea100957c59 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3915,14 +3915,18 @@ W: https://github.com/CirrusLogic/linux-drivers/wiki S: Supported F: Documentation/devicetree/bindings/mfd/madera.txt F: Documentation/devicetree/bindings/pinctrl/cirrus,madera-pinctrl.txt +F: Documentation/devicetree/bindings/sound/madera.txt F: include/dt-bindings/sound/madera* F: include/linux/irqchip/irq-madera* F: include/linux/mfd/madera/* +F: include/sound/madera* F: drivers/gpio/gpio-madera* F: drivers/irqchip/irq-madera* F: drivers/mfd/madera* F: drivers/mfd/cs47l* F: drivers/pinctrl/cirrus/* +F: sound/soc/codecs/cs47l* +F: sound/soc/codecs/madera* CLANG-FORMAT FILE M: Miguel Ojeda diff --git a/include/linux/mfd/madera/pdata.h b/include/linux/mfd/madera/pdata.h index 8dc852402dbb..60cd8ec98563 100644 --- a/include/linux/mfd/madera/pdata.h +++ b/include/linux/mfd/madera/pdata.h @@ -16,6 +16,7 @@ #include #include #include +#include #define MADERA_MAX_MICBIAS 4 #define MADERA_MAX_CHILD_MICBIAS 4 @@ -39,6 +40,7 @@ struct madera_codec_pdata; * @gpsw: General purpose switch mode setting. Depends on the external * hardware connected to the switch. (See the SW1_MODE field * in the datasheet for the available values for your codec) + * @codec: Substruct of pdata for the ASoC codec driver */ struct madera_pdata { struct gpio_desc *reset; @@ -53,6 +55,8 @@ struct madera_pdata { int n_gpio_configs; u32 gpsw[MADERA_MAX_GPSW]; + + struct madera_codec_pdata codec; }; #endif diff --git a/include/sound/madera-pdata.h b/include/sound/madera-pdata.h new file mode 100644 index 000000000000..441decefb7f3 --- /dev/null +++ b/include/sound/madera-pdata.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Platform data for Madera codec driver + * + * Copyright (C) 2016-2019 Cirrus Logic, Inc. and + * Cirrus Logic International Semiconductor Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef MADERA_CODEC_PDATA_H +#define MADERA_CODEC_PDATA_H + +#include + +#define MADERA_MAX_INPUT 6 +#define MADERA_MAX_MUXED_CHANNELS 4 +#define MADERA_MAX_OUTPUT 6 +#define MADERA_MAX_AIF 4 +#define MADERA_MAX_PDM_SPK 2 +#define MADERA_MAX_DSP 7 + +/** + * struct madera_codec_pdata + * + * @max_channels_clocked: Maximum number of channels that I2S clocks will be + * generated for. Useful when clock master for systems + * where the I2S bus has multiple data lines. + * @dmic_ref: Indicates how the MICBIAS pins have been externally + * connected to DMICs on each input. A value of 0 + * indicates MICVDD and is the default. Other values are: + * For CS47L35 one of the CS47L35_DMIC_REF_xxx values + * For all other codecs one of the MADERA_DMIC_REF_xxx + * Also see the datasheet for a description of the + * INn_DMIC_SUP field. + * @inmode: Mode for the ADC inputs. One of the MADERA_INMODE_xxx + * values. Two-dimensional array + * [input_number][channel number], with four slots per + * input in the order + * [n][0]=INnAL [n][1]=INnAR [n][2]=INnBL [n][3]=INnBR + * @out_mono: For each output set the value to TRUE to indicate that + * the output is mono. [0]=OUT1, [1]=OUT2, ... + * @pdm_fmt: PDM speaker data format. See the PDM_SPKn_FMT field in + * the datasheet for a description of this value. + * @pdm_mute: PDM mute format. See the PDM_SPKn_CTRL_1 register + * in the datasheet for a description of this value. + */ +struct madera_codec_pdata { + u32 max_channels_clocked[MADERA_MAX_AIF]; + + u32 dmic_ref[MADERA_MAX_INPUT]; + + u32 inmode[MADERA_MAX_INPUT][MADERA_MAX_MUXED_CHANNELS]; + + bool out_mono[MADERA_MAX_OUTPUT]; + + u32 pdm_fmt[MADERA_MAX_PDM_SPK]; + u32 pdm_mute[MADERA_MAX_PDM_SPK]; +}; + +#endif diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 1bda52ef0cd0..f3ac661b8845 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -284,10 +284,12 @@ config SND_SOC_WM_HUBS config SND_SOC_WM_ADSP tristate select SND_SOC_COMPRESS + default y if SND_SOC_MADERA=y default y if SND_SOC_CS47L24=y default y if SND_SOC_WM5102=y default y if SND_SOC_WM5110=y default y if SND_SOC_WM2200=y + default m if SND_SOC_MADERA=m default m if SND_SOC_CS47L24=m default m if SND_SOC_WM5102=m default m if SND_SOC_WM5110=m @@ -704,6 +706,9 @@ config SND_SOC_LOCHNAGAR_SC This driver support the sound card functionality of the Cirrus Logic Lochnagar audio development board. +config SND_SOC_MADERA + tristate + config SND_SOC_MAX98088 tristate "Maxim MAX98088/9 Low-Power, Stereo Audio Codec" depends on I2C diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile index 112701fd44a8..d21e1be3e7a7 100644 --- a/sound/soc/codecs/Makefile +++ b/sound/soc/codecs/Makefile @@ -93,6 +93,7 @@ snd-soc-l3-objs := l3.o snd-soc-lm4857-objs := lm4857.o snd-soc-lm49453-objs := lm49453.o snd-soc-lochnagar-sc-objs := lochnagar-sc.o +snd-soc-madera-objs := madera.o snd-soc-max9759-objs := max9759.o snd-soc-max9768-objs := max9768.o snd-soc-max98088-objs := max98088.o @@ -369,6 +370,7 @@ obj-$(CONFIG_SND_SOC_L3) += snd-soc-l3.o obj-$(CONFIG_SND_SOC_LM4857) += snd-soc-lm4857.o obj-$(CONFIG_SND_SOC_LM49453) += snd-soc-lm49453.o obj-$(CONFIG_SND_SOC_LOCHNAGAR_SC) += snd-soc-lochnagar-sc.o +obj-$(CONFIG_SND_SOC_MADERA) += snd-soc-madera.o obj-$(CONFIG_SND_SOC_MAX9759) += snd-soc-max9759.o obj-$(CONFIG_SND_SOC_MAX9768) += snd-soc-max9768.o obj-$(CONFIG_SND_SOC_MAX98088) += snd-soc-max98088.o diff --git a/sound/soc/codecs/madera.c b/sound/soc/codecs/madera.c new file mode 100644 index 000000000000..6146c7a070cb --- /dev/null +++ b/sound/soc/codecs/madera.c @@ -0,0 +1,4181 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Cirrus Logic Madera class codecs common support +// +// Copyright (C) 2015-2019 Cirrus Logic, Inc. and +// Cirrus Logic International Semiconductor Ltd. +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by the +// Free Software Foundation; version 2. +// + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include "madera.h" + +#define MADERA_AIF_BCLK_CTRL 0x00 +#define MADERA_AIF_TX_PIN_CTRL 0x01 +#define MADERA_AIF_RX_PIN_CTRL 0x02 +#define MADERA_AIF_RATE_CTRL 0x03 +#define MADERA_AIF_FORMAT 0x04 +#define MADERA_AIF_RX_BCLK_RATE 0x06 +#define MADERA_AIF_FRAME_CTRL_1 0x07 +#define MADERA_AIF_FRAME_CTRL_2 0x08 +#define MADERA_AIF_FRAME_CTRL_3 0x09 +#define MADERA_AIF_FRAME_CTRL_4 0x0A +#define MADERA_AIF_FRAME_CTRL_5 0x0B +#define MADERA_AIF_FRAME_CTRL_6 0x0C +#define MADERA_AIF_FRAME_CTRL_7 0x0D +#define MADERA_AIF_FRAME_CTRL_8 0x0E +#define MADERA_AIF_FRAME_CTRL_9 0x0F +#define MADERA_AIF_FRAME_CTRL_10 0x10 +#define MADERA_AIF_FRAME_CTRL_11 0x11 +#define MADERA_AIF_FRAME_CTRL_12 0x12 +#define MADERA_AIF_FRAME_CTRL_13 0x13 +#define MADERA_AIF_FRAME_CTRL_14 0x14 +#define MADERA_AIF_FRAME_CTRL_15 0x15 +#define MADERA_AIF_FRAME_CTRL_16 0x16 +#define MADERA_AIF_FRAME_CTRL_17 0x17 +#define MADERA_AIF_FRAME_CTRL_18 0x18 +#define MADERA_AIF_TX_ENABLES 0x19 +#define MADERA_AIF_RX_ENABLES 0x1A +#define MADERA_AIF_FORCE_WRITE 0x1B + +#define MADERA_DSP_CONFIG_1_OFFS 0x00 +#define MADERA_DSP_CONFIG_2_OFFS 0x02 + +#define MADERA_DSP_CLK_SEL_MASK 0x70000 +#define MADERA_DSP_CLK_SEL_SHIFT 16 + +#define MADERA_DSP_RATE_MASK 0x7800 +#define MADERA_DSP_RATE_SHIFT 11 + +#define MADERA_SYSCLK_6MHZ 0 +#define MADERA_SYSCLK_12MHZ 1 +#define MADERA_SYSCLK_24MHZ 2 +#define MADERA_SYSCLK_49MHZ 3 +#define MADERA_SYSCLK_98MHZ 4 + +#define MADERA_DSPCLK_9MHZ 0 +#define MADERA_DSPCLK_18MHZ 1 +#define MADERA_DSPCLK_36MHZ 2 +#define MADERA_DSPCLK_73MHZ 3 +#define MADERA_DSPCLK_147MHZ 4 + +#define MADERA_FLL_VCO_CORNER 141900000 +#define MADERA_FLL_MAX_FREF 13500000 +#define MADERA_FLL_MAX_N 1023 +#define MADERA_FLL_MIN_FOUT 90000000 +#define MADERA_FLL_MAX_FOUT 100000000 +#define MADERA_FLL_MAX_FRATIO 16 +#define MADERA_FLL_MAX_REFDIV 8 +#define MADERA_FLL_OUTDIV 3 +#define MADERA_FLL_VCO_MULT 3 +#define MADERA_FLLAO_MAX_FREF 12288000 +#define MADERA_FLLAO_MIN_N 4 +#define MADERA_FLLAO_MAX_N 1023 +#define MADERA_FLLAO_MAX_FBDIV 254 + +#define MADERA_FLL_SYNCHRONISER_OFFS 0x10 +#define CS47L35_FLL_SYNCHRONISER_OFFS 0xE +#define MADERA_FLL_CONTROL_1_OFFS 0x1 +#define MADERA_FLL_CONTROL_2_OFFS 0x2 +#define MADERA_FLL_CONTROL_3_OFFS 0x3 +#define MADERA_FLL_CONTROL_4_OFFS 0x4 +#define MADERA_FLL_CONTROL_5_OFFS 0x5 +#define MADERA_FLL_CONTROL_6_OFFS 0x6 +#define MADERA_FLL_CONTROL_7_OFFS 0x9 +#define MADERA_FLL_EFS_2_OFFS 0xA +#define MADERA_FLL_SYNCHRONISER_1_OFFS 0x1 +#define MADERA_FLL_SYNCHRONISER_2_OFFS 0x2 +#define MADERA_FLL_SYNCHRONISER_3_OFFS 0x3 +#define MADERA_FLL_SYNCHRONISER_4_OFFS 0x4 +#define MADERA_FLL_SYNCHRONISER_5_OFFS 0x5 +#define MADERA_FLL_SYNCHRONISER_6_OFFS 0x6 +#define MADERA_FLL_SYNCHRONISER_7_OFFS 0x7 +#define MADERA_FLL_SPREAD_SPECTRUM_OFFS 0x9 +#define MADERA_FLL_GPIO_CLOCK_OFFS 0xA + +#define MADERA_FLLAO_CONTROL_1_OFFS 0x1 +#define MADERA_FLLAO_CONTROL_2_OFFS 0x2 +#define MADERA_FLLAO_CONTROL_3_OFFS 0x3 +#define MADERA_FLLAO_CONTROL_4_OFFS 0x4 +#define MADERA_FLLAO_CONTROL_5_OFFS 0x5 +#define MADERA_FLLAO_CONTROL_6_OFFS 0x6 +#define MADERA_FLLAO_CONTROL_7_OFFS 0x8 +#define MADERA_FLLAO_CONTROL_8_OFFS 0xA +#define MADERA_FLLAO_CONTROL_9_OFFS 0xB +#define MADERA_FLLAO_CONTROL_10_OFFS 0xC +#define MADERA_FLLAO_CONTROL_11_OFFS 0xD + +#define MADERA_FMT_DSP_MODE_A 0 +#define MADERA_FMT_DSP_MODE_B 1 +#define MADERA_FMT_I2S_MODE 2 +#define MADERA_FMT_LEFT_JUSTIFIED_MODE 3 + +#define madera_fll_err(_fll, fmt, ...) \ + dev_err(_fll->madera->dev, "FLL%d: " fmt, _fll->id, ##__VA_ARGS__) +#define madera_fll_warn(_fll, fmt, ...) \ + dev_warn(_fll->madera->dev, "FLL%d: " fmt, _fll->id, ##__VA_ARGS__) +#define madera_fll_dbg(_fll, fmt, ...) \ + dev_dbg(_fll->madera->dev, "FLL%d: " fmt, _fll->id, ##__VA_ARGS__) + +#define madera_aif_err(_dai, fmt, ...) \ + dev_err(_dai->dev, "AIF%d: " fmt, _dai->id, ##__VA_ARGS__) +#define madera_aif_warn(_dai, fmt, ...) \ + dev_warn(_dai->dev, "AIF%d: " fmt, _dai->id, ##__VA_ARGS__) +#define madera_aif_dbg(_dai, fmt, ...) \ + dev_dbg(_dai->dev, "AIF%d: " fmt, _dai->id, ##__VA_ARGS__) + +static const int madera_dsp_bus_error_irqs[MADERA_MAX_ADSP] = { + MADERA_IRQ_DSP1_BUS_ERR, + MADERA_IRQ_DSP2_BUS_ERR, + MADERA_IRQ_DSP3_BUS_ERR, + MADERA_IRQ_DSP4_BUS_ERR, + MADERA_IRQ_DSP5_BUS_ERR, + MADERA_IRQ_DSP6_BUS_ERR, + MADERA_IRQ_DSP7_BUS_ERR, +}; + +static void madera_spin_sysclk(struct madera_priv *priv) +{ + struct madera *madera = priv->madera; + unsigned int val; + int ret, i; + + /* Skip this if the chip is down */ + if (pm_runtime_suspended(madera->dev)) + return; + + /* + * Just read a register a few times to ensure the internal + * oscillator sends out a few clocks. + */ + for (i = 0; i < 4; i++) { + ret = regmap_read(madera->regmap, MADERA_SOFTWARE_RESET, &val); + if (ret) + dev_err(madera->dev, + "Failed to read sysclk spin %d: %d\n", i, ret); + } + + udelay(300); +} + +int madera_sysclk_ev(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + + madera_spin_sysclk(priv); + + return 0; +} +EXPORT_SYMBOL_GPL(madera_sysclk_ev); + +static int madera_check_speaker_overheat(struct madera *madera, + bool *warn, bool *shutdown) +{ + unsigned int val; + int ret; + + ret = regmap_read(madera->regmap, MADERA_IRQ1_RAW_STATUS_15, &val); + if (ret) { + dev_err(madera->dev, "Failed to read thermal status: %d\n", + ret); + return ret; + } + + *warn = val & MADERA_SPK_OVERHEAT_WARN_STS1; + *shutdown = val & MADERA_SPK_OVERHEAT_STS1; + + return 0; +} + +int madera_spk_ev(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct madera *madera = priv->madera; + bool warn, shutdown; + int ret; + + switch (event) { + case SND_SOC_DAPM_POST_PMU: + ret = madera_check_speaker_overheat(madera, &warn, &shutdown); + if (ret) + return ret; + + if (shutdown) { + dev_crit(madera->dev, + "Speaker not enabled due to temperature\n"); + return -EBUSY; + } + + regmap_update_bits(madera->regmap, MADERA_OUTPUT_ENABLES_1, + 1 << w->shift, 1 << w->shift); + break; + case SND_SOC_DAPM_PRE_PMD: + regmap_update_bits(madera->regmap, MADERA_OUTPUT_ENABLES_1, + 1 << w->shift, 0); + break; + default: + break; + } + + return 0; +} +EXPORT_SYMBOL_GPL(madera_spk_ev); + +static irqreturn_t madera_thermal_warn(int irq, void *data) +{ + struct madera *madera = data; + bool warn, shutdown; + int ret; + + ret = madera_check_speaker_overheat(madera, &warn, &shutdown); + if (ret || shutdown) { /* for safety attempt to shutdown on error */ + dev_crit(madera->dev, "Thermal shutdown\n"); + ret = regmap_update_bits(madera->regmap, + MADERA_OUTPUT_ENABLES_1, + MADERA_OUT4L_ENA | + MADERA_OUT4R_ENA, 0); + if (ret != 0) + dev_crit(madera->dev, + "Failed to disable speaker outputs: %d\n", + ret); + } else if (warn) { + dev_alert(madera->dev, "Thermal warning\n"); + } else { + dev_info(madera->dev, "Spurious thermal warning\n"); + return IRQ_NONE; + } + + return IRQ_HANDLED; +} + +int madera_init_overheat(struct madera_priv *priv) +{ + struct madera *madera = priv->madera; + struct device *dev = madera->dev; + int ret; + + ret = madera_request_irq(madera, MADERA_IRQ_SPK_OVERHEAT_WARN, + "Thermal warning", madera_thermal_warn, + madera); + if (ret) + dev_err(dev, "Failed to get thermal warning IRQ: %d\n", ret); + + ret = madera_request_irq(madera, MADERA_IRQ_SPK_OVERHEAT, + "Thermal shutdown", madera_thermal_warn, + madera); + if (ret) + dev_err(dev, "Failed to get thermal shutdown IRQ: %d\n", ret); + + return 0; +} +EXPORT_SYMBOL_GPL(madera_init_overheat); + +int madera_free_overheat(struct madera_priv *priv) +{ + struct madera *madera = priv->madera; + + madera_free_irq(madera, MADERA_IRQ_SPK_OVERHEAT_WARN, madera); + madera_free_irq(madera, MADERA_IRQ_SPK_OVERHEAT, madera); + + return 0; +} +EXPORT_SYMBOL_GPL(madera_free_overheat); + +int madera_core_init(struct madera_priv *priv) +{ + int i; + + /* trap undersized array initializers */ + BUILD_BUG_ON(!madera_mixer_texts[MADERA_NUM_MIXER_INPUTS - 1]); + BUILD_BUG_ON(!madera_mixer_values[MADERA_NUM_MIXER_INPUTS - 1]); + + mutex_init(&priv->rate_lock); + + for (i = 0; i < MADERA_MAX_HP_OUTPUT; i++) + priv->madera->out_clamp[i] = true; + + return 0; +} +EXPORT_SYMBOL_GPL(madera_core_init); + +int madera_core_free(struct madera_priv *priv) +{ + mutex_destroy(&priv->rate_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(madera_core_free); + +static void madera_debug_dump_domain_groups(const struct madera_priv *priv) +{ + struct madera *madera = priv->madera; + int i; + + for (i = 0; i < ARRAY_SIZE(priv->domain_group_ref); ++i) + dev_dbg(madera->dev, "domain_grp_ref[%d]=%d\n", i, + priv->domain_group_ref[i]); +} + +int madera_domain_clk_ev(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, + int event) +{ + struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + int dom_grp = w->shift; + + if (dom_grp >= ARRAY_SIZE(priv->domain_group_ref)) { + WARN(true, "%s dom_grp exceeds array size\n", __func__); + return -EINVAL; + } + + /* + * We can't rely on the DAPM mutex for locking because we need a lock + * that can safely be called in hw_params + */ + mutex_lock(&priv->rate_lock); + + switch (event) { + case SND_SOC_DAPM_PRE_PMU: + dev_dbg(priv->madera->dev, "Inc ref on domain group %d\n", + dom_grp); + ++priv->domain_group_ref[dom_grp]; + break; + case SND_SOC_DAPM_POST_PMD: + dev_dbg(priv->madera->dev, "Dec ref on domain group %d\n", + dom_grp); + --priv->domain_group_ref[dom_grp]; + break; + default: + break; + } + + madera_debug_dump_domain_groups(priv); + + mutex_unlock(&priv->rate_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(madera_domain_clk_ev); + +int madera_out1_demux_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = + snd_soc_dapm_kcontrol_component(kcontrol); + struct snd_soc_dapm_context *dapm = + snd_soc_dapm_kcontrol_dapm(kcontrol); + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct madera *madera = priv->madera; + struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; + unsigned int ep_sel, mux, change; + bool out_mono; + int ret; + + if (ucontrol->value.enumerated.item[0] > e->items - 1) + return -EINVAL; + + mux = ucontrol->value.enumerated.item[0]; + + snd_soc_dapm_mutex_lock(dapm); + + ep_sel = mux << MADERA_EP_SEL_SHIFT; + + change = snd_soc_component_test_bits(component, MADERA_OUTPUT_ENABLES_1, + MADERA_EP_SEL_MASK, + ep_sel); + if (!change) + goto end; + + /* EP_SEL should not be modified while HP or EP driver is enabled */ + ret = regmap_update_bits(madera->regmap, MADERA_OUTPUT_ENABLES_1, + MADERA_OUT1L_ENA | MADERA_OUT1R_ENA, 0); + if (ret) + dev_warn(madera->dev, "Failed to disable outputs: %d\n", ret); + + usleep_range(2000, 3000); /* wait for wseq to complete */ + + /* change demux setting */ + if (madera->out_clamp[0]) + ret = regmap_update_bits(madera->regmap, + MADERA_OUTPUT_ENABLES_1, + MADERA_EP_SEL_MASK, ep_sel); + if (ret) { + dev_err(madera->dev, "Failed to set OUT1 demux: %d\n", ret); + } else { + /* apply correct setting for mono mode */ + if (!ep_sel && !madera->pdata.codec.out_mono[0]) + out_mono = false; /* stereo HP */ + else + out_mono = true; /* EP or mono HP */ + + ret = madera_set_output_mode(component, 1, out_mono); + if (ret) + dev_warn(madera->dev, + "Failed to set output mode: %d\n", ret); + } + + /* + * if HPDET has disabled the clamp while switching to HPOUT + * OUT1 should remain disabled + */ + if (ep_sel || + (madera->out_clamp[0] && !madera->out_shorted[0])) { + ret = regmap_update_bits(madera->regmap, + MADERA_OUTPUT_ENABLES_1, + MADERA_OUT1L_ENA | MADERA_OUT1R_ENA, + madera->hp_ena); + if (ret) + dev_warn(madera->dev, + "Failed to restore earpiece outputs: %d\n", + ret); + else if (madera->hp_ena) + msleep(34); /* wait for enable wseq */ + else + usleep_range(2000, 3000); /* wait for disable wseq */ + } + +end: + snd_soc_dapm_mutex_unlock(dapm); + + return snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL); +} +EXPORT_SYMBOL_GPL(madera_out1_demux_put); + +int madera_out1_demux_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = + snd_soc_dapm_kcontrol_component(kcontrol); + unsigned int val; + int ret; + + ret = snd_soc_component_read(component, MADERA_OUTPUT_ENABLES_1, &val); + if (ret) + return ret; + + val &= MADERA_EP_SEL_MASK; + val >>= MADERA_EP_SEL_SHIFT; + ucontrol->value.enumerated.item[0] = val; + + return 0; +} +EXPORT_SYMBOL_GPL(madera_out1_demux_get); + +static int madera_inmux_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = + snd_soc_dapm_kcontrol_component(kcontrol); + struct snd_soc_dapm_context *dapm = + snd_soc_dapm_kcontrol_dapm(kcontrol); + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct madera *madera = priv->madera; + struct regmap *regmap = madera->regmap; + struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; + unsigned int mux, val, mask; + unsigned int inmode; + bool changed; + int ret; + + mux = ucontrol->value.enumerated.item[0]; + if (mux > 1) + return -EINVAL; + + val = mux << e->shift_l; + mask = (e->mask << e->shift_l) | MADERA_IN1L_SRC_SE_MASK; + + switch (e->reg) { + case MADERA_ADC_DIGITAL_VOLUME_1L: + inmode = madera->pdata.codec.inmode[0][2 * mux]; + break; + case MADERA_ADC_DIGITAL_VOLUME_1R: + inmode = madera->pdata.codec.inmode[0][1 + (2 * mux)]; + break; + case MADERA_ADC_DIGITAL_VOLUME_2L: + inmode = madera->pdata.codec.inmode[1][2 * mux]; + break; + case MADERA_ADC_DIGITAL_VOLUME_2R: + inmode = madera->pdata.codec.inmode[1][1 + (2 * mux)]; + break; + default: + return -EINVAL; + } + + if (inmode & MADERA_INMODE_SE) + val |= 1 << MADERA_IN1L_SRC_SE_SHIFT; + + dev_dbg(madera->dev, "mux=%u reg=0x%x inmode=0x%x mask=0x%x val=0x%x\n", + mux, e->reg, inmode, mask, val); + + ret = regmap_update_bits_check(regmap, e->reg, mask, val, &changed); + if (ret < 0) + return ret; + + if (changed) + return snd_soc_dapm_mux_update_power(dapm, kcontrol, + mux, e, NULL); + else + return 0; +} + +static const char * const madera_inmux_texts[] = { + "A", + "B", +}; + +static SOC_ENUM_SINGLE_DECL(madera_in1muxl_enum, + MADERA_ADC_DIGITAL_VOLUME_1L, + MADERA_IN1L_SRC_SHIFT, + madera_inmux_texts); + +static SOC_ENUM_SINGLE_DECL(madera_in1muxr_enum, + MADERA_ADC_DIGITAL_VOLUME_1R, + MADERA_IN1R_SRC_SHIFT, + madera_inmux_texts); + +static SOC_ENUM_SINGLE_DECL(madera_in2muxl_enum, + MADERA_ADC_DIGITAL_VOLUME_2L, + MADERA_IN2L_SRC_SHIFT, + madera_inmux_texts); + +static SOC_ENUM_SINGLE_DECL(madera_in2muxr_enum, + MADERA_ADC_DIGITAL_VOLUME_2R, + MADERA_IN2R_SRC_SHIFT, + madera_inmux_texts); + +const struct snd_kcontrol_new madera_inmux[] = { + SOC_DAPM_ENUM_EXT("IN1L Mux", madera_in1muxl_enum, + snd_soc_dapm_get_enum_double, madera_inmux_put), + SOC_DAPM_ENUM_EXT("IN1R Mux", madera_in1muxr_enum, + snd_soc_dapm_get_enum_double, madera_inmux_put), + SOC_DAPM_ENUM_EXT("IN2L Mux", madera_in2muxl_enum, + snd_soc_dapm_get_enum_double, madera_inmux_put), + SOC_DAPM_ENUM_EXT("IN2R Mux", madera_in2muxr_enum, + snd_soc_dapm_get_enum_double, madera_inmux_put), +}; +EXPORT_SYMBOL_GPL(madera_inmux); + +static const char * const madera_dmode_texts[] = { + "Analog", + "Digital", +}; + +static SOC_ENUM_SINGLE_DECL(madera_in1dmode_enum, + MADERA_IN1L_CONTROL, + MADERA_IN1_MODE_SHIFT, + madera_dmode_texts); + +static SOC_ENUM_SINGLE_DECL(madera_in2dmode_enum, + MADERA_IN2L_CONTROL, + MADERA_IN2_MODE_SHIFT, + madera_dmode_texts); + +static SOC_ENUM_SINGLE_DECL(madera_in3dmode_enum, + MADERA_IN3L_CONTROL, + MADERA_IN3_MODE_SHIFT, + madera_dmode_texts); + +const struct snd_kcontrol_new madera_inmode[] = { + SOC_DAPM_ENUM("IN1 Mode", madera_in1dmode_enum), + SOC_DAPM_ENUM("IN2 Mode", madera_in2dmode_enum), + SOC_DAPM_ENUM("IN3 Mode", madera_in3dmode_enum), +}; +EXPORT_SYMBOL_GPL(madera_inmode); + +static bool madera_can_change_grp_rate(const struct madera_priv *priv, + unsigned int reg) +{ + int count; + + switch (reg) { + case MADERA_FX_CTRL1: + count = priv->domain_group_ref[MADERA_DOM_GRP_FX]; + break; + case MADERA_ASRC1_RATE1: + case MADERA_ASRC1_RATE2: + count = priv->domain_group_ref[MADERA_DOM_GRP_ASRC1]; + break; + case MADERA_ASRC2_RATE1: + case MADERA_ASRC2_RATE2: + count = priv->domain_group_ref[MADERA_DOM_GRP_ASRC2]; + break; + case MADERA_ISRC_1_CTRL_1: + case MADERA_ISRC_1_CTRL_2: + count = priv->domain_group_ref[MADERA_DOM_GRP_ISRC1]; + break; + case MADERA_ISRC_2_CTRL_1: + case MADERA_ISRC_2_CTRL_2: + count = priv->domain_group_ref[MADERA_DOM_GRP_ISRC2]; + break; + case MADERA_ISRC_3_CTRL_1: + case MADERA_ISRC_3_CTRL_2: + count = priv->domain_group_ref[MADERA_DOM_GRP_ISRC3]; + break; + case MADERA_ISRC_4_CTRL_1: + case MADERA_ISRC_4_CTRL_2: + count = priv->domain_group_ref[MADERA_DOM_GRP_ISRC4]; + break; + case MADERA_OUTPUT_RATE_1: + count = priv->domain_group_ref[MADERA_DOM_GRP_OUT]; + break; + case MADERA_SPD1_TX_CONTROL: + count = priv->domain_group_ref[MADERA_DOM_GRP_SPD]; + break; + case MADERA_DSP1_CONFIG_1: + case MADERA_DSP1_CONFIG_2: + count = priv->domain_group_ref[MADERA_DOM_GRP_DSP1]; + break; + case MADERA_DSP2_CONFIG_1: + case MADERA_DSP2_CONFIG_2: + count = priv->domain_group_ref[MADERA_DOM_GRP_DSP2]; + break; + case MADERA_DSP3_CONFIG_1: + case MADERA_DSP3_CONFIG_2: + count = priv->domain_group_ref[MADERA_DOM_GRP_DSP3]; + break; + case MADERA_DSP4_CONFIG_1: + case MADERA_DSP4_CONFIG_2: + count = priv->domain_group_ref[MADERA_DOM_GRP_DSP4]; + break; + case MADERA_DSP5_CONFIG_1: + case MADERA_DSP5_CONFIG_2: + count = priv->domain_group_ref[MADERA_DOM_GRP_DSP5]; + break; + case MADERA_DSP6_CONFIG_1: + case MADERA_DSP6_CONFIG_2: + count = priv->domain_group_ref[MADERA_DOM_GRP_DSP6]; + break; + case MADERA_DSP7_CONFIG_1: + case MADERA_DSP7_CONFIG_2: + count = priv->domain_group_ref[MADERA_DOM_GRP_DSP7]; + break; + case MADERA_AIF1_RATE_CTRL: + count = priv->domain_group_ref[MADERA_DOM_GRP_AIF1]; + break; + case MADERA_AIF2_RATE_CTRL: + count = priv->domain_group_ref[MADERA_DOM_GRP_AIF2]; + break; + case MADERA_AIF3_RATE_CTRL: + count = priv->domain_group_ref[MADERA_DOM_GRP_AIF3]; + break; + case MADERA_AIF4_RATE_CTRL: + count = priv->domain_group_ref[MADERA_DOM_GRP_AIF4]; + break; + case MADERA_SLIMBUS_RATES_1: + case MADERA_SLIMBUS_RATES_2: + case MADERA_SLIMBUS_RATES_3: + case MADERA_SLIMBUS_RATES_4: + case MADERA_SLIMBUS_RATES_5: + case MADERA_SLIMBUS_RATES_6: + case MADERA_SLIMBUS_RATES_7: + case MADERA_SLIMBUS_RATES_8: + count = priv->domain_group_ref[MADERA_DOM_GRP_SLIMBUS]; + break; + case MADERA_PWM_DRIVE_1: + count = priv->domain_group_ref[MADERA_DOM_GRP_PWM]; + break; + default: + return false; + } + + dev_dbg(priv->madera->dev, "Rate reg 0x%x group ref %d\n", reg, count); + + if (count) + return false; + else + return true; +} + +static int madera_adsp_rate_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = + snd_soc_kcontrol_component(kcontrol); + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; + unsigned int cached_rate; + const int adsp_num = e->shift_l; + int item; + + mutex_lock(&priv->rate_lock); + cached_rate = priv->adsp_rate_cache[adsp_num]; + mutex_unlock(&priv->rate_lock); + + item = snd_soc_enum_val_to_item(e, cached_rate); + ucontrol->value.enumerated.item[0] = item; + + return 0; +} + +static int madera_adsp_rate_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = + snd_soc_kcontrol_component(kcontrol); + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; + const int adsp_num = e->shift_l; + const unsigned int item = ucontrol->value.enumerated.item[0]; + int ret; + + if (item >= e->items) + return -EINVAL; + + /* + * We don't directly write the rate register here but we want to + * maintain consistent behaviour that rate domains cannot be changed + * while in use since this is a hardware requirement + */ + mutex_lock(&priv->rate_lock); + + if (!madera_can_change_grp_rate(priv, priv->adsp[adsp_num].base)) { + dev_warn(priv->madera->dev, + "Cannot change '%s' while in use by active audio paths\n", + kcontrol->id.name); + ret = -EBUSY; + } else { + /* Volatile register so defer until the codec is powered up */ + priv->adsp_rate_cache[adsp_num] = e->values[item]; + ret = 0; + } + + mutex_unlock(&priv->rate_lock); + + return ret; +} + +static const struct soc_enum madera_adsp_rate_enum[] = { + SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 0, 0xf, MADERA_RATE_ENUM_SIZE, + madera_rate_text, madera_rate_val), + SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 1, 0xf, MADERA_RATE_ENUM_SIZE, + madera_rate_text, madera_rate_val), + SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 2, 0xf, MADERA_RATE_ENUM_SIZE, + madera_rate_text, madera_rate_val), + SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 3, 0xf, MADERA_RATE_ENUM_SIZE, + madera_rate_text, madera_rate_val), + SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 4, 0xf, MADERA_RATE_ENUM_SIZE, + madera_rate_text, madera_rate_val), + SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 5, 0xf, MADERA_RATE_ENUM_SIZE, + madera_rate_text, madera_rate_val), + SOC_VALUE_ENUM_SINGLE(SND_SOC_NOPM, 6, 0xf, MADERA_RATE_ENUM_SIZE, + madera_rate_text, madera_rate_val), +}; + +const struct snd_kcontrol_new madera_adsp_rate_controls[] = { + SOC_ENUM_EXT("DSP1 Rate", madera_adsp_rate_enum[0], + madera_adsp_rate_get, madera_adsp_rate_put), + SOC_ENUM_EXT("DSP2 Rate", madera_adsp_rate_enum[1], + madera_adsp_rate_get, madera_adsp_rate_put), + SOC_ENUM_EXT("DSP3 Rate", madera_adsp_rate_enum[2], + madera_adsp_rate_get, madera_adsp_rate_put), + SOC_ENUM_EXT("DSP4 Rate", madera_adsp_rate_enum[3], + madera_adsp_rate_get, madera_adsp_rate_put), + SOC_ENUM_EXT("DSP5 Rate", madera_adsp_rate_enum[4], + madera_adsp_rate_get, madera_adsp_rate_put), + SOC_ENUM_EXT("DSP6 Rate", madera_adsp_rate_enum[5], + madera_adsp_rate_get, madera_adsp_rate_put), + SOC_ENUM_EXT("DSP7 Rate", madera_adsp_rate_enum[6], + madera_adsp_rate_get, madera_adsp_rate_put), +}; +EXPORT_SYMBOL_GPL(madera_adsp_rate_controls); + +static int madera_write_adsp_clk_setting(struct madera_priv *priv, + struct wm_adsp *dsp, + unsigned int freq) +{ + unsigned int val; + unsigned int mask = MADERA_DSP_RATE_MASK; + int ret; + + val = priv->adsp_rate_cache[dsp->num - 1] << MADERA_DSP_RATE_SHIFT; + + switch (priv->madera->type) { + case CS47L35: + case CS47L85: + case WM1840: + /* use legacy frequency registers */ + mask |= MADERA_DSP_CLK_SEL_MASK; + val |= (freq << MADERA_DSP_CLK_SEL_SHIFT); + break; + default: + /* Configure exact dsp frequency */ + dev_dbg(priv->madera->dev, "Set DSP frequency to 0x%x\n", freq); + + ret = regmap_write(dsp->regmap, + dsp->base + MADERA_DSP_CONFIG_2_OFFS, freq); + if (ret) + goto err; + break; + } + + ret = regmap_update_bits(dsp->regmap, + dsp->base + MADERA_DSP_CONFIG_1_OFFS, + mask, val); + if (ret) + goto err; + + dev_dbg(priv->madera->dev, "Set DSP clocking to 0x%x\n", val); + + return 0; + +err: + dev_err(dsp->dev, "Failed to set DSP%d clock: %d\n", dsp->num, ret); + + return ret; +} + +int madera_set_adsp_clk(struct madera_priv *priv, int dsp_num, + unsigned int freq) +{ + struct wm_adsp *dsp = &priv->adsp[dsp_num]; + struct madera *madera = priv->madera; + unsigned int cur, new; + int ret; + + /* + * This is called at a higher DAPM priority than the mux widgets so + * the muxes are still off at this point and it's safe to change + * the rate domain control. + * Also called at a lower DAPM priority than the domain group widgets + * so locking the reads of adsp_rate_cache is not necessary as we know + * changes are locked out by the domain_group_ref reference count. + */ + + ret = regmap_read(dsp->regmap, dsp->base, &cur); + if (ret) { + dev_err(madera->dev, + "Failed to read current DSP rate: %d\n", ret); + return ret; + } + + cur &= MADERA_DSP_RATE_MASK; + + new = priv->adsp_rate_cache[dsp->num - 1] << MADERA_DSP_RATE_SHIFT; + + if (new == cur) { + dev_dbg(madera->dev, "DSP rate not changed\n"); + return madera_write_adsp_clk_setting(priv, dsp, freq); + } else { + dev_dbg(madera->dev, "DSP rate changed\n"); + + /* The write must be guarded by a number of SYSCLK cycles */ + madera_spin_sysclk(priv); + ret = madera_write_adsp_clk_setting(priv, dsp, freq); + madera_spin_sysclk(priv); + return ret; + } +} +EXPORT_SYMBOL_GPL(madera_set_adsp_clk); + +int madera_rate_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = + snd_soc_kcontrol_component(kcontrol); + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; + unsigned int item = ucontrol->value.enumerated.item[0]; + unsigned int val; + int ret; + + if (item >= e->items) + return -EINVAL; + + /* + * Prevent the domain powering up while we're checking whether it's + * safe to change rate domain + */ + mutex_lock(&priv->rate_lock); + + ret = snd_soc_component_read(component, e->reg, &val); + if (ret < 0) { + dev_warn(priv->madera->dev, "Failed to read 0x%x (%d)\n", + e->reg, ret); + goto out; + } + val >>= e->shift_l; + val &= e->mask; + if (snd_soc_enum_item_to_val(e, item) == val) { + ret = 0; + goto out; + } + + if (!madera_can_change_grp_rate(priv, e->reg)) { + dev_warn(priv->madera->dev, + "Cannot change '%s' while in use by active audio paths\n", + kcontrol->id.name); + ret = -EBUSY; + } else { + /* The write must be guarded by a number of SYSCLK cycles */ + madera_spin_sysclk(priv); + ret = snd_soc_put_enum_double(kcontrol, ucontrol); + madera_spin_sysclk(priv); + } +out: + mutex_unlock(&priv->rate_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(madera_rate_put); + +static void madera_configure_input_mode(struct madera *madera) +{ + unsigned int dig_mode, ana_mode_l, ana_mode_r; + int max_analogue_inputs, max_dmic_sup, i; + + switch (madera->type) { + case CS47L35: + max_analogue_inputs = 2; + max_dmic_sup = 2; + break; + case CS47L85: + case WM1840: + max_analogue_inputs = 3; + max_dmic_sup = 3; + break; + case CS47L90: + case CS47L91: + max_analogue_inputs = 2; + max_dmic_sup = 2; + break; + default: + max_analogue_inputs = 2; + max_dmic_sup = 4; + break; + } + + /* + * Initialize input modes from the A settings. For muxed inputs the + * B settings will be applied if the mux is changed + */ + for (i = 0; i < max_dmic_sup; i++) { + dev_dbg(madera->dev, "IN%d mode %u:%u:%u:%u\n", i + 1, + madera->pdata.codec.inmode[i][0], + madera->pdata.codec.inmode[i][1], + madera->pdata.codec.inmode[i][2], + madera->pdata.codec.inmode[i][3]); + + dig_mode = madera->pdata.codec.dmic_ref[i] << + MADERA_IN1_DMIC_SUP_SHIFT; + + switch (madera->pdata.codec.inmode[i][0]) { + case MADERA_INMODE_DIFF: + ana_mode_l = 0; + break; + case MADERA_INMODE_SE: + ana_mode_l = 1 << MADERA_IN1L_SRC_SE_SHIFT; + break; + default: + dev_warn(madera->dev, + "IN%dAL Illegal inmode %u ignored\n", + i + 1, madera->pdata.codec.inmode[i][0]); + continue; + } + + switch (madera->pdata.codec.inmode[i][1]) { + case MADERA_INMODE_DIFF: + ana_mode_r = 0; + break; + case MADERA_INMODE_SE: + ana_mode_r = 1 << MADERA_IN1R_SRC_SE_SHIFT; + break; + default: + dev_warn(madera->dev, + "IN%dAR Illegal inmode %u ignored\n", + i + 1, madera->pdata.codec.inmode[i][1]); + continue; + } + + dev_dbg(madera->dev, + "IN%dA DMIC mode=0x%x Analogue mode=0x%x,0x%x\n", + i + 1, dig_mode, ana_mode_l, ana_mode_r); + + regmap_update_bits(madera->regmap, + MADERA_IN1L_CONTROL + (i * 8), + MADERA_IN1_DMIC_SUP_MASK, dig_mode); + + if (i >= max_analogue_inputs) + continue; + + regmap_update_bits(madera->regmap, + MADERA_ADC_DIGITAL_VOLUME_1L + (i * 8), + MADERA_IN1L_SRC_SE_MASK, ana_mode_l); + + regmap_update_bits(madera->regmap, + MADERA_ADC_DIGITAL_VOLUME_1R + (i * 8), + MADERA_IN1R_SRC_SE_MASK, ana_mode_r); + } +} + +int madera_init_inputs(struct snd_soc_component *component) +{ + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct madera *madera = priv->madera; + + madera_configure_input_mode(madera); + + return 0; +} +EXPORT_SYMBOL_GPL(madera_init_inputs); + +static const struct snd_soc_dapm_route madera_mono_routes[] = { + { "OUT1R", NULL, "OUT1L" }, + { "OUT2R", NULL, "OUT2L" }, + { "OUT3R", NULL, "OUT3L" }, + { "OUT4R", NULL, "OUT4L" }, + { "OUT5R", NULL, "OUT5L" }, + { "OUT6R", NULL, "OUT6L" }, +}; + +int madera_init_outputs(struct snd_soc_component *component, int n_mono_routes) +{ + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct madera *madera = priv->madera; + const struct madera_codec_pdata *pdata = &madera->pdata.codec; + unsigned int val; + int i; + + if (n_mono_routes > MADERA_MAX_OUTPUT) { + dev_warn(madera->dev, + "Requested %d mono outputs, using maximum allowed %d\n", + n_mono_routes, MADERA_MAX_OUTPUT); + n_mono_routes = MADERA_MAX_OUTPUT; + } + + for (i = 0; i < n_mono_routes; i++) { + /* Default is 0 so noop with defaults */ + if (pdata->out_mono[i]) { + val = MADERA_OUT1_MONO; + snd_soc_dapm_add_routes(dapm, + &madera_mono_routes[i], 1); + } else { + val = 0; + } + + regmap_update_bits(madera->regmap, + MADERA_OUTPUT_PATH_CONFIG_1L + (i * 8), + MADERA_OUT1_MONO, val); + + dev_dbg(madera->dev, "OUT%d mono=0x%x\n", i + 1, val); + } + + for (i = 0; i < MADERA_MAX_PDM_SPK; i++) { + dev_dbg(madera->dev, "PDM%d fmt=0x%x mute=0x%x\n", i + 1, + pdata->pdm_fmt[i], pdata->pdm_mute[i]); + + if (pdata->pdm_mute[i]) + regmap_update_bits(madera->regmap, + MADERA_PDM_SPK1_CTRL_1 + (i * 2), + MADERA_SPK1_MUTE_ENDIAN_MASK | + MADERA_SPK1_MUTE_SEQ1_MASK, + pdata->pdm_mute[i]); + + if (pdata->pdm_fmt[i]) + regmap_update_bits(madera->regmap, + MADERA_PDM_SPK1_CTRL_2 + (i * 2), + MADERA_SPK1_FMT_MASK, + pdata->pdm_fmt[i]); + } + + return 0; +} +EXPORT_SYMBOL_GPL(madera_init_outputs); + +int madera_init_bus_error_irq(struct madera_priv *priv, int dsp_num, + irq_handler_t handler) +{ + struct madera *madera = priv->madera; + int ret; + + ret = madera_request_irq(madera, + madera_dsp_bus_error_irqs[dsp_num], + "ADSP2 bus error", + handler, + &priv->adsp[dsp_num]); + if (ret) + dev_err(madera->dev, + "Failed to request DSP Lock region IRQ: %d\n", ret); + + return ret; +} +EXPORT_SYMBOL_GPL(madera_init_bus_error_irq); + +void madera_free_bus_error_irq(struct madera_priv *priv, int dsp_num) +{ + struct madera *madera = priv->madera; + + madera_free_irq(madera, + madera_dsp_bus_error_irqs[dsp_num], + &priv->adsp[dsp_num]); +} +EXPORT_SYMBOL_GPL(madera_free_bus_error_irq); + +const char * const madera_mixer_texts[] = { + "None", + "Tone Generator 1", + "Tone Generator 2", + "Haptics", + "AEC1", + "AEC2", + "Mic Mute Mixer", + "Noise Generator", + "IN1L", + "IN1R", + "IN2L", + "IN2R", + "IN3L", + "IN3R", + "IN4L", + "IN4R", + "IN5L", + "IN5R", + "IN6L", + "IN6R", + "AIF1RX1", + "AIF1RX2", + "AIF1RX3", + "AIF1RX4", + "AIF1RX5", + "AIF1RX6", + "AIF1RX7", + "AIF1RX8", + "AIF2RX1", + "AIF2RX2", + "AIF2RX3", + "AIF2RX4", + "AIF2RX5", + "AIF2RX6", + "AIF2RX7", + "AIF2RX8", + "AIF3RX1", + "AIF3RX2", + "AIF3RX3", + "AIF3RX4", + "AIF4RX1", + "AIF4RX2", + "SLIMRX1", + "SLIMRX2", + "SLIMRX3", + "SLIMRX4", + "SLIMRX5", + "SLIMRX6", + "SLIMRX7", + "SLIMRX8", + "EQ1", + "EQ2", + "EQ3", + "EQ4", + "DRC1L", + "DRC1R", + "DRC2L", + "DRC2R", + "LHPF1", + "LHPF2", + "LHPF3", + "LHPF4", + "DSP1.1", + "DSP1.2", + "DSP1.3", + "DSP1.4", + "DSP1.5", + "DSP1.6", + "DSP2.1", + "DSP2.2", + "DSP2.3", + "DSP2.4", + "DSP2.5", + "DSP2.6", + "DSP3.1", + "DSP3.2", + "DSP3.3", + "DSP3.4", + "DSP3.5", + "DSP3.6", + "DSP4.1", + "DSP4.2", + "DSP4.3", + "DSP4.4", + "DSP4.5", + "DSP4.6", + "DSP5.1", + "DSP5.2", + "DSP5.3", + "DSP5.4", + "DSP5.5", + "DSP5.6", + "DSP6.1", + "DSP6.2", + "DSP6.3", + "DSP6.4", + "DSP6.5", + "DSP6.6", + "DSP7.1", + "DSP7.2", + "DSP7.3", + "DSP7.4", + "DSP7.5", + "DSP7.6", + "ASRC1IN1L", + "ASRC1IN1R", + "ASRC1IN2L", + "ASRC1IN2R", + "ASRC2IN1L", + "ASRC2IN1R", + "ASRC2IN2L", + "ASRC2IN2R", + "ISRC1INT1", + "ISRC1INT2", + "ISRC1INT3", + "ISRC1INT4", + "ISRC1DEC1", + "ISRC1DEC2", + "ISRC1DEC3", + "ISRC1DEC4", + "ISRC2INT1", + "ISRC2INT2", + "ISRC2INT3", + "ISRC2INT4", + "ISRC2DEC1", + "ISRC2DEC2", + "ISRC2DEC3", + "ISRC2DEC4", + "ISRC3INT1", + "ISRC3INT2", + "ISRC3INT3", + "ISRC3INT4", + "ISRC3DEC1", + "ISRC3DEC2", + "ISRC3DEC3", + "ISRC3DEC4", + "ISRC4INT1", + "ISRC4INT2", + "ISRC4DEC1", + "ISRC4DEC2", + "DFC1", + "DFC2", + "DFC3", + "DFC4", + "DFC5", + "DFC6", + "DFC7", + "DFC8", +}; +EXPORT_SYMBOL_GPL(madera_mixer_texts); + +const unsigned int madera_mixer_values[] = { + 0x00, /* None */ + 0x04, /* Tone Generator 1 */ + 0x05, /* Tone Generator 2 */ + 0x06, /* Haptics */ + 0x08, /* AEC */ + 0x09, /* AEC2 */ + 0x0c, /* Noise mixer */ + 0x0d, /* Comfort noise */ + 0x10, /* IN1L */ + 0x11, + 0x12, + 0x13, + 0x14, + 0x15, + 0x16, + 0x17, + 0x18, + 0x19, + 0x1A, + 0x1B, + 0x20, /* AIF1RX1 */ + 0x21, + 0x22, + 0x23, + 0x24, + 0x25, + 0x26, + 0x27, + 0x28, /* AIF2RX1 */ + 0x29, + 0x2a, + 0x2b, + 0x2c, + 0x2d, + 0x2e, + 0x2f, + 0x30, /* AIF3RX1 */ + 0x31, + 0x32, + 0x33, + 0x34, /* AIF4RX1 */ + 0x35, + 0x38, /* SLIMRX1 */ + 0x39, + 0x3a, + 0x3b, + 0x3c, + 0x3d, + 0x3e, + 0x3f, + 0x50, /* EQ1 */ + 0x51, + 0x52, + 0x53, + 0x58, /* DRC1L */ + 0x59, + 0x5a, + 0x5b, + 0x60, /* LHPF1 */ + 0x61, + 0x62, + 0x63, + 0x68, /* DSP1.1 */ + 0x69, + 0x6a, + 0x6b, + 0x6c, + 0x6d, + 0x70, /* DSP2.1 */ + 0x71, + 0x72, + 0x73, + 0x74, + 0x75, + 0x78, /* DSP3.1 */ + 0x79, + 0x7a, + 0x7b, + 0x7c, + 0x7d, + 0x80, /* DSP4.1 */ + 0x81, + 0x82, + 0x83, + 0x84, + 0x85, + 0x88, /* DSP5.1 */ + 0x89, + 0x8a, + 0x8b, + 0x8c, + 0x8d, + 0xc0, /* DSP6.1 */ + 0xc1, + 0xc2, + 0xc3, + 0xc4, + 0xc5, + 0xc8, /* DSP7.1 */ + 0xc9, + 0xca, + 0xcb, + 0xcc, + 0xcd, + 0x90, /* ASRC1IN1L */ + 0x91, + 0x92, + 0x93, + 0x94, /* ASRC2IN1L */ + 0x95, + 0x96, + 0x97, + 0xa0, /* ISRC1INT1 */ + 0xa1, + 0xa2, + 0xa3, + 0xa4, /* ISRC1DEC1 */ + 0xa5, + 0xa6, + 0xa7, + 0xa8, /* ISRC2DEC1 */ + 0xa9, + 0xaa, + 0xab, + 0xac, /* ISRC2INT1 */ + 0xad, + 0xae, + 0xaf, + 0xb0, /* ISRC3DEC1 */ + 0xb1, + 0xb2, + 0xb3, + 0xb4, /* ISRC3INT1 */ + 0xb5, + 0xb6, + 0xb7, + 0xb8, /* ISRC4INT1 */ + 0xb9, + 0xbc, /* ISRC4DEC1 */ + 0xbd, + 0xf8, /* DFC1 */ + 0xf9, + 0xfa, + 0xfb, + 0xfc, + 0xfd, + 0xfe, + 0xff, /* DFC8 */ +}; +EXPORT_SYMBOL_GPL(madera_mixer_values); + +const DECLARE_TLV_DB_SCALE(madera_ana_tlv, 0, 100, 0); +EXPORT_SYMBOL_GPL(madera_ana_tlv); + +const DECLARE_TLV_DB_SCALE(madera_eq_tlv, -1200, 100, 0); +EXPORT_SYMBOL_GPL(madera_eq_tlv); + +const DECLARE_TLV_DB_SCALE(madera_digital_tlv, -6400, 50, 0); +EXPORT_SYMBOL_GPL(madera_digital_tlv); + +const DECLARE_TLV_DB_SCALE(madera_noise_tlv, -13200, 600, 0); +EXPORT_SYMBOL_GPL(madera_noise_tlv); + +const DECLARE_TLV_DB_SCALE(madera_ng_tlv, -12000, 600, 0); +EXPORT_SYMBOL_GPL(madera_ng_tlv); + +const DECLARE_TLV_DB_SCALE(madera_mixer_tlv, -3200, 100, 0); +EXPORT_SYMBOL_GPL(madera_mixer_tlv); + +const char * const madera_rate_text[MADERA_RATE_ENUM_SIZE] = { + "SYNCCLK rate 1", "SYNCCLK rate 2", "SYNCCLK rate 3", + "ASYNCCLK rate 1", "ASYNCCLK rate 2", +}; +EXPORT_SYMBOL_GPL(madera_rate_text); + +const unsigned int madera_rate_val[MADERA_RATE_ENUM_SIZE] = { + 0x0, 0x1, 0x2, 0x8, 0x9, +}; +EXPORT_SYMBOL_GPL(madera_rate_val); + +static const char * const madera_dfc_width_text[MADERA_DFC_WIDTH_ENUM_SIZE] = { + "8 bit", "16 bit", "20 bit", "24 bit", "32 bit", +}; + +static const unsigned int madera_dfc_width_val[MADERA_DFC_WIDTH_ENUM_SIZE] = { + 7, 15, 19, 23, 31, +}; + +static const char * const madera_dfc_type_text[MADERA_DFC_TYPE_ENUM_SIZE] = { + "Fixed", "Unsigned Fixed", "Single Precision Floating", + "Half Precision Floating", "Arm Alternative Floating", +}; + +static const unsigned int madera_dfc_type_val[MADERA_DFC_TYPE_ENUM_SIZE] = { + 0, 1, 2, 4, 5, +}; + +const struct soc_enum madera_dfc_width[] = { + SOC_VALUE_ENUM_SINGLE(MADERA_DFC1_RX, + MADERA_DFC1_RX_DATA_WIDTH_SHIFT, + MADERA_DFC1_RX_DATA_WIDTH_MASK >> + MADERA_DFC1_RX_DATA_WIDTH_SHIFT, + ARRAY_SIZE(madera_dfc_width_text), + madera_dfc_width_text, + madera_dfc_width_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC1_TX, + MADERA_DFC1_TX_DATA_WIDTH_SHIFT, + MADERA_DFC1_TX_DATA_WIDTH_MASK >> + MADERA_DFC1_TX_DATA_WIDTH_SHIFT, + ARRAY_SIZE(madera_dfc_width_text), + madera_dfc_width_text, + madera_dfc_width_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC2_RX, + MADERA_DFC1_RX_DATA_WIDTH_SHIFT, + MADERA_DFC1_RX_DATA_WIDTH_MASK >> + MADERA_DFC1_RX_DATA_WIDTH_SHIFT, + ARRAY_SIZE(madera_dfc_width_text), + madera_dfc_width_text, + madera_dfc_width_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC2_TX, + MADERA_DFC1_TX_DATA_WIDTH_SHIFT, + MADERA_DFC1_TX_DATA_WIDTH_MASK >> + MADERA_DFC1_TX_DATA_WIDTH_SHIFT, + ARRAY_SIZE(madera_dfc_width_text), + madera_dfc_width_text, + madera_dfc_width_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC3_RX, + MADERA_DFC1_RX_DATA_WIDTH_SHIFT, + MADERA_DFC1_RX_DATA_WIDTH_MASK >> + MADERA_DFC1_RX_DATA_WIDTH_SHIFT, + ARRAY_SIZE(madera_dfc_width_text), + madera_dfc_width_text, + madera_dfc_width_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC3_TX, + MADERA_DFC1_TX_DATA_WIDTH_SHIFT, + MADERA_DFC1_TX_DATA_WIDTH_MASK >> + MADERA_DFC1_TX_DATA_WIDTH_SHIFT, + ARRAY_SIZE(madera_dfc_width_text), + madera_dfc_width_text, + madera_dfc_width_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC4_RX, + MADERA_DFC1_RX_DATA_WIDTH_SHIFT, + MADERA_DFC1_RX_DATA_WIDTH_MASK >> + MADERA_DFC1_RX_DATA_WIDTH_SHIFT, + ARRAY_SIZE(madera_dfc_width_text), + madera_dfc_width_text, + madera_dfc_width_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC4_TX, + MADERA_DFC1_TX_DATA_WIDTH_SHIFT, + MADERA_DFC1_TX_DATA_WIDTH_MASK >> + MADERA_DFC1_TX_DATA_WIDTH_SHIFT, + ARRAY_SIZE(madera_dfc_width_text), + madera_dfc_width_text, + madera_dfc_width_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC5_RX, + MADERA_DFC1_RX_DATA_WIDTH_SHIFT, + MADERA_DFC1_RX_DATA_WIDTH_MASK >> + MADERA_DFC1_RX_DATA_WIDTH_SHIFT, + ARRAY_SIZE(madera_dfc_width_text), + madera_dfc_width_text, + madera_dfc_width_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC5_TX, + MADERA_DFC1_TX_DATA_WIDTH_SHIFT, + MADERA_DFC1_TX_DATA_WIDTH_MASK >> + MADERA_DFC1_TX_DATA_WIDTH_SHIFT, + ARRAY_SIZE(madera_dfc_width_text), + madera_dfc_width_text, + madera_dfc_width_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC6_RX, + MADERA_DFC1_RX_DATA_WIDTH_SHIFT, + MADERA_DFC1_RX_DATA_WIDTH_MASK >> + MADERA_DFC1_RX_DATA_WIDTH_SHIFT, + ARRAY_SIZE(madera_dfc_width_text), + madera_dfc_width_text, + madera_dfc_width_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC6_TX, + MADERA_DFC1_TX_DATA_WIDTH_SHIFT, + MADERA_DFC1_TX_DATA_WIDTH_MASK >> + MADERA_DFC1_TX_DATA_WIDTH_SHIFT, + ARRAY_SIZE(madera_dfc_width_text), + madera_dfc_width_text, + madera_dfc_width_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC7_RX, + MADERA_DFC1_RX_DATA_WIDTH_SHIFT, + MADERA_DFC1_RX_DATA_WIDTH_MASK >> + MADERA_DFC1_RX_DATA_WIDTH_SHIFT, + ARRAY_SIZE(madera_dfc_width_text), + madera_dfc_width_text, + madera_dfc_width_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC7_TX, + MADERA_DFC1_TX_DATA_WIDTH_SHIFT, + MADERA_DFC1_TX_DATA_WIDTH_MASK >> + MADERA_DFC1_TX_DATA_WIDTH_SHIFT, + ARRAY_SIZE(madera_dfc_width_text), + madera_dfc_width_text, + madera_dfc_width_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC8_RX, + MADERA_DFC1_RX_DATA_WIDTH_SHIFT, + MADERA_DFC1_RX_DATA_WIDTH_MASK >> + MADERA_DFC1_RX_DATA_WIDTH_SHIFT, + ARRAY_SIZE(madera_dfc_width_text), + madera_dfc_width_text, + madera_dfc_width_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC8_TX, + MADERA_DFC1_TX_DATA_WIDTH_SHIFT, + MADERA_DFC1_TX_DATA_WIDTH_MASK >> + MADERA_DFC1_TX_DATA_WIDTH_SHIFT, + ARRAY_SIZE(madera_dfc_width_text), + madera_dfc_width_text, + madera_dfc_width_val), +}; +EXPORT_SYMBOL_GPL(madera_dfc_width); + +const struct soc_enum madera_dfc_type[] = { + SOC_VALUE_ENUM_SINGLE(MADERA_DFC1_RX, + MADERA_DFC1_RX_DATA_TYPE_SHIFT, + MADERA_DFC1_RX_DATA_TYPE_MASK >> + MADERA_DFC1_RX_DATA_TYPE_SHIFT, + ARRAY_SIZE(madera_dfc_type_text), + madera_dfc_type_text, + madera_dfc_type_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC1_TX, + MADERA_DFC1_TX_DATA_TYPE_SHIFT, + MADERA_DFC1_TX_DATA_TYPE_MASK >> + MADERA_DFC1_TX_DATA_TYPE_SHIFT, + ARRAY_SIZE(madera_dfc_type_text), + madera_dfc_type_text, + madera_dfc_type_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC2_RX, + MADERA_DFC1_RX_DATA_TYPE_SHIFT, + MADERA_DFC1_RX_DATA_TYPE_MASK >> + MADERA_DFC1_RX_DATA_TYPE_SHIFT, + ARRAY_SIZE(madera_dfc_type_text), + madera_dfc_type_text, + madera_dfc_type_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC2_TX, + MADERA_DFC1_TX_DATA_TYPE_SHIFT, + MADERA_DFC1_TX_DATA_TYPE_MASK >> + MADERA_DFC1_TX_DATA_TYPE_SHIFT, + ARRAY_SIZE(madera_dfc_type_text), + madera_dfc_type_text, + madera_dfc_type_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC3_RX, + MADERA_DFC1_RX_DATA_TYPE_SHIFT, + MADERA_DFC1_RX_DATA_TYPE_MASK >> + MADERA_DFC1_RX_DATA_TYPE_SHIFT, + ARRAY_SIZE(madera_dfc_type_text), + madera_dfc_type_text, + madera_dfc_type_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC3_TX, + MADERA_DFC1_TX_DATA_TYPE_SHIFT, + MADERA_DFC1_TX_DATA_TYPE_MASK >> + MADERA_DFC1_TX_DATA_TYPE_SHIFT, + ARRAY_SIZE(madera_dfc_type_text), + madera_dfc_type_text, + madera_dfc_type_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC4_RX, + MADERA_DFC1_RX_DATA_TYPE_SHIFT, + MADERA_DFC1_RX_DATA_TYPE_MASK >> + MADERA_DFC1_RX_DATA_TYPE_SHIFT, + ARRAY_SIZE(madera_dfc_type_text), + madera_dfc_type_text, + madera_dfc_type_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC4_TX, + MADERA_DFC1_TX_DATA_TYPE_SHIFT, + MADERA_DFC1_TX_DATA_TYPE_MASK >> + MADERA_DFC1_TX_DATA_TYPE_SHIFT, + ARRAY_SIZE(madera_dfc_type_text), + madera_dfc_type_text, + madera_dfc_type_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC5_RX, + MADERA_DFC1_RX_DATA_TYPE_SHIFT, + MADERA_DFC1_RX_DATA_TYPE_MASK >> + MADERA_DFC1_RX_DATA_TYPE_SHIFT, + ARRAY_SIZE(madera_dfc_type_text), + madera_dfc_type_text, + madera_dfc_type_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC5_TX, + MADERA_DFC1_TX_DATA_TYPE_SHIFT, + MADERA_DFC1_TX_DATA_TYPE_MASK >> + MADERA_DFC1_TX_DATA_TYPE_SHIFT, + ARRAY_SIZE(madera_dfc_type_text), + madera_dfc_type_text, + madera_dfc_type_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC6_RX, + MADERA_DFC1_RX_DATA_TYPE_SHIFT, + MADERA_DFC1_RX_DATA_TYPE_MASK >> + MADERA_DFC1_RX_DATA_TYPE_SHIFT, + ARRAY_SIZE(madera_dfc_type_text), + madera_dfc_type_text, + madera_dfc_type_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC6_TX, + MADERA_DFC1_TX_DATA_TYPE_SHIFT, + MADERA_DFC1_TX_DATA_TYPE_MASK >> + MADERA_DFC1_TX_DATA_TYPE_SHIFT, + ARRAY_SIZE(madera_dfc_type_text), + madera_dfc_type_text, + madera_dfc_type_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC7_RX, + MADERA_DFC1_RX_DATA_TYPE_SHIFT, + MADERA_DFC1_RX_DATA_TYPE_MASK >> + MADERA_DFC1_RX_DATA_TYPE_SHIFT, + ARRAY_SIZE(madera_dfc_type_text), + madera_dfc_type_text, + madera_dfc_type_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC7_TX, + MADERA_DFC1_TX_DATA_TYPE_SHIFT, + MADERA_DFC1_TX_DATA_TYPE_MASK >> + MADERA_DFC1_TX_DATA_TYPE_SHIFT, + ARRAY_SIZE(madera_dfc_type_text), + madera_dfc_type_text, + madera_dfc_type_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC8_RX, + MADERA_DFC1_RX_DATA_TYPE_SHIFT, + MADERA_DFC1_RX_DATA_TYPE_MASK >> + MADERA_DFC1_RX_DATA_TYPE_SHIFT, + ARRAY_SIZE(madera_dfc_type_text), + madera_dfc_type_text, + madera_dfc_type_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DFC8_TX, + MADERA_DFC1_TX_DATA_TYPE_SHIFT, + MADERA_DFC1_TX_DATA_TYPE_MASK >> + MADERA_DFC1_TX_DATA_TYPE_SHIFT, + ARRAY_SIZE(madera_dfc_type_text), + madera_dfc_type_text, + madera_dfc_type_val), +}; +EXPORT_SYMBOL_GPL(madera_dfc_type); + +const struct soc_enum madera_isrc_fsh[] = { + SOC_VALUE_ENUM_SINGLE(MADERA_ISRC_1_CTRL_1, + MADERA_ISRC1_FSH_SHIFT, 0xf, + MADERA_RATE_ENUM_SIZE, + madera_rate_text, madera_rate_val), + SOC_VALUE_ENUM_SINGLE(MADERA_ISRC_2_CTRL_1, + MADERA_ISRC2_FSH_SHIFT, 0xf, + MADERA_RATE_ENUM_SIZE, + madera_rate_text, madera_rate_val), + SOC_VALUE_ENUM_SINGLE(MADERA_ISRC_3_CTRL_1, + MADERA_ISRC3_FSH_SHIFT, 0xf, + MADERA_RATE_ENUM_SIZE, + madera_rate_text, madera_rate_val), + SOC_VALUE_ENUM_SINGLE(MADERA_ISRC_4_CTRL_1, + MADERA_ISRC4_FSH_SHIFT, 0xf, + MADERA_RATE_ENUM_SIZE, + madera_rate_text, madera_rate_val), + +}; +EXPORT_SYMBOL_GPL(madera_isrc_fsh); + +const struct soc_enum madera_isrc_fsl[] = { + SOC_VALUE_ENUM_SINGLE(MADERA_ISRC_1_CTRL_2, + MADERA_ISRC1_FSL_SHIFT, 0xf, + MADERA_RATE_ENUM_SIZE, + madera_rate_text, madera_rate_val), + SOC_VALUE_ENUM_SINGLE(MADERA_ISRC_2_CTRL_2, + MADERA_ISRC2_FSL_SHIFT, 0xf, + MADERA_RATE_ENUM_SIZE, + madera_rate_text, madera_rate_val), + SOC_VALUE_ENUM_SINGLE(MADERA_ISRC_3_CTRL_2, + MADERA_ISRC3_FSL_SHIFT, 0xf, + MADERA_RATE_ENUM_SIZE, + madera_rate_text, madera_rate_val), + SOC_VALUE_ENUM_SINGLE(MADERA_ISRC_4_CTRL_2, + MADERA_ISRC4_FSL_SHIFT, 0xf, + MADERA_RATE_ENUM_SIZE, + madera_rate_text, madera_rate_val), + +}; +EXPORT_SYMBOL_GPL(madera_isrc_fsl); + +const struct soc_enum madera_asrc1_rate[] = { + SOC_VALUE_ENUM_SINGLE(MADERA_ASRC1_RATE1, + MADERA_ASRC1_RATE1_SHIFT, 0xf, + MADERA_SYNC_RATE_ENUM_SIZE, + madera_rate_text, madera_rate_val), + SOC_VALUE_ENUM_SINGLE(MADERA_ASRC1_RATE2, + MADERA_ASRC1_RATE1_SHIFT, 0xf, + MADERA_ASYNC_RATE_ENUM_SIZE, + madera_rate_text + MADERA_SYNC_RATE_ENUM_SIZE, + madera_rate_val + MADERA_SYNC_RATE_ENUM_SIZE), + +}; +EXPORT_SYMBOL_GPL(madera_asrc1_rate); + +const struct soc_enum madera_asrc2_rate[] = { + SOC_VALUE_ENUM_SINGLE(MADERA_ASRC2_RATE1, + MADERA_ASRC2_RATE1_SHIFT, 0xf, + MADERA_SYNC_RATE_ENUM_SIZE, + madera_rate_text, madera_rate_val), + SOC_VALUE_ENUM_SINGLE(MADERA_ASRC2_RATE2, + MADERA_ASRC2_RATE2_SHIFT, 0xf, + MADERA_ASYNC_RATE_ENUM_SIZE, + madera_rate_text + MADERA_SYNC_RATE_ENUM_SIZE, + madera_rate_val + MADERA_SYNC_RATE_ENUM_SIZE), + +}; +EXPORT_SYMBOL_GPL(madera_asrc2_rate); + +static const char * const madera_vol_ramp_text[] = { + "0ms/6dB", "0.5ms/6dB", "1ms/6dB", "2ms/6dB", "4ms/6dB", "8ms/6dB", + "15ms/6dB", "30ms/6dB", +}; + +SOC_ENUM_SINGLE_DECL(madera_in_vd_ramp, + MADERA_INPUT_VOLUME_RAMP, + MADERA_IN_VD_RAMP_SHIFT, + madera_vol_ramp_text); +EXPORT_SYMBOL_GPL(madera_in_vd_ramp); + +SOC_ENUM_SINGLE_DECL(madera_in_vi_ramp, + MADERA_INPUT_VOLUME_RAMP, + MADERA_IN_VI_RAMP_SHIFT, + madera_vol_ramp_text); +EXPORT_SYMBOL_GPL(madera_in_vi_ramp); + +SOC_ENUM_SINGLE_DECL(madera_out_vd_ramp, + MADERA_OUTPUT_VOLUME_RAMP, + MADERA_OUT_VD_RAMP_SHIFT, + madera_vol_ramp_text); +EXPORT_SYMBOL_GPL(madera_out_vd_ramp); + +SOC_ENUM_SINGLE_DECL(madera_out_vi_ramp, + MADERA_OUTPUT_VOLUME_RAMP, + MADERA_OUT_VI_RAMP_SHIFT, + madera_vol_ramp_text); +EXPORT_SYMBOL_GPL(madera_out_vi_ramp); + +static const char * const madera_lhpf_mode_text[] = { + "Low-pass", "High-pass" +}; + +SOC_ENUM_SINGLE_DECL(madera_lhpf1_mode, + MADERA_HPLPF1_1, + MADERA_LHPF1_MODE_SHIFT, + madera_lhpf_mode_text); +EXPORT_SYMBOL_GPL(madera_lhpf1_mode); + +SOC_ENUM_SINGLE_DECL(madera_lhpf2_mode, + MADERA_HPLPF2_1, + MADERA_LHPF2_MODE_SHIFT, + madera_lhpf_mode_text); +EXPORT_SYMBOL_GPL(madera_lhpf2_mode); + +SOC_ENUM_SINGLE_DECL(madera_lhpf3_mode, + MADERA_HPLPF3_1, + MADERA_LHPF3_MODE_SHIFT, + madera_lhpf_mode_text); +EXPORT_SYMBOL_GPL(madera_lhpf3_mode); + +SOC_ENUM_SINGLE_DECL(madera_lhpf4_mode, + MADERA_HPLPF4_1, + MADERA_LHPF4_MODE_SHIFT, + madera_lhpf_mode_text); +EXPORT_SYMBOL_GPL(madera_lhpf4_mode); + +static const char * const madera_ng_hold_text[] = { + "30ms", "120ms", "250ms", "500ms", +}; + +SOC_ENUM_SINGLE_DECL(madera_ng_hold, + MADERA_NOISE_GATE_CONTROL, + MADERA_NGATE_HOLD_SHIFT, + madera_ng_hold_text); +EXPORT_SYMBOL_GPL(madera_ng_hold); + +static const char * const madera_in_hpf_cut_text[] = { + "2.5Hz", "5Hz", "10Hz", "20Hz", "40Hz" +}; + +SOC_ENUM_SINGLE_DECL(madera_in_hpf_cut_enum, + MADERA_HPF_CONTROL, + MADERA_IN_HPF_CUT_SHIFT, + madera_in_hpf_cut_text); +EXPORT_SYMBOL_GPL(madera_in_hpf_cut_enum); + +static const char * const madera_in_dmic_osr_text[MADERA_OSR_ENUM_SIZE] = { + "384kHz", "768kHz", "1.536MHz", "3.072MHz", "6.144MHz", +}; + +static const unsigned int madera_in_dmic_osr_val[MADERA_OSR_ENUM_SIZE] = { + 2, 3, 4, 5, 6, +}; + +const struct soc_enum madera_in_dmic_osr[] = { + SOC_VALUE_ENUM_SINGLE(MADERA_DMIC1L_CONTROL, MADERA_IN1_OSR_SHIFT, + 0x7, MADERA_OSR_ENUM_SIZE, + madera_in_dmic_osr_text, madera_in_dmic_osr_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DMIC2L_CONTROL, MADERA_IN2_OSR_SHIFT, + 0x7, MADERA_OSR_ENUM_SIZE, + madera_in_dmic_osr_text, madera_in_dmic_osr_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DMIC3L_CONTROL, MADERA_IN3_OSR_SHIFT, + 0x7, MADERA_OSR_ENUM_SIZE, + madera_in_dmic_osr_text, madera_in_dmic_osr_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DMIC4L_CONTROL, MADERA_IN4_OSR_SHIFT, + 0x7, MADERA_OSR_ENUM_SIZE, + madera_in_dmic_osr_text, madera_in_dmic_osr_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DMIC5L_CONTROL, MADERA_IN5_OSR_SHIFT, + 0x7, MADERA_OSR_ENUM_SIZE, + madera_in_dmic_osr_text, madera_in_dmic_osr_val), + SOC_VALUE_ENUM_SINGLE(MADERA_DMIC6L_CONTROL, MADERA_IN6_OSR_SHIFT, + 0x7, MADERA_OSR_ENUM_SIZE, + madera_in_dmic_osr_text, madera_in_dmic_osr_val), +}; +EXPORT_SYMBOL_GPL(madera_in_dmic_osr); + +static const char * const madera_anc_input_src_text[] = { + "None", "IN1", "IN2", "IN3", "IN4", "IN5", "IN6", +}; + +static const char * const madera_anc_channel_src_text[] = { + "None", "Left", "Right", "Combine", +}; + +const struct soc_enum madera_anc_input_src[] = { + SOC_ENUM_SINGLE(MADERA_ANC_SRC, + MADERA_IN_RXANCL_SEL_SHIFT, + ARRAY_SIZE(madera_anc_input_src_text), + madera_anc_input_src_text), + SOC_ENUM_SINGLE(MADERA_FCL_ADC_REFORMATTER_CONTROL, + MADERA_FCL_MIC_MODE_SEL_SHIFT, + ARRAY_SIZE(madera_anc_channel_src_text), + madera_anc_channel_src_text), + SOC_ENUM_SINGLE(MADERA_ANC_SRC, + MADERA_IN_RXANCR_SEL_SHIFT, + ARRAY_SIZE(madera_anc_input_src_text), + madera_anc_input_src_text), + SOC_ENUM_SINGLE(MADERA_FCR_ADC_REFORMATTER_CONTROL, + MADERA_FCR_MIC_MODE_SEL_SHIFT, + ARRAY_SIZE(madera_anc_channel_src_text), + madera_anc_channel_src_text), +}; +EXPORT_SYMBOL_GPL(madera_anc_input_src); + +static const char * const madera_anc_ng_texts[] = { + "None", "Internal", "External", +}; + +SOC_ENUM_SINGLE_DECL(madera_anc_ng_enum, SND_SOC_NOPM, 0, madera_anc_ng_texts); +EXPORT_SYMBOL_GPL(madera_anc_ng_enum); + +static const char * const madera_out_anc_src_text[] = { + "None", "RXANCL", "RXANCR", +}; + +const struct soc_enum madera_output_anc_src[] = { + SOC_ENUM_SINGLE(MADERA_OUTPUT_PATH_CONFIG_1L, + MADERA_OUT1L_ANC_SRC_SHIFT, + ARRAY_SIZE(madera_out_anc_src_text), + madera_out_anc_src_text), + SOC_ENUM_SINGLE(MADERA_OUTPUT_PATH_CONFIG_1R, + MADERA_OUT1R_ANC_SRC_SHIFT, + ARRAY_SIZE(madera_out_anc_src_text), + madera_out_anc_src_text), + SOC_ENUM_SINGLE(MADERA_OUTPUT_PATH_CONFIG_2L, + MADERA_OUT2L_ANC_SRC_SHIFT, + ARRAY_SIZE(madera_out_anc_src_text), + madera_out_anc_src_text), + SOC_ENUM_SINGLE(MADERA_OUTPUT_PATH_CONFIG_2R, + MADERA_OUT2R_ANC_SRC_SHIFT, + ARRAY_SIZE(madera_out_anc_src_text), + madera_out_anc_src_text), + SOC_ENUM_SINGLE(MADERA_OUTPUT_PATH_CONFIG_3L, + MADERA_OUT3L_ANC_SRC_SHIFT, + ARRAY_SIZE(madera_out_anc_src_text), + madera_out_anc_src_text), + SOC_ENUM_SINGLE(MADERA_OUTPUT_PATH_CONFIG_3R, + MADERA_OUT3R_ANC_SRC_SHIFT, + ARRAY_SIZE(madera_out_anc_src_text), + madera_out_anc_src_text), + SOC_ENUM_SINGLE(MADERA_OUTPUT_PATH_CONFIG_4L, + MADERA_OUT4L_ANC_SRC_SHIFT, + ARRAY_SIZE(madera_out_anc_src_text), + madera_out_anc_src_text), + SOC_ENUM_SINGLE(MADERA_OUTPUT_PATH_CONFIG_4R, + MADERA_OUT4R_ANC_SRC_SHIFT, + ARRAY_SIZE(madera_out_anc_src_text), + madera_out_anc_src_text), + SOC_ENUM_SINGLE(MADERA_OUTPUT_PATH_CONFIG_5L, + MADERA_OUT5L_ANC_SRC_SHIFT, + ARRAY_SIZE(madera_out_anc_src_text), + madera_out_anc_src_text), + SOC_ENUM_SINGLE(MADERA_OUTPUT_PATH_CONFIG_5R, + MADERA_OUT5R_ANC_SRC_SHIFT, + ARRAY_SIZE(madera_out_anc_src_text), + madera_out_anc_src_text), + SOC_ENUM_SINGLE(MADERA_OUTPUT_PATH_CONFIG_6L, + MADERA_OUT6L_ANC_SRC_SHIFT, + ARRAY_SIZE(madera_out_anc_src_text), + madera_out_anc_src_text), + SOC_ENUM_SINGLE(MADERA_OUTPUT_PATH_CONFIG_6R, + MADERA_OUT6R_ANC_SRC_SHIFT, + ARRAY_SIZE(madera_out_anc_src_text), + madera_out_anc_src_text), +}; +EXPORT_SYMBOL_GPL(madera_output_anc_src); + +int madera_dfc_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = + snd_soc_kcontrol_component(kcontrol); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; + unsigned int reg = e->reg; + unsigned int val; + int ret = 0; + + reg = ((reg / 6) * 6) - 2; + + snd_soc_dapm_mutex_lock(dapm); + + ret = snd_soc_component_read(component, reg, &val); + if (ret) + goto exit; + + if (val & MADERA_DFC1_ENA) { + ret = -EBUSY; + dev_err(component->dev, "Can't change mode on an active DFC\n"); + goto exit; + } + + ret = snd_soc_put_enum_double(kcontrol, ucontrol); +exit: + snd_soc_dapm_mutex_unlock(dapm); + + return ret; +} +EXPORT_SYMBOL_GPL(madera_dfc_put); + +int madera_lp_mode_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct soc_mixer_control *mc = + (struct soc_mixer_control *)kcontrol->private_value; + struct snd_soc_component *component = + snd_soc_kcontrol_component(kcontrol); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + unsigned int val, mask; + int ret; + + snd_soc_dapm_mutex_lock(dapm); + + /* Cannot change lp mode on an active input */ + ret = snd_soc_component_read(component, MADERA_INPUT_ENABLES, &val); + if (ret) + goto exit; + mask = (mc->reg - MADERA_ADC_DIGITAL_VOLUME_1L) / 4; + mask ^= 0x1; /* Flip bottom bit for channel order */ + + if (val & (1 << mask)) { + ret = -EBUSY; + dev_err(component->dev, + "Can't change lp mode on an active input\n"); + goto exit; + } + + ret = snd_soc_put_volsw(kcontrol, ucontrol); + +exit: + snd_soc_dapm_mutex_unlock(dapm); + + return ret; +} +EXPORT_SYMBOL_GPL(madera_lp_mode_put); + +const struct snd_kcontrol_new madera_dsp_trigger_output_mux[] = { + SOC_DAPM_SINGLE("Switch", SND_SOC_NOPM, 0, 1, 0), + SOC_DAPM_SINGLE("Switch", SND_SOC_NOPM, 0, 1, 0), + SOC_DAPM_SINGLE("Switch", SND_SOC_NOPM, 0, 1, 0), + SOC_DAPM_SINGLE("Switch", SND_SOC_NOPM, 0, 1, 0), + SOC_DAPM_SINGLE("Switch", SND_SOC_NOPM, 0, 1, 0), + SOC_DAPM_SINGLE("Switch", SND_SOC_NOPM, 0, 1, 0), + SOC_DAPM_SINGLE("Switch", SND_SOC_NOPM, 0, 1, 0), +}; +EXPORT_SYMBOL_GPL(madera_dsp_trigger_output_mux); + +const struct snd_kcontrol_new madera_drc_activity_output_mux[] = { + SOC_DAPM_SINGLE("Switch", SND_SOC_NOPM, 0, 1, 0), + SOC_DAPM_SINGLE("Switch", SND_SOC_NOPM, 0, 1, 0), +}; +EXPORT_SYMBOL_GPL(madera_drc_activity_output_mux); + +static void madera_in_set_vu(struct madera_priv *priv, bool enable) +{ + unsigned int val; + int i, ret; + + if (enable) + val = MADERA_IN_VU; + else + val = 0; + + for (i = 0; i < priv->num_inputs; i++) { + ret = regmap_update_bits(priv->madera->regmap, + MADERA_ADC_DIGITAL_VOLUME_1L + (i * 4), + MADERA_IN_VU, val); + if (ret) + dev_warn(priv->madera->dev, + "Failed to modify VU bits: %d\n", ret); + } +} + +int madera_in_ev(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, + int event) +{ + struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + unsigned int reg, val; + int ret; + + if (w->shift % 2) + reg = MADERA_ADC_DIGITAL_VOLUME_1L + ((w->shift / 2) * 8); + else + reg = MADERA_ADC_DIGITAL_VOLUME_1R + ((w->shift / 2) * 8); + + switch (event) { + case SND_SOC_DAPM_PRE_PMU: + priv->in_pending++; + break; + case SND_SOC_DAPM_POST_PMU: + priv->in_pending--; + snd_soc_component_update_bits(component, reg, + MADERA_IN1L_MUTE, 0); + + /* If this is the last input pending then allow VU */ + if (priv->in_pending == 0) { + usleep_range(1000, 3000); + madera_in_set_vu(priv, true); + } + break; + case SND_SOC_DAPM_PRE_PMD: + snd_soc_component_update_bits(component, reg, + MADERA_IN1L_MUTE | MADERA_IN_VU, + MADERA_IN1L_MUTE | MADERA_IN_VU); + break; + case SND_SOC_DAPM_POST_PMD: + /* Disable volume updates if no inputs are enabled */ + ret = snd_soc_component_read(component, MADERA_INPUT_ENABLES, + &val); + if (!ret && !val) + madera_in_set_vu(priv, false); + break; + default: + break; + } + + return 0; +} +EXPORT_SYMBOL_GPL(madera_in_ev); + +int madera_out_ev(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct madera *madera = priv->madera; + int out_up_delay; + + switch (madera->type) { + case CS47L90: + case CS47L91: + out_up_delay = 6; + break; + default: + out_up_delay = 17; + break; + } + + switch (event) { + case SND_SOC_DAPM_PRE_PMU: + switch (w->shift) { + case MADERA_OUT1L_ENA_SHIFT: + case MADERA_OUT1R_ENA_SHIFT: + case MADERA_OUT2L_ENA_SHIFT: + case MADERA_OUT2R_ENA_SHIFT: + case MADERA_OUT3L_ENA_SHIFT: + case MADERA_OUT3R_ENA_SHIFT: + priv->out_up_pending++; + priv->out_up_delay += out_up_delay; + break; + default: + break; + } + break; + + case SND_SOC_DAPM_POST_PMU: + switch (w->shift) { + case MADERA_OUT1L_ENA_SHIFT: + case MADERA_OUT1R_ENA_SHIFT: + case MADERA_OUT2L_ENA_SHIFT: + case MADERA_OUT2R_ENA_SHIFT: + case MADERA_OUT3L_ENA_SHIFT: + case MADERA_OUT3R_ENA_SHIFT: + priv->out_up_pending--; + if (!priv->out_up_pending) { + msleep(priv->out_up_delay); + priv->out_up_delay = 0; + } + break; + + default: + break; + } + break; + + case SND_SOC_DAPM_PRE_PMD: + switch (w->shift) { + case MADERA_OUT1L_ENA_SHIFT: + case MADERA_OUT1R_ENA_SHIFT: + case MADERA_OUT2L_ENA_SHIFT: + case MADERA_OUT2R_ENA_SHIFT: + case MADERA_OUT3L_ENA_SHIFT: + case MADERA_OUT3R_ENA_SHIFT: + priv->out_down_pending++; + priv->out_down_delay++; + break; + default: + break; + } + break; + + case SND_SOC_DAPM_POST_PMD: + switch (w->shift) { + case MADERA_OUT1L_ENA_SHIFT: + case MADERA_OUT1R_ENA_SHIFT: + case MADERA_OUT2L_ENA_SHIFT: + case MADERA_OUT2R_ENA_SHIFT: + case MADERA_OUT3L_ENA_SHIFT: + case MADERA_OUT3R_ENA_SHIFT: + priv->out_down_pending--; + if (!priv->out_down_pending) { + msleep(priv->out_down_delay); + priv->out_down_delay = 0; + } + break; + default: + break; + } + break; + default: + break; + } + + return 0; +} +EXPORT_SYMBOL_GPL(madera_out_ev); + +int madera_hp_ev(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct madera *madera = priv->madera; + unsigned int mask = 1 << w->shift; + unsigned int out_num = w->shift / 2; + unsigned int val; + unsigned int ep_sel = 0; + + switch (event) { + case SND_SOC_DAPM_POST_PMU: + val = mask; + break; + case SND_SOC_DAPM_PRE_PMD: + val = 0; + break; + case SND_SOC_DAPM_PRE_PMU: + case SND_SOC_DAPM_POST_PMD: + return madera_out_ev(w, kcontrol, event); + default: + return 0; + } + + /* Store the desired state for the HP outputs */ + madera->hp_ena &= ~mask; + madera->hp_ena |= val; + + /* if OUT1 is routed to EPOUT, ignore HP clamp and impedance */ + regmap_read(madera->regmap, MADERA_OUTPUT_ENABLES_1, &ep_sel); + ep_sel &= MADERA_EP_SEL_MASK; + + /* Force off if HPDET has disabled the clamp for this output */ + if (!ep_sel && + (!madera->out_clamp[out_num] || madera->out_shorted[out_num])) + val = 0; + + regmap_update_bits(madera->regmap, MADERA_OUTPUT_ENABLES_1, mask, val); + + return madera_out_ev(w, kcontrol, event); +} +EXPORT_SYMBOL_GPL(madera_hp_ev); + +int madera_anc_ev(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, + int event) +{ + struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); + unsigned int val; + + switch (event) { + case SND_SOC_DAPM_POST_PMU: + val = 1 << w->shift; + break; + case SND_SOC_DAPM_PRE_PMD: + val = 1 << (w->shift + 1); + break; + default: + return 0; + } + + snd_soc_component_write(component, MADERA_CLOCK_CONTROL, val); + + return 0; +} +EXPORT_SYMBOL_GPL(madera_anc_ev); + +static const unsigned int madera_opclk_ref_48k_rates[] = { + 6144000, + 12288000, + 24576000, + 49152000, +}; + +static const unsigned int madera_opclk_ref_44k1_rates[] = { + 5644800, + 11289600, + 22579200, + 45158400, +}; + +static int madera_set_opclk(struct snd_soc_component *component, + unsigned int clk, unsigned int freq) +{ + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + unsigned int mask = MADERA_OPCLK_DIV_MASK | MADERA_OPCLK_SEL_MASK; + unsigned int reg, val; + const unsigned int *rates; + int ref, div, refclk; + + BUILD_BUG_ON(ARRAY_SIZE(madera_opclk_ref_48k_rates) != + ARRAY_SIZE(madera_opclk_ref_44k1_rates)); + + switch (clk) { + case MADERA_CLK_OPCLK: + reg = MADERA_OUTPUT_SYSTEM_CLOCK; + refclk = priv->sysclk; + break; + case MADERA_CLK_ASYNC_OPCLK: + reg = MADERA_OUTPUT_ASYNC_CLOCK; + refclk = priv->asyncclk; + break; + default: + return -EINVAL; + } + + if (refclk % 4000) + rates = madera_opclk_ref_44k1_rates; + else + rates = madera_opclk_ref_48k_rates; + + for (ref = 0; ref < ARRAY_SIZE(madera_opclk_ref_48k_rates); ++ref) { + if (rates[ref] > refclk) + continue; + + div = 2; + while ((rates[ref] / div >= freq) && (div <= 30)) { + if (rates[ref] / div == freq) { + dev_dbg(component->dev, "Configured %dHz OPCLK\n", + freq); + + val = (div << MADERA_OPCLK_DIV_SHIFT) | ref; + + snd_soc_component_update_bits(component, reg, + mask, val); + return 0; + } + div += 2; + } + } + + dev_err(component->dev, "Unable to generate %dHz OPCLK\n", freq); + + return -EINVAL; +} + +static int madera_get_sysclk_setting(unsigned int freq) +{ + switch (freq) { + case 0: + case 5644800: + case 6144000: + return 0; + case 11289600: + case 12288000: + return MADERA_SYSCLK_12MHZ << MADERA_SYSCLK_FREQ_SHIFT; + case 22579200: + case 24576000: + return MADERA_SYSCLK_24MHZ << MADERA_SYSCLK_FREQ_SHIFT; + case 45158400: + case 49152000: + return MADERA_SYSCLK_49MHZ << MADERA_SYSCLK_FREQ_SHIFT; + case 90316800: + case 98304000: + return MADERA_SYSCLK_98MHZ << MADERA_SYSCLK_FREQ_SHIFT; + default: + return -EINVAL; + } +} + +static int madera_get_legacy_dspclk_setting(struct madera *madera, + unsigned int freq) +{ + switch (freq) { + case 0: + return 0; + case 45158400: + case 49152000: + switch (madera->type) { + case CS47L85: + case WM1840: + if (madera->rev < 3) + return -EINVAL; + else + return MADERA_SYSCLK_49MHZ << + MADERA_SYSCLK_FREQ_SHIFT; + default: + return -EINVAL; + } + case 135475200: + case 147456000: + return MADERA_DSPCLK_147MHZ << MADERA_DSP_CLK_FREQ_LEGACY_SHIFT; + default: + return -EINVAL; + } +} + +static int madera_get_dspclk_setting(struct madera *madera, + unsigned int freq, + unsigned int *clock_2_val) +{ + switch (madera->type) { + case CS47L35: + case CS47L85: + case WM1840: + *clock_2_val = 0; /* don't use MADERA_DSP_CLOCK_2 */ + return madera_get_legacy_dspclk_setting(madera, freq); + default: + if (freq > 150000000) + return -EINVAL; + + /* Use new exact frequency control */ + *clock_2_val = freq / 15625; /* freq * (2^6) / (10^6) */ + return 0; + } +} + +int madera_set_sysclk(struct snd_soc_component *component, int clk_id, + int source, unsigned int freq, int dir) +{ + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct madera *madera = priv->madera; + char *name; + unsigned int reg, clock_2_val = 0; + unsigned int mask = MADERA_SYSCLK_FREQ_MASK | MADERA_SYSCLK_SRC_MASK; + unsigned int val = source << MADERA_SYSCLK_SRC_SHIFT; + int clk_freq_sel, *clk; + int ret = 0; + + switch (clk_id) { + case MADERA_CLK_SYSCLK_1: + name = "SYSCLK"; + reg = MADERA_SYSTEM_CLOCK_1; + clk = &priv->sysclk; + clk_freq_sel = madera_get_sysclk_setting(freq); + mask |= MADERA_SYSCLK_FRAC; + break; + case MADERA_CLK_ASYNCCLK_1: + name = "ASYNCCLK"; + reg = MADERA_ASYNC_CLOCK_1; + clk = &priv->asyncclk; + clk_freq_sel = madera_get_sysclk_setting(freq); + break; + case MADERA_CLK_DSPCLK: + name = "DSPCLK"; + reg = MADERA_DSP_CLOCK_1; + clk = &priv->dspclk; + clk_freq_sel = madera_get_dspclk_setting(madera, freq, + &clock_2_val); + break; + case MADERA_CLK_OPCLK: + case MADERA_CLK_ASYNC_OPCLK: + return madera_set_opclk(component, clk_id, freq); + default: + return -EINVAL; + } + + if (clk_freq_sel < 0) { + dev_err(madera->dev, + "Failed to get clk setting for %dHZ\n", freq); + return clk_freq_sel; + } + + *clk = freq; + + if (freq == 0) { + dev_dbg(madera->dev, "%s cleared\n", name); + return 0; + } + + val |= clk_freq_sel; + + if (clock_2_val) { + ret = regmap_write(madera->regmap, MADERA_DSP_CLOCK_2, + clock_2_val); + if (ret) { + dev_err(madera->dev, + "Failed to write DSP_CONFIG2: %d\n", ret); + return ret; + } + + /* + * We're using the frequency setting in MADERA_DSP_CLOCK_2 so + * don't change the frequency select bits in MADERA_DSP_CLOCK_1 + */ + mask = MADERA_SYSCLK_SRC_MASK; + } + + if (freq % 6144000) + val |= MADERA_SYSCLK_FRAC; + + dev_dbg(madera->dev, "%s set to %uHz\n", name, freq); + + return regmap_update_bits(madera->regmap, reg, mask, val); +} +EXPORT_SYMBOL_GPL(madera_set_sysclk); + +static int madera_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) +{ + struct snd_soc_component *component = dai->component; + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct madera *madera = priv->madera; + int lrclk, bclk, mode, base; + + base = dai->driver->base; + + lrclk = 0; + bclk = 0; + + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_DSP_A: + mode = MADERA_FMT_DSP_MODE_A; + break; + case SND_SOC_DAIFMT_DSP_B: + if ((fmt & SND_SOC_DAIFMT_MASTER_MASK) != + SND_SOC_DAIFMT_CBM_CFM) { + madera_aif_err(dai, "DSP_B not valid in slave mode\n"); + return -EINVAL; + } + mode = MADERA_FMT_DSP_MODE_B; + break; + case SND_SOC_DAIFMT_I2S: + mode = MADERA_FMT_I2S_MODE; + break; + case SND_SOC_DAIFMT_LEFT_J: + if ((fmt & SND_SOC_DAIFMT_MASTER_MASK) != + SND_SOC_DAIFMT_CBM_CFM) { + madera_aif_err(dai, "LEFT_J not valid in slave mode\n"); + return -EINVAL; + } + mode = MADERA_FMT_LEFT_JUSTIFIED_MODE; + break; + default: + madera_aif_err(dai, "Unsupported DAI format %d\n", + fmt & SND_SOC_DAIFMT_FORMAT_MASK); + return -EINVAL; + } + + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFS: + break; + case SND_SOC_DAIFMT_CBS_CFM: + lrclk |= MADERA_AIF1TX_LRCLK_MSTR; + break; + case SND_SOC_DAIFMT_CBM_CFS: + bclk |= MADERA_AIF1_BCLK_MSTR; + break; + case SND_SOC_DAIFMT_CBM_CFM: + bclk |= MADERA_AIF1_BCLK_MSTR; + lrclk |= MADERA_AIF1TX_LRCLK_MSTR; + break; + default: + madera_aif_err(dai, "Unsupported master mode %d\n", + fmt & SND_SOC_DAIFMT_MASTER_MASK); + return -EINVAL; + } + + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + case SND_SOC_DAIFMT_IB_IF: + bclk |= MADERA_AIF1_BCLK_INV; + lrclk |= MADERA_AIF1TX_LRCLK_INV; + break; + case SND_SOC_DAIFMT_IB_NF: + bclk |= MADERA_AIF1_BCLK_INV; + break; + case SND_SOC_DAIFMT_NB_IF: + lrclk |= MADERA_AIF1TX_LRCLK_INV; + break; + default: + madera_aif_err(dai, "Unsupported invert mode %d\n", + fmt & SND_SOC_DAIFMT_INV_MASK); + return -EINVAL; + } + + regmap_update_bits(madera->regmap, base + MADERA_AIF_BCLK_CTRL, + MADERA_AIF1_BCLK_INV | MADERA_AIF1_BCLK_MSTR, + bclk); + regmap_update_bits(madera->regmap, base + MADERA_AIF_TX_PIN_CTRL, + MADERA_AIF1TX_LRCLK_INV | MADERA_AIF1TX_LRCLK_MSTR, + lrclk); + regmap_update_bits(madera->regmap, base + MADERA_AIF_RX_PIN_CTRL, + MADERA_AIF1RX_LRCLK_INV | MADERA_AIF1RX_LRCLK_MSTR, + lrclk); + regmap_update_bits(madera->regmap, base + MADERA_AIF_FORMAT, + MADERA_AIF1_FMT_MASK, mode); + + return 0; +} + +static const int madera_48k_bclk_rates[] = { + -1, + 48000, + 64000, + 96000, + 128000, + 192000, + 256000, + 384000, + 512000, + 768000, + 1024000, + 1536000, + 2048000, + 3072000, + 4096000, + 6144000, + 8192000, + 12288000, + 24576000, +}; + +static const int madera_44k1_bclk_rates[] = { + -1, + 44100, + 58800, + 88200, + 117600, + 177640, + 235200, + 352800, + 470400, + 705600, + 940800, + 1411200, + 1881600, + 2822400, + 3763200, + 5644800, + 7526400, + 11289600, + 22579200, +}; + +static const unsigned int madera_sr_vals[] = { + 0, + 12000, + 24000, + 48000, + 96000, + 192000, + 384000, + 768000, + 0, + 11025, + 22050, + 44100, + 88200, + 176400, + 352800, + 705600, + 4000, + 8000, + 16000, + 32000, + 64000, + 128000, + 256000, + 512000, +}; + +#define MADERA_192K_48K_RATE_MASK 0x0F003E +#define MADERA_192K_44K1_RATE_MASK 0x003E00 +#define MADERA_192K_RATE_MASK (MADERA_192K_48K_RATE_MASK | \ + MADERA_192K_44K1_RATE_MASK) + +static const struct snd_pcm_hw_constraint_list madera_constraint = { + .count = ARRAY_SIZE(madera_sr_vals), + .list = madera_sr_vals, +}; + +static int madera_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct snd_soc_component *component = dai->component; + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct madera_dai_priv *dai_priv = &priv->dai[dai->id - 1]; + unsigned int base_rate; + + if (!substream->runtime) + return 0; + + switch (dai_priv->clk) { + case MADERA_CLK_SYSCLK_1: + case MADERA_CLK_SYSCLK_2: + case MADERA_CLK_SYSCLK_3: + base_rate = priv->sysclk; + break; + case MADERA_CLK_ASYNCCLK_1: + case MADERA_CLK_ASYNCCLK_2: + base_rate = priv->asyncclk; + break; + default: + return 0; + } + + if (base_rate == 0) + dai_priv->constraint.mask = MADERA_192K_RATE_MASK; + else if (base_rate % 4000) + dai_priv->constraint.mask = MADERA_192K_44K1_RATE_MASK; + else + dai_priv->constraint.mask = MADERA_192K_48K_RATE_MASK; + + return snd_pcm_hw_constraint_list(substream->runtime, 0, + SNDRV_PCM_HW_PARAM_RATE, + &dai_priv->constraint); +} + +static int madera_hw_params_rate(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) +{ + struct snd_soc_component *component = dai->component; + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct madera_dai_priv *dai_priv = &priv->dai[dai->id - 1]; + int base = dai->driver->base; + int i, sr_val; + unsigned int reg, cur, tar; + int ret; + + for (i = 0; i < ARRAY_SIZE(madera_sr_vals); i++) + if (madera_sr_vals[i] == params_rate(params)) + break; + + if (i == ARRAY_SIZE(madera_sr_vals)) { + madera_aif_err(dai, "Unsupported sample rate %dHz\n", + params_rate(params)); + return -EINVAL; + } + sr_val = i; + + switch (dai_priv->clk) { + case MADERA_CLK_SYSCLK_1: + reg = MADERA_SAMPLE_RATE_1; + tar = 0 << MADERA_AIF1_RATE_SHIFT; + break; + case MADERA_CLK_SYSCLK_2: + reg = MADERA_SAMPLE_RATE_2; + tar = 1 << MADERA_AIF1_RATE_SHIFT; + break; + case MADERA_CLK_SYSCLK_3: + reg = MADERA_SAMPLE_RATE_3; + tar = 2 << MADERA_AIF1_RATE_SHIFT; + break; + case MADERA_CLK_ASYNCCLK_1: + reg = MADERA_ASYNC_SAMPLE_RATE_1, + tar = 8 << MADERA_AIF1_RATE_SHIFT; + break; + case MADERA_CLK_ASYNCCLK_2: + reg = MADERA_ASYNC_SAMPLE_RATE_2, + tar = 9 << MADERA_AIF1_RATE_SHIFT; + break; + default: + madera_aif_err(dai, "Invalid clock %d\n", dai_priv->clk); + return -EINVAL; + } + + snd_soc_component_update_bits(component, reg, MADERA_SAMPLE_RATE_1_MASK, + sr_val); + + if (!base) + return 0; + + ret = regmap_read(priv->madera->regmap, + base + MADERA_AIF_RATE_CTRL, &cur); + if (ret != 0) { + madera_aif_err(dai, "Failed to check rate: %d\n", ret); + return ret; + } + + if ((cur & MADERA_AIF1_RATE_MASK) == (tar & MADERA_AIF1_RATE_MASK)) + return 0; + + mutex_lock(&priv->rate_lock); + + if (!madera_can_change_grp_rate(priv, base + MADERA_AIF_RATE_CTRL)) { + madera_aif_warn(dai, "Cannot change rate while active\n"); + ret = -EBUSY; + goto out; + } + + /* Guard the rate change with SYSCLK cycles */ + madera_spin_sysclk(priv); + snd_soc_component_update_bits(component, base + MADERA_AIF_RATE_CTRL, + MADERA_AIF1_RATE_MASK, tar); + madera_spin_sysclk(priv); + +out: + mutex_unlock(&priv->rate_lock); + + return ret; +} + +static int madera_aif_cfg_changed(struct snd_soc_component *component, + int base, int bclk, int lrclk, int frame) +{ + unsigned int val; + int ret; + + ret = snd_soc_component_read(component, base + MADERA_AIF_BCLK_CTRL, + &val); + if (ret) + return ret; + if (bclk != (val & MADERA_AIF1_BCLK_FREQ_MASK)) + return 1; + + ret = snd_soc_component_read(component, base + MADERA_AIF_RX_BCLK_RATE, + &val); + if (ret) + return ret; + if (lrclk != (val & MADERA_AIF1RX_BCPF_MASK)) + return 1; + + ret = snd_soc_component_read(component, base + MADERA_AIF_FRAME_CTRL_1, + &val); + if (ret) + return ret; + if (frame != (val & (MADERA_AIF1TX_WL_MASK | + MADERA_AIF1TX_SLOT_LEN_MASK))) + return 1; + + return 0; +} + +static int madera_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) +{ + struct snd_soc_component *component = dai->component; + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct madera *madera = priv->madera; + int base = dai->driver->base; + const int *rates; + int i, ret; + unsigned int val; + unsigned int channels = params_channels(params); + unsigned int rate = params_rate(params); + unsigned int chan_limit = + madera->pdata.codec.max_channels_clocked[dai->id - 1]; + int tdm_width = priv->tdm_width[dai->id - 1]; + int tdm_slots = priv->tdm_slots[dai->id - 1]; + int bclk, lrclk, wl, frame, bclk_target, num_rates; + int reconfig; + unsigned int aif_tx_state = 0, aif_rx_state = 0; + + if (rate % 4000) { + rates = &madera_44k1_bclk_rates[0]; + num_rates = ARRAY_SIZE(madera_44k1_bclk_rates); + } else { + rates = &madera_48k_bclk_rates[0]; + num_rates = ARRAY_SIZE(madera_48k_bclk_rates); + } + + wl = snd_pcm_format_width(params_format(params)); + + if (tdm_slots) { + madera_aif_dbg(dai, "Configuring for %d %d bit TDM slots\n", + tdm_slots, tdm_width); + bclk_target = tdm_slots * tdm_width * rate; + channels = tdm_slots; + } else { + bclk_target = snd_soc_params_to_bclk(params); + tdm_width = wl; + } + + if (chan_limit && chan_limit < channels) { + madera_aif_dbg(dai, "Limiting to %d channels\n", chan_limit); + bclk_target /= channels; + bclk_target *= chan_limit; + } + + /* Force multiple of 2 channels for I2S mode */ + ret = snd_soc_component_read(component, base + MADERA_AIF_FORMAT, &val); + if (ret) + return ret; + + val &= MADERA_AIF1_FMT_MASK; + if ((channels & 1) && val == MADERA_FMT_I2S_MODE) { + madera_aif_dbg(dai, "Forcing stereo mode\n"); + bclk_target /= channels; + bclk_target *= channels + 1; + } + + for (i = 0; i < num_rates; i++) { + if (rates[i] >= bclk_target && rates[i] % rate == 0) { + bclk = i; + break; + } + } + + if (i == num_rates) { + madera_aif_err(dai, "Unsupported sample rate %dHz\n", rate); + return -EINVAL; + } + + lrclk = rates[bclk] / rate; + + madera_aif_dbg(dai, "BCLK %dHz LRCLK %dHz\n", + rates[bclk], rates[bclk] / lrclk); + + frame = wl << MADERA_AIF1TX_WL_SHIFT | tdm_width; + + reconfig = madera_aif_cfg_changed(component, base, bclk, lrclk, frame); + if (reconfig < 0) + return reconfig; + + if (reconfig) { + /* Save AIF TX/RX state */ + regmap_read(madera->regmap, base + MADERA_AIF_TX_ENABLES, + &aif_tx_state); + regmap_read(madera->regmap, base + MADERA_AIF_RX_ENABLES, + &aif_rx_state); + /* Disable AIF TX/RX before reconfiguring it */ + regmap_update_bits(madera->regmap, + base + MADERA_AIF_TX_ENABLES, 0xff, 0x0); + regmap_update_bits(madera->regmap, + base + MADERA_AIF_RX_ENABLES, 0xff, 0x0); + } + + ret = madera_hw_params_rate(substream, params, dai); + if (ret != 0) + goto restore_aif; + + if (reconfig) { + regmap_update_bits(madera->regmap, + base + MADERA_AIF_BCLK_CTRL, + MADERA_AIF1_BCLK_FREQ_MASK, bclk); + regmap_update_bits(madera->regmap, + base + MADERA_AIF_RX_BCLK_RATE, + MADERA_AIF1RX_BCPF_MASK, lrclk); + regmap_update_bits(madera->regmap, + base + MADERA_AIF_FRAME_CTRL_1, + MADERA_AIF1TX_WL_MASK | + MADERA_AIF1TX_SLOT_LEN_MASK, frame); + regmap_update_bits(madera->regmap, + base + MADERA_AIF_FRAME_CTRL_2, + MADERA_AIF1RX_WL_MASK | + MADERA_AIF1RX_SLOT_LEN_MASK, frame); + } + +restore_aif: + if (reconfig) { + /* Restore AIF TX/RX state */ + regmap_update_bits(madera->regmap, + base + MADERA_AIF_TX_ENABLES, + 0xff, aif_tx_state); + regmap_update_bits(madera->regmap, + base + MADERA_AIF_RX_ENABLES, + 0xff, aif_rx_state); + } + + return ret; +} + +static int madera_is_syncclk(int clk_id) +{ + switch (clk_id) { + case MADERA_CLK_SYSCLK_1: + case MADERA_CLK_SYSCLK_2: + case MADERA_CLK_SYSCLK_3: + return 1; + case MADERA_CLK_ASYNCCLK_1: + case MADERA_CLK_ASYNCCLK_2: + return 0; + default: + return -EINVAL; + } +} + +static int madera_dai_set_sysclk(struct snd_soc_dai *dai, + int clk_id, unsigned int freq, int dir) +{ + struct snd_soc_component *component = dai->component; + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct madera_dai_priv *dai_priv = &priv->dai[dai->id - 1]; + struct snd_soc_dapm_route routes[2]; + int is_sync; + + is_sync = madera_is_syncclk(clk_id); + if (is_sync < 0) { + dev_err(component->dev, "Illegal DAI clock id %d\n", clk_id); + return is_sync; + } + + if (is_sync == madera_is_syncclk(dai_priv->clk)) + return 0; + + if (dai->active) { + dev_err(component->dev, "Can't change clock on active DAI %d\n", + dai->id); + return -EBUSY; + } + + dev_dbg(component->dev, "Setting AIF%d to %s\n", dai->id, + is_sync ? "SYSCLK" : "ASYNCCLK"); + + /* + * A connection to SYSCLK is always required, we only add and remove + * a connection to ASYNCCLK + */ + memset(&routes, 0, sizeof(routes)); + routes[0].sink = dai->driver->capture.stream_name; + routes[1].sink = dai->driver->playback.stream_name; + routes[0].source = "ASYNCCLK"; + routes[1].source = "ASYNCCLK"; + + if (is_sync) + snd_soc_dapm_del_routes(dapm, routes, ARRAY_SIZE(routes)); + else + snd_soc_dapm_add_routes(dapm, routes, ARRAY_SIZE(routes)); + + dai_priv->clk = clk_id; + + return snd_soc_dapm_sync(dapm); +} + +static int madera_set_tristate(struct snd_soc_dai *dai, int tristate) +{ + struct snd_soc_component *component = dai->component; + int base = dai->driver->base; + unsigned int reg; + int ret; + + if (tristate) + reg = MADERA_AIF1_TRI; + else + reg = 0; + + ret = snd_soc_component_update_bits(component, + base + MADERA_AIF_RATE_CTRL, + MADERA_AIF1_TRI, reg); + if (ret < 0) + return ret; + else + return 0; +} + +static void madera_set_channels_to_mask(struct snd_soc_dai *dai, + unsigned int base, + int channels, unsigned int mask) +{ + struct snd_soc_component *component = dai->component; + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct madera *madera = priv->madera; + int slot, i; + + for (i = 0; i < channels; ++i) { + slot = ffs(mask) - 1; + if (slot < 0) + return; + + regmap_write(madera->regmap, base + i, slot); + + mask &= ~(1 << slot); + } + + if (mask) + madera_aif_warn(dai, "Too many channels in TDM mask\n"); +} + +static int madera_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask, + unsigned int rx_mask, int slots, int slot_width) +{ + struct snd_soc_component *component = dai->component; + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + int base = dai->driver->base; + int rx_max_chan = dai->driver->playback.channels_max; + int tx_max_chan = dai->driver->capture.channels_max; + + /* Only support TDM for the physical AIFs */ + if (dai->id > MADERA_MAX_AIF) + return -ENOTSUPP; + + if (slots == 0) { + tx_mask = (1 << tx_max_chan) - 1; + rx_mask = (1 << rx_max_chan) - 1; + } + + madera_set_channels_to_mask(dai, base + MADERA_AIF_FRAME_CTRL_3, + tx_max_chan, tx_mask); + madera_set_channels_to_mask(dai, base + MADERA_AIF_FRAME_CTRL_11, + rx_max_chan, rx_mask); + + priv->tdm_width[dai->id - 1] = slot_width; + priv->tdm_slots[dai->id - 1] = slots; + + return 0; +} + +const struct snd_soc_dai_ops madera_dai_ops = { + .startup = &madera_startup, + .set_fmt = &madera_set_fmt, + .set_tdm_slot = &madera_set_tdm_slot, + .hw_params = &madera_hw_params, + .set_sysclk = &madera_dai_set_sysclk, + .set_tristate = &madera_set_tristate, +}; +EXPORT_SYMBOL_GPL(madera_dai_ops); + +const struct snd_soc_dai_ops madera_simple_dai_ops = { + .startup = &madera_startup, + .hw_params = &madera_hw_params_rate, + .set_sysclk = &madera_dai_set_sysclk, +}; +EXPORT_SYMBOL_GPL(madera_simple_dai_ops); + +int madera_init_dai(struct madera_priv *priv, int id) +{ + struct madera_dai_priv *dai_priv = &priv->dai[id]; + + dai_priv->clk = MADERA_CLK_SYSCLK_1; + dai_priv->constraint = madera_constraint; + + return 0; +} +EXPORT_SYMBOL_GPL(madera_init_dai); + +static const struct { + unsigned int min; + unsigned int max; + u16 fratio; + int ratio; +} fll_sync_fratios[] = { + { 0, 64000, 4, 16 }, + { 64000, 128000, 3, 8 }, + { 128000, 256000, 2, 4 }, + { 256000, 1000000, 1, 2 }, + { 1000000, 13500000, 0, 1 }, +}; + +static const unsigned int pseudo_fref_max[MADERA_FLL_MAX_FRATIO] = { + 13500000, + 6144000, + 6144000, + 3072000, + 3072000, + 2822400, + 2822400, + 1536000, + 1536000, + 1536000, + 1536000, + 1536000, + 1536000, + 1536000, + 1536000, + 768000, +}; + +struct madera_fll_gains { + unsigned int min; + unsigned int max; + int gain; /* main gain */ + int alt_gain; /* alternate integer gain */ +}; + +static const struct madera_fll_gains madera_fll_sync_gains[] = { + { 0, 256000, 0, -1 }, + { 256000, 1000000, 2, -1 }, + { 1000000, 13500000, 4, -1 }, +}; + +static const struct madera_fll_gains madera_fll_main_gains[] = { + { 0, 100000, 0, 2 }, + { 100000, 375000, 2, 2 }, + { 375000, 768000, 3, 2 }, + { 768001, 1500000, 3, 3 }, + { 1500000, 6000000, 4, 3 }, + { 6000000, 13500000, 5, 3 }, +}; + +static int madera_find_sync_fratio(unsigned int fref, int *fratio) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(fll_sync_fratios); i++) { + if (fll_sync_fratios[i].min <= fref && + fref <= fll_sync_fratios[i].max) { + if (fratio) + *fratio = fll_sync_fratios[i].fratio; + + return fll_sync_fratios[i].ratio; + } + } + + return -EINVAL; +} + +static int madera_find_main_fratio(unsigned int fref, unsigned int fout, + int *fratio) +{ + int ratio = 1; + + while ((fout / (ratio * fref)) > MADERA_FLL_MAX_N) + ratio++; + + if (fratio) + *fratio = ratio - 1; + + return ratio; +} + +static int madera_find_fratio(struct madera_fll *fll, unsigned int fref, + bool sync, int *fratio) +{ + switch (fll->madera->type) { + case CS47L35: + switch (fll->madera->rev) { + case 0: + /* rev A0 uses sync calculation for both loops */ + return madera_find_sync_fratio(fref, fratio); + default: + if (sync) + return madera_find_sync_fratio(fref, fratio); + else + return madera_find_main_fratio(fref, + fll->fout, + fratio); + } + break; + case CS47L85: + case WM1840: + /* these use the same calculation for main and sync loops */ + return madera_find_sync_fratio(fref, fratio); + default: + if (sync) + return madera_find_sync_fratio(fref, fratio); + else + return madera_find_main_fratio(fref, fll->fout, fratio); + } +} + +static int madera_calc_fratio(struct madera_fll *fll, + struct madera_fll_cfg *cfg, + unsigned int fref, bool sync) +{ + int init_ratio, ratio; + int refdiv, div; + + /* fref must be <=13.5MHz, find initial refdiv */ + div = 1; + cfg->refdiv = 0; + while (fref > MADERA_FLL_MAX_FREF) { + div *= 2; + fref /= 2; + cfg->refdiv++; + + if (div > MADERA_FLL_MAX_REFDIV) + return -EINVAL; + } + + /* Find an appropriate FLL_FRATIO */ + init_ratio = madera_find_fratio(fll, fref, sync, &cfg->fratio); + if (init_ratio < 0) { + madera_fll_err(fll, "Unable to find FRATIO for fref=%uHz\n", + fref); + return init_ratio; + } + + if (!sync) + cfg->fratio = init_ratio - 1; + + switch (fll->madera->type) { + case CS47L35: + switch (fll->madera->rev) { + case 0: + if (sync) + return init_ratio; + break; + default: + return init_ratio; + } + break; + case CS47L85: + case WM1840: + if (sync) + return init_ratio; + break; + default: + return init_ratio; + } + + /* + * For CS47L35 rev A0, CS47L85 and WM1840 adjust FRATIO/refdiv to avoid + * integer mode if possible + */ + refdiv = cfg->refdiv; + + while (div <= MADERA_FLL_MAX_REFDIV) { + /* + * start from init_ratio because this may already give a + * fractional N.K + */ + for (ratio = init_ratio; ratio > 0; ratio--) { + if (fll->fout % (ratio * fref)) { + cfg->refdiv = refdiv; + cfg->fratio = ratio - 1; + return ratio; + } + } + + for (ratio = init_ratio + 1; ratio <= MADERA_FLL_MAX_FRATIO; + ratio++) { + if ((MADERA_FLL_VCO_CORNER / 2) / + (MADERA_FLL_VCO_MULT * ratio) < fref) + break; + + if (fref > pseudo_fref_max[ratio - 1]) + break; + + if (fll->fout % (ratio * fref)) { + cfg->refdiv = refdiv; + cfg->fratio = ratio - 1; + return ratio; + } + } + + div *= 2; + fref /= 2; + refdiv++; + init_ratio = madera_find_fratio(fll, fref, sync, NULL); + } + + madera_fll_warn(fll, "Falling back to integer mode operation\n"); + + return cfg->fratio + 1; +} + +static int madera_find_fll_gain(struct madera_fll *fll, + struct madera_fll_cfg *cfg, + unsigned int fref, + const struct madera_fll_gains *gains, + int n_gains) +{ + int i; + + for (i = 0; i < n_gains; i++) { + if (gains[i].min <= fref && fref <= gains[i].max) { + cfg->gain = gains[i].gain; + cfg->alt_gain = gains[i].alt_gain; + return 0; + } + } + + madera_fll_err(fll, "Unable to find gain for fref=%uHz\n", fref); + + return -EINVAL; +} + +static int madera_calc_fll(struct madera_fll *fll, + struct madera_fll_cfg *cfg, + unsigned int fref, bool sync) +{ + unsigned int gcd_fll; + const struct madera_fll_gains *gains; + int n_gains; + int ratio, ret; + + madera_fll_dbg(fll, "fref=%u Fout=%u fvco=%u\n", + fref, fll->fout, fll->fout * MADERA_FLL_VCO_MULT); + + /* Find an appropriate FLL_FRATIO and refdiv */ + ratio = madera_calc_fratio(fll, cfg, fref, sync); + if (ratio < 0) + return ratio; + + /* Apply the division for our remaining calculations */ + fref = fref / (1 << cfg->refdiv); + + cfg->n = fll->fout / (ratio * fref); + + if (fll->fout % (ratio * fref)) { + gcd_fll = gcd(fll->fout, ratio * fref); + madera_fll_dbg(fll, "GCD=%u\n", gcd_fll); + + cfg->theta = (fll->fout - (cfg->n * ratio * fref)) + / gcd_fll; + cfg->lambda = (ratio * fref) / gcd_fll; + } else { + cfg->theta = 0; + cfg->lambda = 0; + } + + /* + * Round down to 16bit range with cost of accuracy lost. + * Denominator must be bigger than numerator so we only + * take care of it. + */ + while (cfg->lambda >= (1 << 16)) { + cfg->theta >>= 1; + cfg->lambda >>= 1; + } + + switch (fll->madera->type) { + case CS47L35: + switch (fll->madera->rev) { + case 0: + /* Rev A0 uses the sync gains for both loops */ + gains = madera_fll_sync_gains; + n_gains = ARRAY_SIZE(madera_fll_sync_gains); + break; + default: + if (sync) { + gains = madera_fll_sync_gains; + n_gains = ARRAY_SIZE(madera_fll_sync_gains); + } else { + gains = madera_fll_main_gains; + n_gains = ARRAY_SIZE(madera_fll_main_gains); + } + break; + } + break; + case CS47L85: + case WM1840: + /* These use the sync gains for both loops */ + gains = madera_fll_sync_gains; + n_gains = ARRAY_SIZE(madera_fll_sync_gains); + break; + default: + if (sync) { + gains = madera_fll_sync_gains; + n_gains = ARRAY_SIZE(madera_fll_sync_gains); + } else { + gains = madera_fll_main_gains; + n_gains = ARRAY_SIZE(madera_fll_main_gains); + } + break; + } + + ret = madera_find_fll_gain(fll, cfg, fref, gains, n_gains); + if (ret) + return ret; + + madera_fll_dbg(fll, "N=%d THETA=%d LAMBDA=%d\n", + cfg->n, cfg->theta, cfg->lambda); + madera_fll_dbg(fll, "FRATIO=0x%x(%d) REFCLK_DIV=0x%x(%d)\n", + cfg->fratio, ratio, cfg->refdiv, 1 << cfg->refdiv); + madera_fll_dbg(fll, "GAIN=0x%x(%d)\n", cfg->gain, 1 << cfg->gain); + + return 0; +} + +static bool madera_write_fll(struct madera *madera, unsigned int base, + struct madera_fll_cfg *cfg, int source, + bool sync, int gain) +{ + bool change, fll_change; + + fll_change = false; + regmap_update_bits_check(madera->regmap, + base + MADERA_FLL_CONTROL_3_OFFS, + MADERA_FLL1_THETA_MASK, + cfg->theta, &change); + fll_change |= change; + regmap_update_bits_check(madera->regmap, + base + MADERA_FLL_CONTROL_4_OFFS, + MADERA_FLL1_LAMBDA_MASK, + cfg->lambda, &change); + fll_change |= change; + regmap_update_bits_check(madera->regmap, + base + MADERA_FLL_CONTROL_5_OFFS, + MADERA_FLL1_FRATIO_MASK, + cfg->fratio << MADERA_FLL1_FRATIO_SHIFT, + &change); + fll_change |= change; + regmap_update_bits_check(madera->regmap, + base + MADERA_FLL_CONTROL_6_OFFS, + MADERA_FLL1_REFCLK_DIV_MASK | + MADERA_FLL1_REFCLK_SRC_MASK, + cfg->refdiv << MADERA_FLL1_REFCLK_DIV_SHIFT | + source << MADERA_FLL1_REFCLK_SRC_SHIFT, + &change); + fll_change |= change; + + if (sync) { + regmap_update_bits_check(madera->regmap, + base + MADERA_FLL_SYNCHRONISER_7_OFFS, + MADERA_FLL1_GAIN_MASK, + gain << MADERA_FLL1_GAIN_SHIFT, + &change); + fll_change |= change; + } else { + regmap_update_bits_check(madera->regmap, + base + MADERA_FLL_CONTROL_7_OFFS, + MADERA_FLL1_GAIN_MASK, + gain << MADERA_FLL1_GAIN_SHIFT, + &change); + fll_change |= change; + } + + regmap_update_bits_check(madera->regmap, + base + MADERA_FLL_CONTROL_2_OFFS, + MADERA_FLL1_CTRL_UPD | MADERA_FLL1_N_MASK, + MADERA_FLL1_CTRL_UPD | cfg->n, &change); + fll_change |= change; + + return fll_change; +} + +static int madera_is_enabled_fll(struct madera_fll *fll, int base) +{ + struct madera *madera = fll->madera; + unsigned int reg; + int ret; + + ret = regmap_read(madera->regmap, + base + MADERA_FLL_CONTROL_1_OFFS, ®); + if (ret != 0) { + madera_fll_err(fll, "Failed to read current state: %d\n", ret); + return ret; + } + + return reg & MADERA_FLL1_ENA; +} + +static int madera_wait_for_fll(struct madera_fll *fll, bool requested) +{ + struct madera *madera = fll->madera; + unsigned int val = 0; + bool status; + int i; + + madera_fll_dbg(fll, "Waiting for FLL...\n"); + + for (i = 0; i < 30; i++) { + regmap_read(madera->regmap, MADERA_IRQ1_RAW_STATUS_2, &val); + status = val & (MADERA_FLL1_LOCK_STS1 << (fll->id - 1)); + if (status == requested) + return 0; + + switch (i) { + case 0 ... 5: + usleep_range(75, 125); + break; + case 11 ... 20: + usleep_range(750, 1250); + break; + default: + msleep(20); + break; + } + } + + madera_fll_warn(fll, "Timed out waiting for lock\n"); + + return -ETIMEDOUT; +} + +static bool madera_set_fll_phase_integrator(struct madera_fll *fll, + struct madera_fll_cfg *ref_cfg, + bool sync) +{ + unsigned int val; + bool reg_change; + + if (!sync && ref_cfg->theta == 0) + val = (1 << MADERA_FLL1_PHASE_ENA_SHIFT) | + (2 << MADERA_FLL1_PHASE_GAIN_SHIFT); + else + val = 2 << MADERA_FLL1_PHASE_GAIN_SHIFT; + + regmap_update_bits_check(fll->madera->regmap, + fll->base + MADERA_FLL_EFS_2_OFFS, + MADERA_FLL1_PHASE_ENA_MASK | + MADERA_FLL1_PHASE_GAIN_MASK, + val, ®_change); + + return reg_change; +} + +static void madera_disable_fll(struct madera_fll *fll) +{ + struct madera *madera = fll->madera; + unsigned int sync_base; + bool change; + + switch (madera->type) { + case CS47L35: + sync_base = fll->base + CS47L35_FLL_SYNCHRONISER_OFFS; + break; + default: + sync_base = fll->base + MADERA_FLL_SYNCHRONISER_OFFS; + break; + } + + madera_fll_dbg(fll, "Disabling FLL\n"); + + regmap_update_bits(madera->regmap, + fll->base + MADERA_FLL_CONTROL_1_OFFS, + MADERA_FLL1_FREERUN, MADERA_FLL1_FREERUN); + regmap_update_bits_check(madera->regmap, + fll->base + MADERA_FLL_CONTROL_1_OFFS, + MADERA_FLL1_ENA, 0, &change); + regmap_update_bits(madera->regmap, + sync_base + MADERA_FLL_SYNCHRONISER_1_OFFS, + MADERA_FLL1_SYNC_ENA, 0); + regmap_update_bits(madera->regmap, + fll->base + MADERA_FLL_CONTROL_1_OFFS, + MADERA_FLL1_FREERUN, 0); + + madera_wait_for_fll(fll, false); + + if (change) + pm_runtime_put_autosuspend(madera->dev); +} + +static int madera_enable_fll(struct madera_fll *fll) +{ + struct madera *madera = fll->madera; + bool have_sync = false; + int already_enabled = madera_is_enabled_fll(fll, fll->base); + int sync_enabled; + struct madera_fll_cfg cfg; + unsigned int sync_base; + int gain, ret; + bool fll_change = false; + + if (already_enabled < 0) + return already_enabled; /* error getting current state */ + + if (fll->ref_src < 0 || fll->ref_freq == 0) { + madera_fll_err(fll, "No REFCLK\n"); + ret = -EINVAL; + goto err; + } + + madera_fll_dbg(fll, "Enabling FLL, initially %s\n", + already_enabled ? "enabled" : "disabled"); + + if (fll->fout < MADERA_FLL_MIN_FOUT || + fll->fout > MADERA_FLL_MAX_FOUT) { + madera_fll_err(fll, "invalid fout %uHz\n", fll->fout); + ret = -EINVAL; + goto err; + } + + switch (madera->type) { + case CS47L35: + sync_base = fll->base + CS47L35_FLL_SYNCHRONISER_OFFS; + break; + default: + sync_base = fll->base + MADERA_FLL_SYNCHRONISER_OFFS; + break; + } + + sync_enabled = madera_is_enabled_fll(fll, sync_base); + if (sync_enabled < 0) + return sync_enabled; + + if (already_enabled) { + /* Facilitate smooth refclk across the transition */ + regmap_update_bits(fll->madera->regmap, + fll->base + MADERA_FLL_CONTROL_1_OFFS, + MADERA_FLL1_FREERUN, + MADERA_FLL1_FREERUN); + udelay(32); + regmap_update_bits(fll->madera->regmap, + fll->base + MADERA_FLL_CONTROL_7_OFFS, + MADERA_FLL1_GAIN_MASK, 0); + } + + /* Apply SYNCCLK setting */ + if (fll->sync_src >= 0) { + ret = madera_calc_fll(fll, &cfg, fll->sync_freq, true); + if (ret < 0) + goto err; + + fll_change |= madera_write_fll(madera, sync_base, + &cfg, fll->sync_src, + true, cfg.gain); + have_sync = true; + } + + if (already_enabled && !!sync_enabled != have_sync) + madera_fll_warn(fll, "Synchroniser changed on active FLL\n"); + + /* Apply REFCLK setting */ + ret = madera_calc_fll(fll, &cfg, fll->ref_freq, false); + if (ret < 0) + goto err; + + /* Ref path hardcodes lambda to 65536 when sync is on */ + if (have_sync && cfg.lambda) + cfg.theta = (cfg.theta * (1 << 16)) / cfg.lambda; + + switch (fll->madera->type) { + case CS47L35: + switch (fll->madera->rev) { + case 0: + gain = cfg.gain; + break; + default: + fll_change |= + madera_set_fll_phase_integrator(fll, &cfg, + have_sync); + if (!have_sync && cfg.theta == 0) + gain = cfg.alt_gain; + else + gain = cfg.gain; + break; + } + break; + case CS47L85: + case WM1840: + gain = cfg.gain; + break; + default: + fll_change |= madera_set_fll_phase_integrator(fll, &cfg, + have_sync); + if (!have_sync && cfg.theta == 0) + gain = cfg.alt_gain; + else + gain = cfg.gain; + break; + } + + fll_change |= madera_write_fll(madera, fll->base, + &cfg, fll->ref_src, + false, gain); + + /* + * Increase the bandwidth if we're not using a low frequency + * sync source. + */ + if (have_sync && fll->sync_freq > 100000) + regmap_update_bits(madera->regmap, + sync_base + MADERA_FLL_SYNCHRONISER_7_OFFS, + MADERA_FLL1_SYNC_DFSAT_MASK, 0); + else + regmap_update_bits(madera->regmap, + sync_base + MADERA_FLL_SYNCHRONISER_7_OFFS, + MADERA_FLL1_SYNC_DFSAT_MASK, + MADERA_FLL1_SYNC_DFSAT); + + if (!already_enabled) + pm_runtime_get_sync(madera->dev); + + if (have_sync) + regmap_update_bits(madera->regmap, + sync_base + MADERA_FLL_SYNCHRONISER_1_OFFS, + MADERA_FLL1_SYNC_ENA, + MADERA_FLL1_SYNC_ENA); + regmap_update_bits(madera->regmap, + fll->base + MADERA_FLL_CONTROL_1_OFFS, + MADERA_FLL1_ENA, MADERA_FLL1_ENA); + + if (already_enabled) + regmap_update_bits(madera->regmap, + fll->base + MADERA_FLL_CONTROL_1_OFFS, + MADERA_FLL1_FREERUN, 0); + + if (fll_change || !already_enabled) + madera_wait_for_fll(fll, true); + + return 0; + +err: + /* In case of error don't leave the FLL running with an old config */ + madera_disable_fll(fll); + + return ret; +} + +static int madera_apply_fll(struct madera_fll *fll) +{ + if (fll->fout) { + return madera_enable_fll(fll); + } else { + madera_disable_fll(fll); + return 0; + } +} + +int madera_set_fll_syncclk(struct madera_fll *fll, int source, + unsigned int fref, unsigned int fout) +{ + /* + * fout is ignored, since the synchronizer is an optional extra + * constraint on the Fout generated from REFCLK, so the Fout is + * set when configuring REFCLK + */ + + if (fll->sync_src == source && fll->sync_freq == fref) + return 0; + + fll->sync_src = source; + fll->sync_freq = fref; + + return madera_apply_fll(fll); +} +EXPORT_SYMBOL_GPL(madera_set_fll_syncclk); + +int madera_set_fll_refclk(struct madera_fll *fll, int source, + unsigned int fref, unsigned int fout) +{ + int ret; + + if (fll->ref_src == source && + fll->ref_freq == fref && fll->fout == fout) + return 0; + + /* + * Changes of fout on an enabled FLL aren't allowed except when + * setting fout==0 to disable the FLL + */ + if (fout && fout != fll->fout) { + ret = madera_is_enabled_fll(fll, fll->base); + if (ret < 0) + return ret; + + if (ret) { + madera_fll_err(fll, "Can't change Fout on active FLL\n"); + return -EBUSY; + } + } + + fll->ref_src = source; + fll->ref_freq = fref; + fll->fout = fout; + + return madera_apply_fll(fll); +} +EXPORT_SYMBOL_GPL(madera_set_fll_refclk); + +int madera_init_fll(struct madera *madera, int id, int base, + struct madera_fll *fll) +{ + fll->id = id; + fll->base = base; + fll->madera = madera; + fll->ref_src = MADERA_FLL_SRC_NONE; + fll->sync_src = MADERA_FLL_SRC_NONE; + + regmap_update_bits(madera->regmap, + fll->base + MADERA_FLL_CONTROL_1_OFFS, + MADERA_FLL1_FREERUN, 0); + + return 0; +} +EXPORT_SYMBOL_GPL(madera_init_fll); + +static const struct reg_sequence madera_fll_ao_32K_49M_patch[] = { + { MADERA_FLLAO_CONTROL_2, 0x02EE }, + { MADERA_FLLAO_CONTROL_3, 0x0000 }, + { MADERA_FLLAO_CONTROL_4, 0x0001 }, + { MADERA_FLLAO_CONTROL_5, 0x0002 }, + { MADERA_FLLAO_CONTROL_6, 0x8001 }, + { MADERA_FLLAO_CONTROL_7, 0x0004 }, + { MADERA_FLLAO_CONTROL_8, 0x0077 }, + { MADERA_FLLAO_CONTROL_10, 0x06D8 }, + { MADERA_FLLAO_CONTROL_11, 0x0085 }, + { MADERA_FLLAO_CONTROL_2, 0x82EE }, +}; + +static const struct reg_sequence madera_fll_ao_32K_45M_patch[] = { + { MADERA_FLLAO_CONTROL_2, 0x02B1 }, + { MADERA_FLLAO_CONTROL_3, 0x0001 }, + { MADERA_FLLAO_CONTROL_4, 0x0010 }, + { MADERA_FLLAO_CONTROL_5, 0x0002 }, + { MADERA_FLLAO_CONTROL_6, 0x8001 }, + { MADERA_FLLAO_CONTROL_7, 0x0004 }, + { MADERA_FLLAO_CONTROL_8, 0x0077 }, + { MADERA_FLLAO_CONTROL_10, 0x06D8 }, + { MADERA_FLLAO_CONTROL_11, 0x0005 }, + { MADERA_FLLAO_CONTROL_2, 0x82B1 }, +}; + +struct madera_fllao_patch { + unsigned int fin; + unsigned int fout; + const struct reg_sequence *patch; + unsigned int patch_size; +}; + +static const struct madera_fllao_patch madera_fllao_settings[] = { + { + .fin = 32768, + .fout = 49152000, + .patch = madera_fll_ao_32K_49M_patch, + .patch_size = ARRAY_SIZE(madera_fll_ao_32K_49M_patch), + + }, + { + .fin = 32768, + .fout = 45158400, + .patch = madera_fll_ao_32K_45M_patch, + .patch_size = ARRAY_SIZE(madera_fll_ao_32K_45M_patch), + }, +}; + +static int madera_enable_fll_ao(struct madera_fll *fll, + const struct reg_sequence *patch, + unsigned int patch_size) +{ + struct madera *madera = fll->madera; + int already_enabled = madera_is_enabled_fll(fll, fll->base); + unsigned int val; + int i; + + if (already_enabled < 0) + return already_enabled; + + if (!already_enabled) + pm_runtime_get_sync(madera->dev); + + madera_fll_dbg(fll, "Enabling FLL_AO, initially %s\n", + already_enabled ? "enabled" : "disabled"); + + /* FLL_AO_HOLD must be set before configuring any registers */ + regmap_update_bits(fll->madera->regmap, + fll->base + MADERA_FLLAO_CONTROL_1_OFFS, + MADERA_FLL_AO_HOLD, MADERA_FLL_AO_HOLD); + + for (i = 0; i < patch_size; i++) { + val = patch[i].def; + + /* modify the patch to apply fll->ref_src as input clock */ + if (patch[i].reg == MADERA_FLLAO_CONTROL_6) { + val &= ~MADERA_FLL_AO_REFCLK_SRC_MASK; + val |= (fll->ref_src << MADERA_FLL_AO_REFCLK_SRC_SHIFT) + & MADERA_FLL_AO_REFCLK_SRC_MASK; + } + + regmap_write(madera->regmap, patch[i].reg, val); + } + + regmap_update_bits(madera->regmap, + fll->base + MADERA_FLLAO_CONTROL_1_OFFS, + MADERA_FLL_AO_ENA, MADERA_FLL_AO_ENA); + + /* Release the hold so that fll_ao locks to external frequency */ + regmap_update_bits(madera->regmap, + fll->base + MADERA_FLLAO_CONTROL_1_OFFS, + MADERA_FLL_AO_HOLD, 0); + + if (!already_enabled) + madera_wait_for_fll(fll, true); + + return 0; +} + +static int madera_disable_fll_ao(struct madera_fll *fll) +{ + struct madera *madera = fll->madera; + bool change; + + madera_fll_dbg(fll, "Disabling FLL_AO\n"); + + regmap_update_bits(madera->regmap, + fll->base + MADERA_FLLAO_CONTROL_1_OFFS, + MADERA_FLL_AO_HOLD, MADERA_FLL_AO_HOLD); + regmap_update_bits_check(madera->regmap, + fll->base + MADERA_FLLAO_CONTROL_1_OFFS, + MADERA_FLL_AO_ENA, 0, &change); + + madera_wait_for_fll(fll, false); + + /* + * ctrl_up gates the writes to all fll_ao register, setting it to 0 + * here ensures that after a runtime suspend/resume cycle when one + * enables the fllao then ctrl_up is the last bit that is configured + * by the fllao enable code rather than the cache sync operation which + * would have updated it much earlier before writing out all fllao + * registers + */ + regmap_update_bits(madera->regmap, + fll->base + MADERA_FLLAO_CONTROL_2_OFFS, + MADERA_FLL_AO_CTRL_UPD_MASK, 0); + + if (change) + pm_runtime_put_autosuspend(madera->dev); + + return 0; +} + +int madera_set_fll_ao_refclk(struct madera_fll *fll, int source, + unsigned int fin, unsigned int fout) +{ + int ret = 0; + const struct reg_sequence *patch = NULL; + int patch_size = 0; + unsigned int i; + + if (fll->ref_src == source && + fll->ref_freq == fin && fll->fout == fout) + return 0; + + madera_fll_dbg(fll, "Change FLL_AO refclk to fin=%u fout=%u source=%d\n", + fin, fout, source); + + if (fout && (fll->ref_freq != fin || fll->fout != fout)) { + for (i = 0; i < ARRAY_SIZE(madera_fllao_settings); i++) { + if (madera_fllao_settings[i].fin == fin && + madera_fllao_settings[i].fout == fout) + break; + } + + if (i == ARRAY_SIZE(madera_fllao_settings)) { + madera_fll_err(fll, + "No matching configuration for FLL_AO\n"); + return -EINVAL; + } + + patch = madera_fllao_settings[i].patch; + patch_size = madera_fllao_settings[i].patch_size; + } + + fll->ref_src = source; + fll->ref_freq = fin; + fll->fout = fout; + + if (fout) + ret = madera_enable_fll_ao(fll, patch, patch_size); + else + madera_disable_fll_ao(fll); + + return ret; +} +EXPORT_SYMBOL_GPL(madera_set_fll_ao_refclk); + +/** + * madera_set_output_mode - Set the mode of the specified output + * + * @component: Device to configure + * @output: Output number + * @diff: True to set the output to differential mode + * + * Some systems use external analogue switches to connect more + * analogue devices to the CODEC than are supported by the device. In + * some systems this requires changing the switched output from single + * ended to differential mode dynamically at runtime, an operation + * supported using this function. + * + * Most systems have a single static configuration and should use + * platform data instead. + */ +int madera_set_output_mode(struct snd_soc_component *component, int output, + bool differential) +{ + unsigned int reg, val; + int ret; + + if (output < 1 || output > MADERA_MAX_OUTPUT) + return -EINVAL; + + reg = MADERA_OUTPUT_PATH_CONFIG_1L + (output - 1) * 8; + + if (differential) + val = MADERA_OUT1_MONO; + else + val = 0; + + ret = snd_soc_component_update_bits(component, reg, MADERA_OUT1_MONO, + val); + if (ret < 0) + return ret; + else + return 0; +} +EXPORT_SYMBOL_GPL(madera_set_output_mode); + +static bool madera_eq_filter_unstable(bool mode, __be16 _a, __be16 _b) +{ + s16 a = be16_to_cpu(_a); + s16 b = be16_to_cpu(_b); + + if (!mode) { + return abs(a) >= 4096; + } else { + if (abs(b) >= 4096) + return true; + + return (abs((a << 16) / (4096 - b)) >= 4096 << 4); + } +} + +int madera_eq_coeff_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = + snd_soc_kcontrol_component(kcontrol); + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct madera *madera = priv->madera; + struct soc_bytes *params = (void *)kcontrol->private_value; + unsigned int val; + __be16 *data; + int len; + int ret; + + len = params->num_regs * regmap_get_val_bytes(madera->regmap); + + data = kmemdup(ucontrol->value.bytes.data, len, GFP_KERNEL | GFP_DMA); + if (!data) + return -ENOMEM; + + data[0] &= cpu_to_be16(MADERA_EQ1_B1_MODE); + + if (madera_eq_filter_unstable(!!data[0], data[1], data[2]) || + madera_eq_filter_unstable(true, data[4], data[5]) || + madera_eq_filter_unstable(true, data[8], data[9]) || + madera_eq_filter_unstable(true, data[12], data[13]) || + madera_eq_filter_unstable(false, data[16], data[17])) { + dev_err(madera->dev, "Rejecting unstable EQ coefficients\n"); + ret = -EINVAL; + goto out; + } + + ret = regmap_read(madera->regmap, params->base, &val); + if (ret != 0) + goto out; + + val &= ~MADERA_EQ1_B1_MODE; + data[0] |= cpu_to_be16(val); + + ret = regmap_raw_write(madera->regmap, params->base, data, len); + +out: + kfree(data); + + return ret; +} +EXPORT_SYMBOL_GPL(madera_eq_coeff_put); + +int madera_lhpf_coeff_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = + snd_soc_kcontrol_component(kcontrol); + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct madera *madera = priv->madera; + __be16 *data = (__be16 *)ucontrol->value.bytes.data; + s16 val = be16_to_cpu(*data); + + if (abs(val) >= 4096) { + dev_err(madera->dev, "Rejecting unstable LHPF coefficients\n"); + return -EINVAL; + } + + return snd_soc_bytes_put(kcontrol, ucontrol); +} +EXPORT_SYMBOL_GPL(madera_lhpf_coeff_put); + +MODULE_SOFTDEP("pre: madera"); +MODULE_DESCRIPTION("ASoC Cirrus Logic Madera codec support"); +MODULE_AUTHOR("Charles Keepax "); +MODULE_AUTHOR("Richard Fitzgerald "); +MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/codecs/madera.h b/sound/soc/codecs/madera.h new file mode 100644 index 000000000000..aa2db156582b --- /dev/null +++ b/sound/soc/codecs/madera.h @@ -0,0 +1,446 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Cirrus Logic Madera class codecs common support + * + * Copyright (C) 2015-2018 Cirrus Logic, Inc. and + * Cirrus Logic International Semiconductor Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2. + */ + +#ifndef ASOC_MADERA_H +#define ASOC_MADERA_H + +#include +#include +#include + +#include "wm_adsp.h" + +#define MADERA_FLL1_REFCLK 1 +#define MADERA_FLL2_REFCLK 2 +#define MADERA_FLL3_REFCLK 3 +#define MADERA_FLLAO_REFCLK 4 +#define MADERA_FLL1_SYNCCLK 5 +#define MADERA_FLL2_SYNCCLK 6 +#define MADERA_FLL3_SYNCCLK 7 +#define MADERA_FLLAO_SYNCCLK 8 + +#define MADERA_FLL_SRC_NONE -1 +#define MADERA_FLL_SRC_MCLK1 0 +#define MADERA_FLL_SRC_MCLK2 1 +#define MADERA_FLL_SRC_SLIMCLK 3 +#define MADERA_FLL_SRC_FLL1 4 +#define MADERA_FLL_SRC_FLL2 5 +#define MADERA_FLL_SRC_AIF1BCLK 8 +#define MADERA_FLL_SRC_AIF2BCLK 9 +#define MADERA_FLL_SRC_AIF3BCLK 10 +#define MADERA_FLL_SRC_AIF4BCLK 11 +#define MADERA_FLL_SRC_AIF1LRCLK 12 +#define MADERA_FLL_SRC_AIF2LRCLK 13 +#define MADERA_FLL_SRC_AIF3LRCLK 14 +#define MADERA_FLL_SRC_AIF4LRCLK 15 + +#define MADERA_CLK_SYSCLK_1 1 +#define MADERA_CLK_ASYNCCLK_1 2 +#define MADERA_CLK_OPCLK 3 +#define MADERA_CLK_ASYNC_OPCLK 4 +#define MADERA_CLK_SYSCLK_2 5 +#define MADERA_CLK_SYSCLK_3 6 +#define MADERA_CLK_ASYNCCLK_2 7 +#define MADERA_CLK_DSPCLK 8 + +#define MADERA_CLK_SRC_MCLK1 0x0 +#define MADERA_CLK_SRC_MCLK2 0x1 +#define MADERA_CLK_SRC_FLL1 0x4 +#define MADERA_CLK_SRC_FLL2 0x5 +#define MADERA_CLK_SRC_FLL3 0x6 +#define MADERA_CLK_SRC_FLLAO_HI 0x7 +#define MADERA_CLK_SRC_FLL1_DIV6 0x7 +#define MADERA_CLK_SRC_AIF1BCLK 0x8 +#define MADERA_CLK_SRC_AIF2BCLK 0x9 +#define MADERA_CLK_SRC_AIF3BCLK 0xA +#define MADERA_CLK_SRC_AIF4BCLK 0xB +#define MADERA_CLK_SRC_FLLAO 0xF + +#define MADERA_MIXER_VOL_MASK 0x00FE +#define MADERA_MIXER_VOL_SHIFT 1 +#define MADERA_MIXER_VOL_WIDTH 7 + +#define MADERA_DOM_GRP_FX 0 +#define MADERA_DOM_GRP_ASRC1 1 +#define MADERA_DOM_GRP_ASRC2 2 +#define MADERA_DOM_GRP_ISRC1 3 +#define MADERA_DOM_GRP_ISRC2 4 +#define MADERA_DOM_GRP_ISRC3 5 +#define MADERA_DOM_GRP_ISRC4 6 +#define MADERA_DOM_GRP_OUT 7 +#define MADERA_DOM_GRP_SPD 8 +#define MADERA_DOM_GRP_DSP1 9 +#define MADERA_DOM_GRP_DSP2 10 +#define MADERA_DOM_GRP_DSP3 11 +#define MADERA_DOM_GRP_DSP4 12 +#define MADERA_DOM_GRP_DSP5 13 +#define MADERA_DOM_GRP_DSP6 14 +#define MADERA_DOM_GRP_DSP7 15 +#define MADERA_DOM_GRP_AIF1 16 +#define MADERA_DOM_GRP_AIF2 17 +#define MADERA_DOM_GRP_AIF3 18 +#define MADERA_DOM_GRP_AIF4 19 +#define MADERA_DOM_GRP_SLIMBUS 20 +#define MADERA_DOM_GRP_PWM 21 +#define MADERA_DOM_GRP_DFC 22 +#define MADERA_N_DOM_GRPS 23 + +#define MADERA_MAX_DAI 11 +#define MADERA_MAX_ADSP 7 + +#define MADERA_NUM_MIXER_INPUTS 148 + +struct madera; +struct wm_adsp; + +struct madera_voice_trigger_info { + /** Which core triggered, 1-based (1 = DSP1, ...) */ + int core_num; +}; + +struct madera_dai_priv { + int clk; + struct snd_pcm_hw_constraint_list constraint; +}; + +struct madera_priv { + struct wm_adsp adsp[MADERA_MAX_ADSP]; + struct madera *madera; + struct device *dev; + int sysclk; + int asyncclk; + int dspclk; + struct madera_dai_priv dai[MADERA_MAX_DAI]; + + int num_inputs; + + unsigned int in_pending; + + unsigned int out_up_pending; + unsigned int out_up_delay; + unsigned int out_down_pending; + unsigned int out_down_delay; + + unsigned int adsp_rate_cache[MADERA_MAX_ADSP]; + + struct mutex rate_lock; + + int tdm_width[MADERA_MAX_AIF]; + int tdm_slots[MADERA_MAX_AIF]; + + int domain_group_ref[MADERA_N_DOM_GRPS]; +}; + +struct madera_fll_cfg { + int n; + unsigned int theta; + unsigned int lambda; + int refdiv; + int fratio; + int gain; + int alt_gain; +}; + +struct madera_fll { + struct madera *madera; + int id; + unsigned int base; + + unsigned int fout; + + int sync_src; + unsigned int sync_freq; + + int ref_src; + unsigned int ref_freq; + struct madera_fll_cfg ref_cfg; +}; + +struct madera_enum { + struct soc_enum mixer_enum; + int val; +}; + +extern const unsigned int madera_ana_tlv[]; +extern const unsigned int madera_eq_tlv[]; +extern const unsigned int madera_digital_tlv[]; +extern const unsigned int madera_noise_tlv[]; +extern const unsigned int madera_ng_tlv[]; + +extern const unsigned int madera_mixer_tlv[]; +extern const char * const madera_mixer_texts[MADERA_NUM_MIXER_INPUTS]; +extern const unsigned int madera_mixer_values[MADERA_NUM_MIXER_INPUTS]; + +#define MADERA_GAINMUX_CONTROLS(name, base) \ + SOC_SINGLE_RANGE_TLV(name " Input Volume", base + 1, \ + MADERA_MIXER_VOL_SHIFT, 0x20, 0x50, 0, \ + madera_mixer_tlv) + +#define MADERA_MIXER_CONTROLS(name, base) \ + SOC_SINGLE_RANGE_TLV(name " Input 1 Volume", base + 1, \ + MADERA_MIXER_VOL_SHIFT, 0x20, 0x50, 0, \ + madera_mixer_tlv), \ + SOC_SINGLE_RANGE_TLV(name " Input 2 Volume", base + 3, \ + MADERA_MIXER_VOL_SHIFT, 0x20, 0x50, 0, \ + madera_mixer_tlv), \ + SOC_SINGLE_RANGE_TLV(name " Input 3 Volume", base + 5, \ + MADERA_MIXER_VOL_SHIFT, 0x20, 0x50, 0, \ + madera_mixer_tlv), \ + SOC_SINGLE_RANGE_TLV(name " Input 4 Volume", base + 7, \ + MADERA_MIXER_VOL_SHIFT, 0x20, 0x50, 0, \ + madera_mixer_tlv) + +#define MADERA_MUX_ENUM_DECL(name, reg) \ + SOC_VALUE_ENUM_SINGLE_AUTODISABLE_DECL( \ + name, reg, 0, 0xff, madera_mixer_texts, madera_mixer_values) + +#define MADERA_MUX_CTL_DECL(name) \ + const struct snd_kcontrol_new name##_mux = \ + SOC_DAPM_ENUM("Route", name##_enum) + +#define MADERA_MUX_ENUMS(name, base_reg) \ + static MADERA_MUX_ENUM_DECL(name##_enum, base_reg); \ + static MADERA_MUX_CTL_DECL(name) + +#define MADERA_MIXER_ENUMS(name, base_reg) \ + MADERA_MUX_ENUMS(name##_in1, base_reg); \ + MADERA_MUX_ENUMS(name##_in2, base_reg + 2); \ + MADERA_MUX_ENUMS(name##_in3, base_reg + 4); \ + MADERA_MUX_ENUMS(name##_in4, base_reg + 6) + +#define MADERA_DSP_AUX_ENUMS(name, base_reg) \ + MADERA_MUX_ENUMS(name##_aux1, base_reg); \ + MADERA_MUX_ENUMS(name##_aux2, base_reg + 8); \ + MADERA_MUX_ENUMS(name##_aux3, base_reg + 16); \ + MADERA_MUX_ENUMS(name##_aux4, base_reg + 24); \ + MADERA_MUX_ENUMS(name##_aux5, base_reg + 32); \ + MADERA_MUX_ENUMS(name##_aux6, base_reg + 40) + +#define MADERA_MUX(name, ctrl) \ + SND_SOC_DAPM_MUX(name, SND_SOC_NOPM, 0, 0, ctrl) + +#define MADERA_MUX_WIDGETS(name, name_str) \ + MADERA_MUX(name_str " Input 1", &name##_mux) + +#define MADERA_MIXER_WIDGETS(name, name_str) \ + MADERA_MUX(name_str " Input 1", &name##_in1_mux), \ + MADERA_MUX(name_str " Input 2", &name##_in2_mux), \ + MADERA_MUX(name_str " Input 3", &name##_in3_mux), \ + MADERA_MUX(name_str " Input 4", &name##_in4_mux), \ + SND_SOC_DAPM_MIXER(name_str " Mixer", SND_SOC_NOPM, 0, 0, NULL, 0) + +#define MADERA_DSP_WIDGETS(name, name_str) \ + MADERA_MIXER_WIDGETS(name##L, name_str "L"), \ + MADERA_MIXER_WIDGETS(name##R, name_str "R"), \ + MADERA_MUX(name_str " Aux 1", &name##_aux1_mux), \ + MADERA_MUX(name_str " Aux 2", &name##_aux2_mux), \ + MADERA_MUX(name_str " Aux 3", &name##_aux3_mux), \ + MADERA_MUX(name_str " Aux 4", &name##_aux4_mux), \ + MADERA_MUX(name_str " Aux 5", &name##_aux5_mux), \ + MADERA_MUX(name_str " Aux 6", &name##_aux6_mux) + +#define MADERA_MUX_ROUTES(widget, name) \ + { widget, NULL, name " Input 1" }, \ + MADERA_MIXER_INPUT_ROUTES(name " Input 1") + +#define MADERA_MIXER_ROUTES(widget, name) \ + { widget, NULL, name " Mixer" }, \ + { name " Mixer", NULL, name " Input 1" }, \ + { name " Mixer", NULL, name " Input 2" }, \ + { name " Mixer", NULL, name " Input 3" }, \ + { name " Mixer", NULL, name " Input 4" }, \ + MADERA_MIXER_INPUT_ROUTES(name " Input 1"), \ + MADERA_MIXER_INPUT_ROUTES(name " Input 2"), \ + MADERA_MIXER_INPUT_ROUTES(name " Input 3"), \ + MADERA_MIXER_INPUT_ROUTES(name " Input 4") + +#define MADERA_DSP_ROUTES(name) \ + { name, NULL, name " Preloader"}, \ + { name " Preload", NULL, name " Preloader"}, \ + { name, NULL, "SYSCLK"}, \ + { name, NULL, "DSPCLK"}, \ + { name, NULL, name " Aux 1" }, \ + { name, NULL, name " Aux 2" }, \ + { name, NULL, name " Aux 3" }, \ + { name, NULL, name " Aux 4" }, \ + { name, NULL, name " Aux 5" }, \ + { name, NULL, name " Aux 6" }, \ + MADERA_MIXER_INPUT_ROUTES(name " Aux 1"), \ + MADERA_MIXER_INPUT_ROUTES(name " Aux 2"), \ + MADERA_MIXER_INPUT_ROUTES(name " Aux 3"), \ + MADERA_MIXER_INPUT_ROUTES(name " Aux 4"), \ + MADERA_MIXER_INPUT_ROUTES(name " Aux 5"), \ + MADERA_MIXER_INPUT_ROUTES(name " Aux 6"), \ + MADERA_MIXER_ROUTES(name, name "L"), \ + MADERA_MIXER_ROUTES(name, name "R") + +#define MADERA_RATE_ENUM(xname, xenum) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname,\ + .info = snd_soc_info_enum_double, \ + .get = snd_soc_get_enum_double, .put = madera_rate_put, \ + .private_value = (unsigned long)&xenum } + +#define MADERA_EQ_CONTROL(xname, xbase) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \ + .info = snd_soc_bytes_info, .get = snd_soc_bytes_get, \ + .put = madera_eq_coeff_put, .private_value = \ + ((unsigned long)&(struct soc_bytes) { .base = xbase, \ + .num_regs = 20, .mask = ~MADERA_EQ1_B1_MODE }) } + +#define MADERA_LHPF_CONTROL(xname, xbase) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \ + .info = snd_soc_bytes_info, .get = snd_soc_bytes_get, \ + .put = madera_lhpf_coeff_put, .private_value = \ + ((unsigned long)&(struct soc_bytes) { .base = xbase, \ + .num_regs = 1 }) } + +#define MADERA_RATES SNDRV_PCM_RATE_KNOT + +#define MADERA_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE | \ + SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S32_LE) + +#define MADERA_OSR_ENUM_SIZE 5 +#define MADERA_SYNC_RATE_ENUM_SIZE 3 +#define MADERA_ASYNC_RATE_ENUM_SIZE 2 +#define MADERA_RATE_ENUM_SIZE \ + (MADERA_SYNC_RATE_ENUM_SIZE + MADERA_ASYNC_RATE_ENUM_SIZE) +#define MADERA_SAMPLE_RATE_ENUM_SIZE 16 +#define MADERA_DFC_TYPE_ENUM_SIZE 5 +#define MADERA_DFC_WIDTH_ENUM_SIZE 5 + +extern const struct snd_soc_dai_ops madera_dai_ops; +extern const struct snd_soc_dai_ops madera_simple_dai_ops; + +extern const struct snd_kcontrol_new madera_inmux[]; +extern const struct snd_kcontrol_new madera_inmode[]; + +extern const char * const madera_rate_text[MADERA_RATE_ENUM_SIZE]; +extern const unsigned int madera_rate_val[MADERA_RATE_ENUM_SIZE]; + +extern const struct soc_enum madera_sample_rate[]; +extern const struct soc_enum madera_isrc_fsl[]; +extern const struct soc_enum madera_isrc_fsh[]; +extern const struct soc_enum madera_asrc1_rate[]; +extern const struct soc_enum madera_asrc2_rate[]; +extern const struct soc_enum madera_dfc_width[]; +extern const struct soc_enum madera_dfc_type[]; + +extern const struct soc_enum madera_in_vi_ramp; +extern const struct soc_enum madera_in_vd_ramp; + +extern const struct soc_enum madera_out_vi_ramp; +extern const struct soc_enum madera_out_vd_ramp; + +extern const struct soc_enum madera_lhpf1_mode; +extern const struct soc_enum madera_lhpf2_mode; +extern const struct soc_enum madera_lhpf3_mode; +extern const struct soc_enum madera_lhpf4_mode; + +extern const struct soc_enum madera_ng_hold; +extern const struct soc_enum madera_in_hpf_cut_enum; +extern const struct soc_enum madera_in_dmic_osr[]; + +extern const struct soc_enum madera_output_anc_src[]; +extern const struct soc_enum madera_anc_input_src[]; +extern const struct soc_enum madera_anc_ng_enum; + +extern const struct snd_kcontrol_new madera_dsp_trigger_output_mux[]; +extern const struct snd_kcontrol_new madera_drc_activity_output_mux[]; + +extern const struct snd_kcontrol_new madera_adsp_rate_controls[]; + +int madera_dfc_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol); + +int madera_lp_mode_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol); + +int madera_out1_demux_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol); +int madera_out1_demux_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol); + +int madera_rate_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol); + +int madera_eq_coeff_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol); +int madera_lhpf_coeff_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol); + +int madera_sysclk_ev(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event); +int madera_spk_ev(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event); +int madera_in_ev(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event); +int madera_out_ev(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event); +int madera_hp_ev(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event); +int madera_anc_ev(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event); +int madera_domain_clk_ev(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, + int event); + +int madera_set_adsp_clk(struct madera_priv *priv, int dsp_num, + unsigned int freq); + +int madera_set_sysclk(struct snd_soc_component *component, int clk_id, + int source, unsigned int freq, int dir); + +int madera_init_fll(struct madera *madera, int id, int base, + struct madera_fll *fll); +int madera_set_fll_refclk(struct madera_fll *fll, int source, + unsigned int fref, unsigned int fout); +int madera_set_fll_syncclk(struct madera_fll *fll, int source, + unsigned int fref, unsigned int fout); +int madera_set_fll_ao_refclk(struct madera_fll *fll, int source, + unsigned int fin, unsigned int fout); + +int madera_core_init(struct madera_priv *priv); +int madera_core_free(struct madera_priv *priv); +int madera_init_overheat(struct madera_priv *priv); +int madera_free_overheat(struct madera_priv *priv); +int madera_init_inputs(struct snd_soc_component *component); +int madera_init_outputs(struct snd_soc_component *component, int n_mono_routes); +int madera_init_bus_error_irq(struct madera_priv *priv, int dsp_num, + irq_handler_t handler); +void madera_free_bus_error_irq(struct madera_priv *priv, int dsp_num); + +int madera_init_dai(struct madera_priv *priv, int dai); + +int madera_set_output_mode(struct snd_soc_component *component, int output, + bool differential); + +/* Following functions are for use by machine drivers */ +static inline int madera_register_notifier(struct snd_soc_component *component, + struct notifier_block *nb) +{ + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct madera *madera = priv->madera; + + return blocking_notifier_chain_register(&madera->notifier, nb); +} + +static inline int +madera_unregister_notifier(struct snd_soc_component *component, + struct notifier_block *nb) +{ + struct madera_priv *priv = snd_soc_component_get_drvdata(component); + struct madera *madera = priv->madera; + + return blocking_notifier_chain_unregister(&madera->notifier, nb); +} + +#endif -- cgit v1.2.3 From fd5d10059d5ead12dd12f05ae6d96e70d1fac3df Mon Sep 17 00:00:00 2001 From: Felix Riemann Date: Thu, 20 Jun 2019 08:45:00 +0100 Subject: regulator: da9061/62: Adjust LDO voltage selection minimum value According to the DA9061 and DA9062 datasheets the LDO voltage selection registers have a lower value of 0x02. This applies to voltage registers VLDO1_A, VLDO2_A, VLDO3_A and VLDO4_A. This linear offset of 0x02 was previously not observed by the driver, causing the LDO output voltage to be systematically lower by two steps (= 0.1V). This patch fixes the minimum linear selector offset by setting it to a value of 2 and increases the n_voltages by the same amount allowing voltages in the range 0x02 -> 0.9V to 0x38 -> 3.6V to be correctly selected. Also fixes an incorrect calculaton for the n_voltages value in the regulator LDO2. These fixes effect all LDO regulators for DA9061 and DA9062. Acked-by: Steve Twiss Tested-by: Steve Twiss Signed-off-by: Felix Riemann Signed-off-by: Steve Twiss Signed-off-by: Mark Brown --- drivers/regulator/da9062-regulator.c | 40 +++++++++++++++++++++--------------- include/linux/mfd/da9062/registers.h | 3 +++ 2 files changed, 27 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/da9062-regulator.c b/drivers/regulator/da9062-regulator.c index a02e0488410f..2ffc64622451 100644 --- a/drivers/regulator/da9062-regulator.c +++ b/drivers/regulator/da9062-regulator.c @@ -493,12 +493,13 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = { .desc.ops = &da9062_ldo_ops, .desc.min_uV = (900) * 1000, .desc.uV_step = (50) * 1000, - .desc.n_voltages = ((3600) - (900))/(50) + 1, + .desc.n_voltages = ((3600) - (900))/(50) + 1 + + DA9062AA_VLDO_A_MIN_SEL, .desc.enable_reg = DA9062AA_LDO1_CONT, .desc.enable_mask = DA9062AA_LDO1_EN_MASK, .desc.vsel_reg = DA9062AA_VLDO1_A, .desc.vsel_mask = DA9062AA_VLDO1_A_MASK, - .desc.linear_min_sel = 0, + .desc.linear_min_sel = DA9062AA_VLDO_A_MIN_SEL, .sleep = REG_FIELD(DA9062AA_VLDO1_A, __builtin_ffs((int)DA9062AA_LDO1_SL_A_MASK) - 1, sizeof(unsigned int) * 8 - @@ -525,12 +526,13 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = { .desc.ops = &da9062_ldo_ops, .desc.min_uV = (900) * 1000, .desc.uV_step = (50) * 1000, - .desc.n_voltages = ((3600) - (600))/(50) + 1, + .desc.n_voltages = ((3600) - (900))/(50) + 1 + + DA9062AA_VLDO_A_MIN_SEL, .desc.enable_reg = DA9062AA_LDO2_CONT, .desc.enable_mask = DA9062AA_LDO2_EN_MASK, .desc.vsel_reg = DA9062AA_VLDO2_A, .desc.vsel_mask = DA9062AA_VLDO2_A_MASK, - .desc.linear_min_sel = 0, + .desc.linear_min_sel = DA9062AA_VLDO_A_MIN_SEL, .sleep = REG_FIELD(DA9062AA_VLDO2_A, __builtin_ffs((int)DA9062AA_LDO2_SL_A_MASK) - 1, sizeof(unsigned int) * 8 - @@ -557,12 +559,13 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = { .desc.ops = &da9062_ldo_ops, .desc.min_uV = (900) * 1000, .desc.uV_step = (50) * 1000, - .desc.n_voltages = ((3600) - (900))/(50) + 1, + .desc.n_voltages = ((3600) - (900))/(50) + 1 + + DA9062AA_VLDO_A_MIN_SEL, .desc.enable_reg = DA9062AA_LDO3_CONT, .desc.enable_mask = DA9062AA_LDO3_EN_MASK, .desc.vsel_reg = DA9062AA_VLDO3_A, .desc.vsel_mask = DA9062AA_VLDO3_A_MASK, - .desc.linear_min_sel = 0, + .desc.linear_min_sel = DA9062AA_VLDO_A_MIN_SEL, .sleep = REG_FIELD(DA9062AA_VLDO3_A, __builtin_ffs((int)DA9062AA_LDO3_SL_A_MASK) - 1, sizeof(unsigned int) * 8 - @@ -589,12 +592,13 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = { .desc.ops = &da9062_ldo_ops, .desc.min_uV = (900) * 1000, .desc.uV_step = (50) * 1000, - .desc.n_voltages = ((3600) - (900))/(50) + 1, + .desc.n_voltages = ((3600) - (900))/(50) + 1 + + DA9062AA_VLDO_A_MIN_SEL, .desc.enable_reg = DA9062AA_LDO4_CONT, .desc.enable_mask = DA9062AA_LDO4_EN_MASK, .desc.vsel_reg = DA9062AA_VLDO4_A, .desc.vsel_mask = DA9062AA_VLDO4_A_MASK, - .desc.linear_min_sel = 0, + .desc.linear_min_sel = DA9062AA_VLDO_A_MIN_SEL, .sleep = REG_FIELD(DA9062AA_VLDO4_A, __builtin_ffs((int)DA9062AA_LDO4_SL_A_MASK) - 1, sizeof(unsigned int) * 8 - @@ -769,12 +773,13 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = { .desc.ops = &da9062_ldo_ops, .desc.min_uV = (900) * 1000, .desc.uV_step = (50) * 1000, - .desc.n_voltages = ((3600) - (900))/(50) + 1, + .desc.n_voltages = ((3600) - (900))/(50) + 1 + + DA9062AA_VLDO_A_MIN_SEL, .desc.enable_reg = DA9062AA_LDO1_CONT, .desc.enable_mask = DA9062AA_LDO1_EN_MASK, .desc.vsel_reg = DA9062AA_VLDO1_A, .desc.vsel_mask = DA9062AA_VLDO1_A_MASK, - .desc.linear_min_sel = 0, + .desc.linear_min_sel = DA9062AA_VLDO_A_MIN_SEL, .sleep = REG_FIELD(DA9062AA_VLDO1_A, __builtin_ffs((int)DA9062AA_LDO1_SL_A_MASK) - 1, sizeof(unsigned int) * 8 - @@ -801,12 +806,13 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = { .desc.ops = &da9062_ldo_ops, .desc.min_uV = (900) * 1000, .desc.uV_step = (50) * 1000, - .desc.n_voltages = ((3600) - (600))/(50) + 1, + .desc.n_voltages = ((3600) - (900))/(50) + 1 + + DA9062AA_VLDO_A_MIN_SEL, .desc.enable_reg = DA9062AA_LDO2_CONT, .desc.enable_mask = DA9062AA_LDO2_EN_MASK, .desc.vsel_reg = DA9062AA_VLDO2_A, .desc.vsel_mask = DA9062AA_VLDO2_A_MASK, - .desc.linear_min_sel = 0, + .desc.linear_min_sel = DA9062AA_VLDO_A_MIN_SEL, .sleep = REG_FIELD(DA9062AA_VLDO2_A, __builtin_ffs((int)DA9062AA_LDO2_SL_A_MASK) - 1, sizeof(unsigned int) * 8 - @@ -833,12 +839,13 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = { .desc.ops = &da9062_ldo_ops, .desc.min_uV = (900) * 1000, .desc.uV_step = (50) * 1000, - .desc.n_voltages = ((3600) - (900))/(50) + 1, + .desc.n_voltages = ((3600) - (900))/(50) + 1 + + DA9062AA_VLDO_A_MIN_SEL, .desc.enable_reg = DA9062AA_LDO3_CONT, .desc.enable_mask = DA9062AA_LDO3_EN_MASK, .desc.vsel_reg = DA9062AA_VLDO3_A, .desc.vsel_mask = DA9062AA_VLDO3_A_MASK, - .desc.linear_min_sel = 0, + .desc.linear_min_sel = DA9062AA_VLDO_A_MIN_SEL, .sleep = REG_FIELD(DA9062AA_VLDO3_A, __builtin_ffs((int)DA9062AA_LDO3_SL_A_MASK) - 1, sizeof(unsigned int) * 8 - @@ -865,12 +872,13 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = { .desc.ops = &da9062_ldo_ops, .desc.min_uV = (900) * 1000, .desc.uV_step = (50) * 1000, - .desc.n_voltages = ((3600) - (900))/(50) + 1, + .desc.n_voltages = ((3600) - (900))/(50) + 1 + + DA9062AA_VLDO_A_MIN_SEL, .desc.enable_reg = DA9062AA_LDO4_CONT, .desc.enable_mask = DA9062AA_LDO4_EN_MASK, .desc.vsel_reg = DA9062AA_VLDO4_A, .desc.vsel_mask = DA9062AA_VLDO4_A_MASK, - .desc.linear_min_sel = 0, + .desc.linear_min_sel = DA9062AA_VLDO_A_MIN_SEL, .sleep = REG_FIELD(DA9062AA_VLDO4_A, __builtin_ffs((int)DA9062AA_LDO4_SL_A_MASK) - 1, sizeof(unsigned int) * 8 - diff --git a/include/linux/mfd/da9062/registers.h b/include/linux/mfd/da9062/registers.h index fe04b708742b..2906bf6160fb 100644 --- a/include/linux/mfd/da9062/registers.h +++ b/include/linux/mfd/da9062/registers.h @@ -797,6 +797,9 @@ #define DA9062AA_BUCK3_SL_A_SHIFT 7 #define DA9062AA_BUCK3_SL_A_MASK BIT(7) +/* DA9062AA_VLDO[1-4]_A common */ +#define DA9062AA_VLDO_A_MIN_SEL 2 + /* DA9062AA_VLDO1_A = 0x0A9 */ #define DA9062AA_VLDO1_A_SHIFT 0 #define DA9062AA_VLDO1_A_MASK 0x3f -- cgit v1.2.3 From 116b9731ad7614032a390bb9ad8998a14d6dc752 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 26 May 2019 17:34:02 +0300 Subject: fsnotify: add empty fsnotify_{unlink,rmdir}() hooks We would like to move fsnotify_nameremove() calls from d_delete() into a higher layer where the hook makes more sense and so we can consider every d_delete() call site individually. Start by creating empty hook fsnotify_{unlink,rmdir}() and place them in the proper VFS call sites. After all d_delete() call sites will be converted to use the new hook, the new hook will generate the delete events and fsnotify_nameremove() hook will be removed. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/namei.c | 2 ++ include/linux/fsnotify.h | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 20831c2fbb34..209c51a5226c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3883,6 +3883,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_flags |= S_DEAD; dont_mount(dentry); detach_mounts(dentry); + fsnotify_rmdir(dir, dentry); out: inode_unlock(dentry->d_inode); @@ -3999,6 +4000,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate if (!error) { dont_mount(dentry); detach_mounts(dentry); + fsnotify_unlink(dir, dentry); } } } diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 94972e8eb6d1..7f23eddefcd0 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -188,6 +188,19 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, &new_dentry->d_name, 0); } +/* + * fsnotify_unlink - 'name' was unlinked + * + * Caller must make sure that dentry->d_name is stable. + */ +static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry) +{ + /* Expected to be called before d_delete() */ + WARN_ON_ONCE(d_is_negative(dentry)); + + /* TODO: call fsnotify_dirent() */ +} + /* * fsnotify_mkdir - directory 'name' was created */ @@ -198,6 +211,19 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) fsnotify_dirent(inode, dentry, FS_CREATE | FS_ISDIR); } +/* + * fsnotify_rmdir - directory 'name' was removed + * + * Caller must make sure that dentry->d_name is stable. + */ +static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry) +{ + /* Expected to be called before d_delete() */ + WARN_ON_ONCE(d_is_negative(dentry)); + + /* TODO: call fsnotify_dirent() */ +} + /* * fsnotify_access - file was read */ -- cgit v1.2.3 From 49246466a98996e78b68a0041807dbd2628c53fe Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 26 May 2019 17:34:10 +0300 Subject: fsnotify: move fsnotify_nameremove() hook out of d_delete() d_delete() was piggy backed for the fsnotify_nameremove() hook when in fact not all callers of d_delete() care about fsnotify events. For all callers of d_delete() that may be interested in fsnotify events, we made sure to call one of fsnotify_{unlink,rmdir}() hooks before calling d_delete(). Now we can move the fsnotify_nameremove() call from d_delete() to the fsnotify_{unlink,rmdir}() hooks. Two explicit calls to fsnotify_nameremove() from nfs/afs sillyrename are also removed. This will cause a change of behavior - nfs/afs will NOT generate an fsnotify delete event when renaming over a positive dentry. This change is desirable, because it is consistent with the behavior of all other filesystems. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/afs/dir_silly.c | 5 ----- fs/dcache.c | 2 -- fs/nfs/unlink.c | 6 ------ include/linux/fsnotify.h | 2 ++ 4 files changed, 2 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c index 057b8d322422..361088a5edb9 100644 --- a/fs/afs/dir_silly.c +++ b/fs/afs/dir_silly.c @@ -60,11 +60,6 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) afs_edit_dir_add(dvnode, &new->d_name, &vnode->fid, afs_edit_dir_for_silly_1); - - /* vfs_unlink and the like do not issue this when a file is - * sillyrenamed, so do it here. - */ - fsnotify_nameremove(old, 0); } kfree(scb); diff --git a/fs/dcache.c b/fs/dcache.c index c435398f2c81..f41121e5d1ec 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2372,7 +2372,6 @@ EXPORT_SYMBOL(d_hash_and_lookup); void d_delete(struct dentry * dentry) { struct inode *inode = dentry->d_inode; - int isdir = d_is_dir(dentry); spin_lock(&inode->i_lock); spin_lock(&dentry->d_lock); @@ -2387,7 +2386,6 @@ void d_delete(struct dentry * dentry) spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); } - fsnotify_nameremove(dentry, isdir); } EXPORT_SYMBOL(d_delete); diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 52d533967485..0effeee28352 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -396,12 +396,6 @@ nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data) nfs_cancel_async_unlink(dentry); return; } - - /* - * vfs_unlink and the like do not issue this when a file is - * sillyrenamed, so do it here. - */ - fsnotify_nameremove(dentry, 0); } #define SILLYNAME_PREFIX ".nfs" diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 7f23eddefcd0..0145073c2b42 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -199,6 +199,7 @@ static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry) WARN_ON_ONCE(d_is_negative(dentry)); /* TODO: call fsnotify_dirent() */ + fsnotify_nameremove(dentry, 0); } /* @@ -222,6 +223,7 @@ static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry) WARN_ON_ONCE(d_is_negative(dentry)); /* TODO: call fsnotify_dirent() */ + fsnotify_nameremove(dentry, 1); } /* -- cgit v1.2.3 From 7377f5bec13332bc470856f337935be6cabbcf24 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 26 May 2019 17:34:11 +0300 Subject: fsnotify: get rid of fsnotify_nameremove() For all callers of fsnotify_{unlink,rmdir}(), we made sure that d_parent and d_name are stable. Therefore, fsnotify_{unlink,rmdir}() do not need the safety measures in fsnotify_nameremove() to stabilize parent and name. We can now simplify those hooks and get rid of fsnotify_nameremove(). Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fsnotify.c | 41 ---------------------------------------- include/linux/fsnotify.h | 6 ++---- include/linux/fsnotify_backend.h | 4 ---- 3 files changed, 2 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 4eb2ebfac468..2ecef6155fc0 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -94,47 +94,6 @@ void fsnotify_sb_delete(struct super_block *sb) fsnotify_clear_marks_by_sb(sb); } -/* - * fsnotify_nameremove - a filename was removed from a directory - * - * This is mostly called under parent vfs inode lock so name and - * dentry->d_parent should be stable. However there are some corner cases where - * inode lock is not held. So to be on the safe side and be reselient to future - * callers and out of tree users of d_delete(), we do not assume that d_parent - * and d_name are stable and we use dget_parent() and - * take_dentry_name_snapshot() to grab stable references. - */ -void fsnotify_nameremove(struct dentry *dentry, int isdir) -{ - struct dentry *parent; - struct name_snapshot name; - __u32 mask = FS_DELETE; - - /* d_delete() of pseudo inode? (e.g. __ns_get_path() playing tricks) */ - if (IS_ROOT(dentry)) - return; - - if (isdir) - mask |= FS_ISDIR; - - parent = dget_parent(dentry); - /* Avoid unneeded take_dentry_name_snapshot() */ - if (!(d_inode(parent)->i_fsnotify_mask & FS_DELETE) && - !(dentry->d_sb->s_fsnotify_mask & FS_DELETE)) - goto out_dput; - - take_dentry_name_snapshot(&name, dentry); - - fsnotify(d_inode(parent), mask, d_inode(dentry), FSNOTIFY_EVENT_INODE, - &name.name, 0); - - release_dentry_name_snapshot(&name); - -out_dput: - dput(parent); -} -EXPORT_SYMBOL(fsnotify_nameremove); - /* * Given an inode, first check if we care what happens to our children. Inotify * and dnotify both tell their parents about events. If we care about any event diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 0145073c2b42..a2d5d175d3c1 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -198,8 +198,7 @@ static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry) /* Expected to be called before d_delete() */ WARN_ON_ONCE(d_is_negative(dentry)); - /* TODO: call fsnotify_dirent() */ - fsnotify_nameremove(dentry, 0); + fsnotify_dirent(dir, dentry, FS_DELETE); } /* @@ -222,8 +221,7 @@ static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry) /* Expected to be called before d_delete() */ WARN_ON_ONCE(d_is_negative(dentry)); - /* TODO: call fsnotify_dirent() */ - fsnotify_nameremove(dentry, 1); + fsnotify_dirent(dir, dentry, FS_DELETE | FS_ISDIR); } /* diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index a9f9dcc1e515..c28f6ed1f59b 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -355,7 +355,6 @@ extern int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); extern void fsnotify_sb_delete(struct super_block *sb); -extern void fsnotify_nameremove(struct dentry *dentry, int isdir); extern u32 fsnotify_get_cookie(void); static inline int fsnotify_inode_watches_children(struct inode *inode) @@ -525,9 +524,6 @@ static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt) static inline void fsnotify_sb_delete(struct super_block *sb) {} -static inline void fsnotify_nameremove(struct dentry *dentry, int isdir) -{} - static inline void fsnotify_update_flags(struct dentry *dentry) {} -- cgit v1.2.3 From cf4b20ecfa4edc4a0e55d52bc0a735f60bdfe7eb Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 18 Jun 2019 17:34:42 +0200 Subject: mmc: sdio: Turn sdio_run_irqs() into static All external users of sdio_run_irqs() have converted into using the preferred sdio_signal_irq() interface, thus not calling the function directly any more. Avoid further new users of it, by turning it into static. Suggested-by: Douglas Anderson Signed-off-by: Ulf Hansson Reviewed-by: Douglas Anderson Signed-off-by: Ulf Hansson --- drivers/mmc/core/sdio_irq.c | 3 +-- include/linux/mmc/host.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c index 9f54a259a1b3..0bcc5e83bd1a 100644 --- a/drivers/mmc/core/sdio_irq.c +++ b/drivers/mmc/core/sdio_irq.c @@ -92,7 +92,7 @@ static int process_sdio_pending_irqs(struct mmc_host *host) return ret; } -void sdio_run_irqs(struct mmc_host *host) +static void sdio_run_irqs(struct mmc_host *host) { mmc_claim_host(host); if (host->sdio_irqs) { @@ -103,7 +103,6 @@ void sdio_run_irqs(struct mmc_host *host) } mmc_release_host(host); } -EXPORT_SYMBOL_GPL(sdio_run_irqs); void sdio_irq_work(struct work_struct *work) { diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index ecb7972e2423..a9b12322c775 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -504,7 +504,6 @@ static inline void mmc_signal_sdio_irq(struct mmc_host *host) wake_up_process(host->sdio_irq_thread); } -void sdio_run_irqs(struct mmc_host *host); void sdio_signal_irq(struct mmc_host *host); #ifdef CONFIG_REGULATOR -- cgit v1.2.3 From f924cddebc900f7cb10d5538d69523e558fa681c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jun 2019 12:29:00 +0200 Subject: block: remove blk_init_request_from_bio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lightnvm should have never used this function, as it is sending passthrough requests, so switch it to blk_rq_append_bio like all the other passthrough request users. Inline blk_init_request_from_bio into the only remaining caller. Reviewed-by: Hannes Reinecke Reviewed-by: Minwoo Im Reviewed-by: Javier González Reviewed-by: Matias Bjørling Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 11 ----------- block/blk-mq.c | 7 ++++++- drivers/nvme/host/lightnvm.c | 2 +- include/linux/blkdev.h | 1 - 4 files changed, 7 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index b6f22f219389..d1c7c69a20dd 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -687,17 +687,6 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, return false; } -void blk_init_request_from_bio(struct request *req, struct bio *bio) -{ - if (bio->bi_opf & REQ_RAHEAD) - req->cmd_flags |= REQ_FAILFAST_MASK; - - req->__sector = bio->bi_iter.bi_sector; - req->write_hint = bio->bi_write_hint; - blk_rq_bio_prep(req->q, req, bio); -} -EXPORT_SYMBOL_GPL(blk_init_request_from_bio); - static void handle_bad_sector(struct bio *bio, sector_t maxsector) { char b[BDEVNAME_SIZE]; diff --git a/block/blk-mq.c b/block/blk-mq.c index ce0f5f4ede70..61457bffa55f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1766,7 +1766,12 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) static void blk_mq_bio_to_request(struct request *rq, struct bio *bio) { - blk_init_request_from_bio(rq, bio); + if (bio->bi_opf & REQ_RAHEAD) + rq->cmd_flags |= REQ_FAILFAST_MASK; + + rq->__sector = bio->bi_iter.bi_sector; + rq->write_hint = bio->bi_write_hint; + blk_rq_bio_prep(rq->q, rq, bio); blk_account_io_start(rq, true); } diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 4f20a10b39d3..ba009d4c9dfa 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -660,7 +660,7 @@ static struct request *nvme_nvm_alloc_request(struct request_queue *q, rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; if (rqd->bio) - blk_init_request_from_bio(rq, rqd->bio); + blk_rq_append_bio(rq, &rqd->bio); else rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ad49a775c54f..2d4dfe82767a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -828,7 +828,6 @@ extern void blk_unregister_queue(struct gendisk *disk); extern blk_qc_t generic_make_request(struct bio *bio); extern blk_qc_t direct_make_request(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); -extern void blk_init_request_from_bio(struct request *req, struct bio *bio); extern void blk_put_request(struct request *); extern struct request *blk_get_request(struct request_queue *, unsigned int op, blk_mq_req_flags_t flags); -- cgit v1.2.3 From 14ccb66b3f585b2bc21e7256c96090abed5a512c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jun 2019 12:29:01 +0200 Subject: block: remove the bi_phys_segments field in struct bio We only need the number of segments in the blk-mq submission path. Remove the field from struct bio, and return it from a variant of blk_queue_split instead of that it can passed as an argument to those functions that need the value. This also means we stop recounting segments except for cloning and partial segments. To keep the number of arguments in this how path down remove pointless struct request_queue arguments from any of the functions that had it and grew a nr_segs argument. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- Documentation/block/biodoc.txt | 1 - block/bfq-iosched.c | 5 +-- block/bio.c | 15 +-------- block/blk-core.c | 32 ++++++++---------- block/blk-map.c | 10 ++++-- block/blk-merge.c | 75 +++++++++++++++--------------------------- block/blk-mq-sched.c | 26 ++++++++------- block/blk-mq-sched.h | 10 +++--- block/blk-mq.c | 23 ++++++------- block/blk.h | 23 ++++++------- block/kyber-iosched.c | 5 +-- block/mq-deadline.c | 5 +-- drivers/md/raid5.c | 1 - include/linux/bio.h | 1 - include/linux/blk-mq.h | 2 +- include/linux/blk_types.h | 6 ---- include/linux/blkdev.h | 1 - include/linux/elevator.h | 2 +- 18 files changed, 106 insertions(+), 137 deletions(-) (limited to 'include/linux') diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index ac18b488cb5e..31c177663ed5 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -436,7 +436,6 @@ struct bio { struct bvec_iter bi_iter; /* current index into bio_vec array */ unsigned int bi_size; /* total size in bytes */ - unsigned short bi_phys_segments; /* segments after physaddr coalesce*/ unsigned short bi_hw_segments; /* segments after DMA remapping */ unsigned int bi_max; /* max bio_vecs we can hold used as index into pool */ diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index f8d430f88d25..a6bf842cbe16 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2027,7 +2027,8 @@ static void bfq_remove_request(struct request_queue *q, } -static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio) +static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio, + unsigned int nr_segs) { struct request_queue *q = hctx->queue; struct bfq_data *bfqd = q->elevator->elevator_data; @@ -2050,7 +2051,7 @@ static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio) bfqd->bio_bfqq = NULL; bfqd->bio_bic = bic; - ret = blk_mq_sched_try_merge(q, bio, &free); + ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); if (free) blk_mq_free_request(free); diff --git a/block/bio.c b/block/bio.c index 4bcdcd3f63f4..ad9c3aa9bf7d 100644 --- a/block/bio.c +++ b/block/bio.c @@ -558,14 +558,6 @@ void bio_put(struct bio *bio) } EXPORT_SYMBOL(bio_put); -int bio_phys_segments(struct request_queue *q, struct bio *bio) -{ - if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) - blk_recount_segments(q, bio); - - return bio->bi_phys_segments; -} - /** * __bio_clone_fast - clone a bio that shares the original bio's biovec * @bio: destination bio @@ -739,7 +731,7 @@ static int __bio_add_pc_page(struct request_queue *q, struct bio *bio, if (bio_full(bio)) return 0; - if (bio->bi_phys_segments >= queue_max_segments(q)) + if (bio->bi_vcnt >= queue_max_segments(q)) return 0; bvec = &bio->bi_io_vec[bio->bi_vcnt]; @@ -749,8 +741,6 @@ static int __bio_add_pc_page(struct request_queue *q, struct bio *bio, bio->bi_vcnt++; done: bio->bi_iter.bi_size += len; - bio->bi_phys_segments = bio->bi_vcnt; - bio_set_flag(bio, BIO_SEG_VALID); return len; } @@ -1909,10 +1899,7 @@ void bio_trim(struct bio *bio, int offset, int size) if (offset == 0 && size == bio->bi_iter.bi_size) return; - bio_clear_flag(bio, BIO_SEG_VALID); - bio_advance(bio, offset << 9); - bio->bi_iter.bi_size = size; if (bio_integrity(bio)) diff --git a/block/blk-core.c b/block/blk-core.c index d1c7c69a20dd..ef998a724b27 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -550,15 +550,15 @@ void blk_put_request(struct request *req) } EXPORT_SYMBOL(blk_put_request); -bool bio_attempt_back_merge(struct request_queue *q, struct request *req, - struct bio *bio) +bool bio_attempt_back_merge(struct request *req, struct bio *bio, + unsigned int nr_segs) { const int ff = bio->bi_opf & REQ_FAILFAST_MASK; - if (!ll_back_merge_fn(q, req, bio)) + if (!ll_back_merge_fn(req, bio, nr_segs)) return false; - trace_block_bio_backmerge(q, req, bio); + trace_block_bio_backmerge(req->q, req, bio); if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) blk_rq_set_mixed_merge(req); @@ -571,15 +571,15 @@ bool bio_attempt_back_merge(struct request_queue *q, struct request *req, return true; } -bool bio_attempt_front_merge(struct request_queue *q, struct request *req, - struct bio *bio) +bool bio_attempt_front_merge(struct request *req, struct bio *bio, + unsigned int nr_segs) { const int ff = bio->bi_opf & REQ_FAILFAST_MASK; - if (!ll_front_merge_fn(q, req, bio)) + if (!ll_front_merge_fn(req, bio, nr_segs)) return false; - trace_block_bio_frontmerge(q, req, bio); + trace_block_bio_frontmerge(req->q, req, bio); if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) blk_rq_set_mixed_merge(req); @@ -621,6 +621,7 @@ no_merge: * blk_attempt_plug_merge - try to merge with %current's plugged list * @q: request_queue new bio is being queued at * @bio: new bio being queued + * @nr_segs: number of segments in @bio * @same_queue_rq: pointer to &struct request that gets filled in when * another request associated with @q is found on the plug list * (optional, may be %NULL) @@ -639,7 +640,7 @@ no_merge: * Caller must ensure !blk_queue_nomerges(q) beforehand. */ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, - struct request **same_queue_rq) + unsigned int nr_segs, struct request **same_queue_rq) { struct blk_plug *plug; struct request *rq; @@ -668,10 +669,10 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, switch (blk_try_merge(rq, bio)) { case ELEVATOR_BACK_MERGE: - merged = bio_attempt_back_merge(q, rq, bio); + merged = bio_attempt_back_merge(rq, bio, nr_segs); break; case ELEVATOR_FRONT_MERGE: - merged = bio_attempt_front_merge(q, rq, bio); + merged = bio_attempt_front_merge(rq, bio, nr_segs); break; case ELEVATOR_DISCARD_MERGE: merged = bio_attempt_discard_merge(q, rq, bio); @@ -1427,14 +1428,9 @@ bool blk_update_request(struct request *req, blk_status_t error, } EXPORT_SYMBOL_GPL(blk_update_request); -void blk_rq_bio_prep(struct request_queue *q, struct request *rq, - struct bio *bio) +void blk_rq_bio_prep(struct request *rq, struct bio *bio, unsigned int nr_segs) { - if (bio_has_data(bio)) - rq->nr_phys_segments = bio_phys_segments(q, bio); - else if (bio_op(bio) == REQ_OP_DISCARD) - rq->nr_phys_segments = 1; - + rq->nr_phys_segments = nr_segs; rq->__data_len = bio->bi_iter.bi_size; rq->bio = rq->biotail = bio; rq->ioprio = bio_prio(bio); diff --git a/block/blk-map.c b/block/blk-map.c index db9373bd31ac..3a62e471d81b 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -18,13 +18,19 @@ int blk_rq_append_bio(struct request *rq, struct bio **bio) { struct bio *orig_bio = *bio; + struct bvec_iter iter; + struct bio_vec bv; + unsigned int nr_segs = 0; blk_queue_bounce(rq->q, bio); + bio_for_each_bvec(bv, *bio, iter) + nr_segs++; + if (!rq->bio) { - blk_rq_bio_prep(rq->q, rq, *bio); + blk_rq_bio_prep(rq, *bio, nr_segs); } else { - if (!ll_back_merge_fn(rq->q, rq, *bio)) { + if (!ll_back_merge_fn(rq, *bio, nr_segs)) { if (orig_bio != *bio) { bio_put(*bio); *bio = orig_bio; diff --git a/block/blk-merge.c b/block/blk-merge.c index 17713d7d98d5..72b4fd89a22d 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -258,32 +258,29 @@ split: return do_split ? new : NULL; } -void blk_queue_split(struct request_queue *q, struct bio **bio) +void __blk_queue_split(struct request_queue *q, struct bio **bio, + unsigned int *nr_segs) { - struct bio *split, *res; - unsigned nsegs; + struct bio *split; switch (bio_op(*bio)) { case REQ_OP_DISCARD: case REQ_OP_SECURE_ERASE: - split = blk_bio_discard_split(q, *bio, &q->bio_split, &nsegs); + split = blk_bio_discard_split(q, *bio, &q->bio_split, nr_segs); break; case REQ_OP_WRITE_ZEROES: - split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split, &nsegs); + split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split, + nr_segs); break; case REQ_OP_WRITE_SAME: - split = blk_bio_write_same_split(q, *bio, &q->bio_split, &nsegs); + split = blk_bio_write_same_split(q, *bio, &q->bio_split, + nr_segs); break; default: - split = blk_bio_segment_split(q, *bio, &q->bio_split, &nsegs); + split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs); break; } - /* physical segments can be figured out during splitting */ - res = split ? split : *bio; - res->bi_phys_segments = nsegs; - bio_set_flag(res, BIO_SEG_VALID); - if (split) { /* there isn't chance to merge the splitted bio */ split->bi_opf |= REQ_NOMERGE; @@ -304,6 +301,13 @@ void blk_queue_split(struct request_queue *q, struct bio **bio) *bio = split; } } + +void blk_queue_split(struct request_queue *q, struct bio **bio) +{ + unsigned int nr_segs; + + __blk_queue_split(q, bio, &nr_segs); +} EXPORT_SYMBOL(blk_queue_split); static unsigned int __blk_recalc_rq_segments(struct request_queue *q, @@ -338,17 +342,6 @@ void blk_recalc_rq_segments(struct request *rq) rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio); } -void blk_recount_segments(struct request_queue *q, struct bio *bio) -{ - struct bio *nxt = bio->bi_next; - - bio->bi_next = NULL; - bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio); - bio->bi_next = nxt; - - bio_set_flag(bio, BIO_SEG_VALID); -} - static inline struct scatterlist *blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist) { @@ -519,16 +512,13 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, } EXPORT_SYMBOL(blk_rq_map_sg); -static inline int ll_new_hw_segment(struct request_queue *q, - struct request *req, - struct bio *bio) +static inline int ll_new_hw_segment(struct request *req, struct bio *bio, + unsigned int nr_phys_segs) { - int nr_phys_segs = bio_phys_segments(q, bio); - - if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) + if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(req->q)) goto no_merge; - if (blk_integrity_merge_bio(q, req, bio) == false) + if (blk_integrity_merge_bio(req->q, req, bio) == false) goto no_merge; /* @@ -539,12 +529,11 @@ static inline int ll_new_hw_segment(struct request_queue *q, return 1; no_merge: - req_set_nomerge(q, req); + req_set_nomerge(req->q, req); return 0; } -int ll_back_merge_fn(struct request_queue *q, struct request *req, - struct bio *bio) +int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs) { if (req_gap_back_merge(req, bio)) return 0; @@ -553,21 +542,15 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, return 0; if (blk_rq_sectors(req) + bio_sectors(bio) > blk_rq_get_max_sectors(req, blk_rq_pos(req))) { - req_set_nomerge(q, req); + req_set_nomerge(req->q, req); return 0; } - if (!bio_flagged(req->biotail, BIO_SEG_VALID)) - blk_recount_segments(q, req->biotail); - if (!bio_flagged(bio, BIO_SEG_VALID)) - blk_recount_segments(q, bio); - return ll_new_hw_segment(q, req, bio); + return ll_new_hw_segment(req, bio, nr_segs); } -int ll_front_merge_fn(struct request_queue *q, struct request *req, - struct bio *bio) +int ll_front_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs) { - if (req_gap_front_merge(req, bio)) return 0; if (blk_integrity_rq(req) && @@ -575,15 +558,11 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, return 0; if (blk_rq_sectors(req) + bio_sectors(bio) > blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) { - req_set_nomerge(q, req); + req_set_nomerge(req->q, req); return 0; } - if (!bio_flagged(bio, BIO_SEG_VALID)) - blk_recount_segments(q, bio); - if (!bio_flagged(req->bio, BIO_SEG_VALID)) - blk_recount_segments(q, req->bio); - return ll_new_hw_segment(q, req, bio); + return ll_new_hw_segment(req, bio, nr_segs); } static bool req_attempt_discard_merge(struct request_queue *q, struct request *req, diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 2766066a15db..956a7aa9a637 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -224,7 +224,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) } bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, - struct request **merged_request) + unsigned int nr_segs, struct request **merged_request) { struct request *rq; @@ -232,7 +232,7 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, case ELEVATOR_BACK_MERGE: if (!blk_mq_sched_allow_merge(q, rq, bio)) return false; - if (!bio_attempt_back_merge(q, rq, bio)) + if (!bio_attempt_back_merge(rq, bio, nr_segs)) return false; *merged_request = attempt_back_merge(q, rq); if (!*merged_request) @@ -241,7 +241,7 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, case ELEVATOR_FRONT_MERGE: if (!blk_mq_sched_allow_merge(q, rq, bio)) return false; - if (!bio_attempt_front_merge(q, rq, bio)) + if (!bio_attempt_front_merge(rq, bio, nr_segs)) return false; *merged_request = attempt_front_merge(q, rq); if (!*merged_request) @@ -260,7 +260,7 @@ EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge); * of them. */ bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, - struct bio *bio) + struct bio *bio, unsigned int nr_segs) { struct request *rq; int checked = 8; @@ -277,11 +277,13 @@ bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, switch (blk_try_merge(rq, bio)) { case ELEVATOR_BACK_MERGE: if (blk_mq_sched_allow_merge(q, rq, bio)) - merged = bio_attempt_back_merge(q, rq, bio); + merged = bio_attempt_back_merge(rq, bio, + nr_segs); break; case ELEVATOR_FRONT_MERGE: if (blk_mq_sched_allow_merge(q, rq, bio)) - merged = bio_attempt_front_merge(q, rq, bio); + merged = bio_attempt_front_merge(rq, bio, + nr_segs); break; case ELEVATOR_DISCARD_MERGE: merged = bio_attempt_discard_merge(q, rq, bio); @@ -304,13 +306,14 @@ EXPORT_SYMBOL_GPL(blk_mq_bio_list_merge); */ static bool blk_mq_attempt_merge(struct request_queue *q, struct blk_mq_hw_ctx *hctx, - struct blk_mq_ctx *ctx, struct bio *bio) + struct blk_mq_ctx *ctx, struct bio *bio, + unsigned int nr_segs) { enum hctx_type type = hctx->type; lockdep_assert_held(&ctx->lock); - if (blk_mq_bio_list_merge(q, &ctx->rq_lists[type], bio)) { + if (blk_mq_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs)) { ctx->rq_merged++; return true; } @@ -318,7 +321,8 @@ static bool blk_mq_attempt_merge(struct request_queue *q, return false; } -bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) +bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, + unsigned int nr_segs) { struct elevator_queue *e = q->elevator; struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); @@ -328,7 +332,7 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) if (e && e->type->ops.bio_merge) { blk_mq_put_ctx(ctx); - return e->type->ops.bio_merge(hctx, bio); + return e->type->ops.bio_merge(hctx, bio, nr_segs); } type = hctx->type; @@ -336,7 +340,7 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) !list_empty_careful(&ctx->rq_lists[type])) { /* default per sw-queue merge */ spin_lock(&ctx->lock); - ret = blk_mq_attempt_merge(q, hctx, ctx, bio); + ret = blk_mq_attempt_merge(q, hctx, ctx, bio, nr_segs); spin_unlock(&ctx->lock); } diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 3cf92cbbd8ac..cf22ab00fefb 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -12,8 +12,9 @@ void blk_mq_sched_assign_ioc(struct request *rq); void blk_mq_sched_request_inserted(struct request *rq); bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, - struct request **merged_request); -bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio); + unsigned int nr_segs, struct request **merged_request); +bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, + unsigned int nr_segs); bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq); void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx); void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx); @@ -31,12 +32,13 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e); void blk_mq_sched_free_requests(struct request_queue *q); static inline bool -blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) +blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, + unsigned int nr_segs) { if (blk_queue_nomerges(q) || !bio_mergeable(bio)) return false; - return __blk_mq_sched_bio_merge(q, bio); + return __blk_mq_sched_bio_merge(q, bio, nr_segs); } static inline bool diff --git a/block/blk-mq.c b/block/blk-mq.c index 61457bffa55f..d89383847d09 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1764,14 +1764,15 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) } } -static void blk_mq_bio_to_request(struct request *rq, struct bio *bio) +static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, + unsigned int nr_segs) { if (bio->bi_opf & REQ_RAHEAD) rq->cmd_flags |= REQ_FAILFAST_MASK; rq->__sector = bio->bi_iter.bi_sector; rq->write_hint = bio->bi_write_hint; - blk_rq_bio_prep(rq->q, rq, bio); + blk_rq_bio_prep(rq, bio, nr_segs); blk_account_io_start(rq, true); } @@ -1941,20 +1942,20 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) struct request *rq; struct blk_plug *plug; struct request *same_queue_rq = NULL; + unsigned int nr_segs; blk_qc_t cookie; blk_queue_bounce(q, &bio); - - blk_queue_split(q, &bio); + __blk_queue_split(q, &bio, &nr_segs); if (!bio_integrity_prep(bio)) return BLK_QC_T_NONE; if (!is_flush_fua && !blk_queue_nomerges(q) && - blk_attempt_plug_merge(q, bio, &same_queue_rq)) + blk_attempt_plug_merge(q, bio, nr_segs, &same_queue_rq)) return BLK_QC_T_NONE; - if (blk_mq_sched_bio_merge(q, bio)) + if (blk_mq_sched_bio_merge(q, bio, nr_segs)) return BLK_QC_T_NONE; rq_qos_throttle(q, bio); @@ -1977,7 +1978,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) plug = current->plug; if (unlikely(is_flush_fua)) { blk_mq_put_ctx(data.ctx); - blk_mq_bio_to_request(rq, bio); + blk_mq_bio_to_request(rq, bio, nr_segs); /* bypass scheduler for flush rq */ blk_insert_flush(rq); @@ -1991,7 +1992,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) struct request *last = NULL; blk_mq_put_ctx(data.ctx); - blk_mq_bio_to_request(rq, bio); + blk_mq_bio_to_request(rq, bio, nr_segs); if (!request_count) trace_block_plug(q); @@ -2006,7 +2007,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_add_rq_to_plug(plug, rq); } else if (plug && !blk_queue_nomerges(q)) { - blk_mq_bio_to_request(rq, bio); + blk_mq_bio_to_request(rq, bio, nr_segs); /* * We do limited plugging. If the bio can be merged, do that. @@ -2035,11 +2036,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator && !data.hctx->dispatch_busy)) { blk_mq_put_ctx(data.ctx); - blk_mq_bio_to_request(rq, bio); + blk_mq_bio_to_request(rq, bio, nr_segs); blk_mq_try_issue_directly(data.hctx, rq, &cookie); } else { blk_mq_put_ctx(data.ctx); - blk_mq_bio_to_request(rq, bio); + blk_mq_bio_to_request(rq, bio, nr_segs); blk_mq_sched_insert_request(rq, false, true, true); } diff --git a/block/blk.h b/block/blk.h index 7814aa207153..a1d33cb65842 100644 --- a/block/blk.h +++ b/block/blk.h @@ -51,8 +51,7 @@ struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, int node, int cmd_size, gfp_t flags); void blk_free_flush_queue(struct blk_flush_queue *q); -void blk_rq_bio_prep(struct request_queue *q, struct request *rq, - struct bio *bio); +void blk_rq_bio_prep(struct request *rq, struct bio *bio, unsigned int nr_segs); void blk_freeze_queue(struct request_queue *q); static inline void blk_queue_enter_live(struct request_queue *q) @@ -154,14 +153,14 @@ static inline bool bio_integrity_endio(struct bio *bio) unsigned long blk_rq_timeout(unsigned long timeout); void blk_add_timer(struct request *req); -bool bio_attempt_front_merge(struct request_queue *q, struct request *req, - struct bio *bio); -bool bio_attempt_back_merge(struct request_queue *q, struct request *req, - struct bio *bio); +bool bio_attempt_front_merge(struct request *req, struct bio *bio, + unsigned int nr_segs); +bool bio_attempt_back_merge(struct request *req, struct bio *bio, + unsigned int nr_segs); bool bio_attempt_discard_merge(struct request_queue *q, struct request *req, struct bio *bio); bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, - struct request **same_queue_rq); + unsigned int nr_segs, struct request **same_queue_rq); void blk_account_io_start(struct request *req, bool new_io); void blk_account_io_completion(struct request *req, unsigned int bytes); @@ -202,10 +201,12 @@ static inline int blk_should_fake_timeout(struct request_queue *q) } #endif -int ll_back_merge_fn(struct request_queue *q, struct request *req, - struct bio *bio); -int ll_front_merge_fn(struct request_queue *q, struct request *req, - struct bio *bio); +void __blk_queue_split(struct request_queue *q, struct bio **bio, + unsigned int *nr_segs); +int ll_back_merge_fn(struct request *req, struct bio *bio, + unsigned int nr_segs); +int ll_front_merge_fn(struct request *req, struct bio *bio, + unsigned int nr_segs); struct request *attempt_back_merge(struct request_queue *q, struct request *rq); struct request *attempt_front_merge(struct request_queue *q, struct request *rq); int blk_attempt_req_merge(struct request_queue *q, struct request *rq, diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index c3b05119cebd..3c2602601741 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -562,7 +562,8 @@ static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) } } -static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio) +static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio, + unsigned int nr_segs) { struct kyber_hctx_data *khd = hctx->sched_data; struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue); @@ -572,7 +573,7 @@ static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio) bool merged; spin_lock(&kcq->lock); - merged = blk_mq_bio_list_merge(hctx->queue, rq_list, bio); + merged = blk_mq_bio_list_merge(hctx->queue, rq_list, bio, nr_segs); spin_unlock(&kcq->lock); blk_mq_put_ctx(ctx); diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 1876f5712bfd..b8a682b5a1bb 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -469,7 +469,8 @@ static int dd_request_merge(struct request_queue *q, struct request **rq, return ELEVATOR_NO_MERGE; } -static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio) +static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio, + unsigned int nr_segs) { struct request_queue *q = hctx->queue; struct deadline_data *dd = q->elevator->elevator_data; @@ -477,7 +478,7 @@ static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio) bool ret; spin_lock(&dd->lock); - ret = blk_mq_sched_try_merge(q, bio, &free); + ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); spin_unlock(&dd->lock); if (free) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index da94cbaa1a9e..3de4e13bde98 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5251,7 +5251,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) rcu_read_unlock(); raid_bio->bi_next = (void*)rdev; bio_set_dev(align_bi, rdev->bdev); - bio_clear_flag(align_bi, BIO_SEG_VALID); if (is_badblock(rdev, align_bi->bi_iter.bi_sector, bio_sectors(align_bi), diff --git a/include/linux/bio.h b/include/linux/bio.h index 0f23b5682640..ee11c4324751 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -408,7 +408,6 @@ static inline void bio_wouldblock_error(struct bio *bio) } struct request_queue; -extern int bio_phys_segments(struct request_queue *, struct bio *); extern int submit_bio_wait(struct bio *bio); extern void bio_advance(struct bio *, unsigned); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 15d1aa53d96c..3fa1fa59f9b2 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -306,7 +306,7 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs bool blk_mq_complete_request(struct request *rq); void blk_mq_complete_request_sync(struct request *rq); bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, - struct bio *bio); + struct bio *bio, unsigned int nr_segs); bool blk_mq_queue_stopped(struct request_queue *q); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 95202f80676c..6a53799c3fe2 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -154,11 +154,6 @@ struct bio { blk_status_t bi_status; u8 bi_partno; - /* Number of segments in this BIO after - * physical address coalescing is performed. - */ - unsigned int bi_phys_segments; - struct bvec_iter bi_iter; atomic_t __bi_remaining; @@ -210,7 +205,6 @@ struct bio { */ enum { BIO_NO_PAGE_REF, /* don't put release vec pages */ - BIO_SEG_VALID, /* bi_phys_segments valid */ BIO_CLONED, /* doesn't own data */ BIO_BOUNCED, /* bio is a bounce bio */ BIO_USER_MAPPED, /* contains user pages */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2d4dfe82767a..d5d3bb45dfb6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -841,7 +841,6 @@ extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern int blk_rq_append_bio(struct request *rq, struct bio **bio); extern void blk_queue_split(struct request_queue *, struct bio **); -extern void blk_recount_segments(struct request_queue *, struct bio *); extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t, unsigned int, void __user *); diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 6e8bc53740f0..169bb2e02516 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -34,7 +34,7 @@ struct elevator_mq_ops { void (*depth_updated)(struct blk_mq_hw_ctx *); bool (*allow_merge)(struct request_queue *, struct request *, struct bio *); - bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *); + bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *, unsigned int); int (*request_merge)(struct request_queue *q, struct request **, struct bio *); void (*request_merged)(struct request_queue *, struct request *, enum elv_merge); void (*requests_merged)(struct request_queue *, struct request *, struct request *); -- cgit v1.2.3 From 239eeb085753d4356f731a773f363eb5bed4fe81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jun 2019 12:26:19 +0200 Subject: blk-cgroup: factor out a helper to read rwstat counter Trying to break up the crazy statements to something readable. Also switch to an unsigned counter as it can't ever turn negative. Reviewed-by: Chaitanya Kulkarni Acked-by: Tejun Heo Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 5 ++--- include/linux/blk-cgroup.h | 7 +++++++ 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 440797293235..0778e52b1db2 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -745,7 +745,7 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_gq *pos_blkg; struct cgroup_subsys_state *pos_css; struct blkg_rwstat sum = { }; - int i; + unsigned int i; lockdep_assert_held(&blkg->q->queue_lock); @@ -762,8 +762,7 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, rwstat = (void *)pos_blkg + off; for (i = 0; i < BLKG_RWSTAT_NR; i++) - atomic64_add(atomic64_read(&rwstat->aux_cnt[i]) + - percpu_counter_sum_positive(&rwstat->cpu_cnt[i]), + atomic64_add(blkg_rwstat_read_counter(rwstat, i), &sum.aux_cnt[i]); } rcu_read_unlock(); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 76c61318fda5..06236f56a840 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -198,6 +198,13 @@ int blkcg_activate_policy(struct request_queue *q, void blkcg_deactivate_policy(struct request_queue *q, const struct blkcg_policy *pol); +static inline u64 blkg_rwstat_read_counter(struct blkg_rwstat *rwstat, + unsigned int idx) +{ + return atomic64_read(&rwstat->aux_cnt[idx]) + + percpu_counter_sum_positive(&rwstat->cpu_cnt[idx]); +} + const char *blkg_dev_name(struct blkcg_gq *blkg); void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, u64 (*prfill)(struct seq_file *, -- cgit v1.2.3 From 5d0b6e48cbef3219c0ed75e0e746c4ed259303c2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jun 2019 12:26:20 +0200 Subject: blk-cgroup: pass blkg_rwstat structures by reference Returning a structure generates rather bad code, so switch to passing by reference. Also don't require the structure to be zeroed and add to the 0-initialized counters, but actually set the counters to the calculated value. Acked-by: Tejun Heo Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 15 +++++++++------ block/blk-cgroup.c | 31 ++++++++++++++++--------------- include/linux/blk-cgroup.h | 14 +++++++------- 3 files changed, 32 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index b3796a40a61a..66abc82179f3 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -935,9 +935,9 @@ static u64 bfqg_prfill_stat_recursive(struct seq_file *sf, static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat sum = blkg_rwstat_recursive_sum(pd_to_blkg(pd), - &blkcg_policy_bfq, - off); + struct blkg_rwstat sum; + + blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum); return __blkg_prfill_rwstat(sf, pd, &sum); } @@ -975,9 +975,12 @@ static int bfqg_print_stat_sectors(struct seq_file *sf, void *v) static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat tmp = blkg_rwstat_recursive_sum(pd->blkg, NULL, - offsetof(struct blkcg_gq, stat_bytes)); - u64 sum = atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) + + struct blkg_rwstat tmp; + u64 sum; + + blkg_rwstat_recursive_sum(pd->blkg, NULL, + offsetof(struct blkcg_gq, stat_bytes), &tmp); + sum = atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) + atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]); return __blkg_prfill_u64(sf, pd, sum >> 9); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 0778e52b1db2..db039a869d95 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -597,8 +597,9 @@ EXPORT_SYMBOL_GPL(blkg_prfill_stat); u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd + off); + struct blkg_rwstat rwstat = { }; + blkg_rwstat_read((void *)pd + off, &rwstat); return __blkg_prfill_rwstat(sf, pd, &rwstat); } EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); @@ -606,8 +607,9 @@ EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); static u64 blkg_prfill_rwstat_field(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd->blkg + off); + struct blkg_rwstat rwstat = { }; + blkg_rwstat_read((void *)pd->blkg + off, &rwstat); return __blkg_prfill_rwstat(sf, pd, &rwstat); } @@ -649,8 +651,9 @@ static u64 blkg_prfill_rwstat_field_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat rwstat = blkg_rwstat_recursive_sum(pd->blkg, - NULL, off); + struct blkg_rwstat rwstat; + + blkg_rwstat_recursive_sum(pd->blkg, NULL, off, &rwstat); return __blkg_prfill_rwstat(sf, pd, &rwstat); } @@ -731,6 +734,7 @@ EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum); * @blkg: blkg of interest * @pol: blkcg_policy which contains the blkg_rwstat * @off: offset to the blkg_rwstat in blkg_policy_data or @blkg + * @sum: blkg_rwstat structure containing the results * * Collect the blkg_rwstat specified by @blkg, @pol and @off and all its * online descendants and their aux counts. The caller must be holding the @@ -739,12 +743,11 @@ EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum); * If @pol is NULL, blkg_rwstat is at @off bytes into @blkg; otherwise, it * is at @off bytes into @blkg's blkg_policy_data of the policy. */ -struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, - struct blkcg_policy *pol, int off) +void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, + int off, struct blkg_rwstat *sum) { struct blkcg_gq *pos_blkg; struct cgroup_subsys_state *pos_css; - struct blkg_rwstat sum = { }; unsigned int i; lockdep_assert_held(&blkg->q->queue_lock); @@ -762,12 +765,10 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, rwstat = (void *)pos_blkg + off; for (i = 0; i < BLKG_RWSTAT_NR; i++) - atomic64_add(blkg_rwstat_read_counter(rwstat, i), - &sum.aux_cnt[i]); + atomic64_set(&sum->aux_cnt[i], + blkg_rwstat_read_counter(rwstat, i)); } rcu_read_unlock(); - - return sum; } EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum); @@ -953,14 +954,14 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) spin_lock_irq(&blkg->q->queue_lock); - rwstat = blkg_rwstat_recursive_sum(blkg, NULL, - offsetof(struct blkcg_gq, stat_bytes)); + blkg_rwstat_recursive_sum(blkg, NULL, + offsetof(struct blkcg_gq, stat_bytes), &rwstat); rbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]); wbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]); dbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_DISCARD]); - rwstat = blkg_rwstat_recursive_sum(blkg, NULL, - offsetof(struct blkcg_gq, stat_ios)); + blkg_rwstat_recursive_sum(blkg, NULL, + offsetof(struct blkcg_gq, stat_ios), &rwstat); rios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]); wios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]); dios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_DISCARD]); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 06236f56a840..3ee858111274 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -224,8 +224,8 @@ int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v); u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, int off); -struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, - struct blkcg_policy *pol, int off); +void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, + int off, struct blkg_rwstat *sum); struct blkg_conf_ctx { struct gendisk *disk; @@ -700,15 +700,14 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, * * Read the current snapshot of @rwstat and return it in the aux counts. */ -static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) +static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat, + struct blkg_rwstat *result) { - struct blkg_rwstat result; int i; for (i = 0; i < BLKG_RWSTAT_NR; i++) - atomic64_set(&result.aux_cnt[i], + atomic64_set(&result->aux_cnt[i], percpu_counter_sum_positive(&rwstat->cpu_cnt[i])); - return result; } /** @@ -721,8 +720,9 @@ static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) */ static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) { - struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); + struct blkg_rwstat tmp = { }; + blkg_rwstat_read(rwstat, &tmp); return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) + atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]); } -- cgit v1.2.3 From 7af6fd9112ba310a889c60d0606b4b74049cfe14 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jun 2019 12:26:21 +0200 Subject: blk-cgroup: introduce a new struct blkg_rwstat_sample When sampling the blkcg counts we don't need atomics or per-cpu variables. Introduce a new structure just containing plain u64 counters. Acked-by: Tejun Heo Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 10 ++++------ block/blk-cgroup.c | 39 +++++++++++++++++++-------------------- include/linux/blk-cgroup.h | 22 ++++++++++++---------- 3 files changed, 35 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 66abc82179f3..624374a99c6e 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -935,7 +935,7 @@ static u64 bfqg_prfill_stat_recursive(struct seq_file *sf, static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat sum; + struct blkg_rwstat_sample sum; blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum); return __blkg_prfill_rwstat(sf, pd, &sum); @@ -975,15 +975,13 @@ static int bfqg_print_stat_sectors(struct seq_file *sf, void *v) static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat tmp; - u64 sum; + struct blkg_rwstat_sample tmp; blkg_rwstat_recursive_sum(pd->blkg, NULL, offsetof(struct blkcg_gq, stat_bytes), &tmp); - sum = atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) + - atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]); - return __blkg_prfill_u64(sf, pd, sum >> 9); + return __blkg_prfill_u64(sf, pd, + (tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]) >> 9); } static int bfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index db039a869d95..664c09866839 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -544,7 +544,7 @@ EXPORT_SYMBOL_GPL(__blkg_prfill_u64); * Print @rwstat to @sf for the device assocaited with @pd. */ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, - const struct blkg_rwstat *rwstat) + const struct blkg_rwstat_sample *rwstat) { static const char *rwstr[] = { [BLKG_RWSTAT_READ] = "Read", @@ -562,12 +562,12 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, for (i = 0; i < BLKG_RWSTAT_NR; i++) seq_printf(sf, "%s %s %llu\n", dname, rwstr[i], - (unsigned long long)atomic64_read(&rwstat->aux_cnt[i])); + rwstat->cnt[i]); - v = atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_READ]) + - atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_WRITE]) + - atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_DISCARD]); - seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v); + v = rwstat->cnt[BLKG_RWSTAT_READ] + + rwstat->cnt[BLKG_RWSTAT_WRITE] + + rwstat->cnt[BLKG_RWSTAT_DISCARD]; + seq_printf(sf, "%s Total %llu\n", dname, v); return v; } EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat); @@ -597,7 +597,7 @@ EXPORT_SYMBOL_GPL(blkg_prfill_stat); u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat rwstat = { }; + struct blkg_rwstat_sample rwstat = { }; blkg_rwstat_read((void *)pd + off, &rwstat); return __blkg_prfill_rwstat(sf, pd, &rwstat); @@ -607,7 +607,7 @@ EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); static u64 blkg_prfill_rwstat_field(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat rwstat = { }; + struct blkg_rwstat_sample rwstat = { }; blkg_rwstat_read((void *)pd->blkg + off, &rwstat); return __blkg_prfill_rwstat(sf, pd, &rwstat); @@ -651,7 +651,7 @@ static u64 blkg_prfill_rwstat_field_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat rwstat; + struct blkg_rwstat_sample rwstat; blkg_rwstat_recursive_sum(pd->blkg, NULL, off, &rwstat); return __blkg_prfill_rwstat(sf, pd, &rwstat); @@ -734,7 +734,7 @@ EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum); * @blkg: blkg of interest * @pol: blkcg_policy which contains the blkg_rwstat * @off: offset to the blkg_rwstat in blkg_policy_data or @blkg - * @sum: blkg_rwstat structure containing the results + * @sum: blkg_rwstat_sample structure containing the results * * Collect the blkg_rwstat specified by @blkg, @pol and @off and all its * online descendants and their aux counts. The caller must be holding the @@ -744,7 +744,7 @@ EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum); * is at @off bytes into @blkg's blkg_policy_data of the policy. */ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, - int off, struct blkg_rwstat *sum) + int off, struct blkg_rwstat_sample *sum) { struct blkcg_gq *pos_blkg; struct cgroup_subsys_state *pos_css; @@ -765,8 +765,7 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, rwstat = (void *)pos_blkg + off; for (i = 0; i < BLKG_RWSTAT_NR; i++) - atomic64_set(&sum->aux_cnt[i], - blkg_rwstat_read_counter(rwstat, i)); + sum->cnt[i] = blkg_rwstat_read_counter(rwstat, i); } rcu_read_unlock(); } @@ -934,7 +933,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { const char *dname; char *buf; - struct blkg_rwstat rwstat; + struct blkg_rwstat_sample rwstat; u64 rbytes, wbytes, rios, wios, dbytes, dios; size_t size = seq_get_buf(sf, &buf), off = 0; int i; @@ -956,15 +955,15 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) blkg_rwstat_recursive_sum(blkg, NULL, offsetof(struct blkcg_gq, stat_bytes), &rwstat); - rbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]); - wbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]); - dbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_DISCARD]); + rbytes = rwstat.cnt[BLKG_RWSTAT_READ]; + wbytes = rwstat.cnt[BLKG_RWSTAT_WRITE]; + dbytes = rwstat.cnt[BLKG_RWSTAT_DISCARD]; blkg_rwstat_recursive_sum(blkg, NULL, offsetof(struct blkcg_gq, stat_ios), &rwstat); - rios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]); - wios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]); - dios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_DISCARD]); + rios = rwstat.cnt[BLKG_RWSTAT_READ]; + wios = rwstat.cnt[BLKG_RWSTAT_WRITE]; + dios = rwstat.cnt[BLKG_RWSTAT_DISCARD]; spin_unlock_irq(&blkg->q->queue_lock); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 3ee858111274..e4a81767e111 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -63,8 +63,7 @@ struct blkcg { /* * blkg_[rw]stat->aux_cnt is excluded for local stats but included for - * recursive. Used to carry stats of dead children, and, for blkg_rwstat, - * to carry result values from read and sum operations. + * recursive. Used to carry stats of dead children. */ struct blkg_stat { struct percpu_counter cpu_cnt; @@ -76,6 +75,10 @@ struct blkg_rwstat { atomic64_t aux_cnt[BLKG_RWSTAT_NR]; }; +struct blkg_rwstat_sample { + u64 cnt[BLKG_RWSTAT_NR]; +}; + /* * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a * request_queue (q). This is used by blkcg policies which need to track @@ -213,7 +216,7 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, bool show_total); u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, - const struct blkg_rwstat *rwstat); + const struct blkg_rwstat_sample *rwstat); u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, int off); @@ -225,7 +228,7 @@ int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v); u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, int off); void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, - int off, struct blkg_rwstat *sum); + int off, struct blkg_rwstat_sample *sum); struct blkg_conf_ctx { struct gendisk *disk; @@ -701,13 +704,13 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, * Read the current snapshot of @rwstat and return it in the aux counts. */ static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat, - struct blkg_rwstat *result) + struct blkg_rwstat_sample *result) { int i; for (i = 0; i < BLKG_RWSTAT_NR; i++) - atomic64_set(&result->aux_cnt[i], - percpu_counter_sum_positive(&rwstat->cpu_cnt[i])); + result->cnt[i] = + percpu_counter_sum_positive(&rwstat->cpu_cnt[i]); } /** @@ -720,11 +723,10 @@ static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat, */ static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) { - struct blkg_rwstat tmp = { }; + struct blkg_rwstat_sample tmp = { }; blkg_rwstat_read(rwstat, &tmp); - return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) + - atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]); + return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]; } /** -- cgit v1.2.3 From c0ce79dca5b0e8373a546ebea2af7b3df94c584e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jun 2019 12:26:22 +0200 Subject: blk-cgroup: move struct blkg_stat to bfq This structure and assorted infrastructure is only used by the bfq I/O scheduler. Move it there instead of bloating the common code. Acked-by: Tejun Heo Acked-by: Paolo Valente Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 192 ++++++++++++++++++++++++++++++++++++--------- block/bfq-iosched.h | 19 +++-- block/blk-cgroup.c | 56 ------------- include/linux/blk-cgroup.h | 71 ----------------- 4 files changed, 167 insertions(+), 171 deletions(-) (limited to 'include/linux') diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 624374a99c6e..a691dca7e966 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -17,6 +17,124 @@ #if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) +static int bfq_stat_init(struct bfq_stat *stat, gfp_t gfp) +{ + int ret; + + ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp); + if (ret) + return ret; + + atomic64_set(&stat->aux_cnt, 0); + return 0; +} + +static void bfq_stat_exit(struct bfq_stat *stat) +{ + percpu_counter_destroy(&stat->cpu_cnt); +} + +/** + * bfq_stat_add - add a value to a bfq_stat + * @stat: target bfq_stat + * @val: value to add + * + * Add @val to @stat. The caller must ensure that IRQ on the same CPU + * don't re-enter this function for the same counter. + */ +static inline void bfq_stat_add(struct bfq_stat *stat, uint64_t val) +{ + percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH); +} + +/** + * bfq_stat_read - read the current value of a bfq_stat + * @stat: bfq_stat to read + */ +static inline uint64_t bfq_stat_read(struct bfq_stat *stat) +{ + return percpu_counter_sum_positive(&stat->cpu_cnt); +} + +/** + * bfq_stat_reset - reset a bfq_stat + * @stat: bfq_stat to reset + */ +static inline void bfq_stat_reset(struct bfq_stat *stat) +{ + percpu_counter_set(&stat->cpu_cnt, 0); + atomic64_set(&stat->aux_cnt, 0); +} + +/** + * bfq_stat_add_aux - add a bfq_stat into another's aux count + * @to: the destination bfq_stat + * @from: the source + * + * Add @from's count including the aux one to @to's aux count. + */ +static inline void bfq_stat_add_aux(struct bfq_stat *to, + struct bfq_stat *from) +{ + atomic64_add(bfq_stat_read(from) + atomic64_read(&from->aux_cnt), + &to->aux_cnt); +} + +/** + * bfq_stat_recursive_sum - collect hierarchical bfq_stat + * @blkg: blkg of interest + * @pol: blkcg_policy which contains the bfq_stat + * @off: offset to the bfq_stat in blkg_policy_data or @blkg + * + * Collect the bfq_stat specified by @blkg, @pol and @off and all its + * online descendants and their aux counts. The caller must be holding the + * queue lock for online tests. + * + * If @pol is NULL, bfq_stat is at @off bytes into @blkg; otherwise, it is + * at @off bytes into @blkg's blkg_policy_data of the policy. + */ +static u64 bfq_stat_recursive_sum(struct blkcg_gq *blkg, + struct blkcg_policy *pol, int off) +{ + struct blkcg_gq *pos_blkg; + struct cgroup_subsys_state *pos_css; + u64 sum = 0; + + lockdep_assert_held(&blkg->q->queue_lock); + + rcu_read_lock(); + blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) { + struct bfq_stat *stat; + + if (!pos_blkg->online) + continue; + + if (pol) + stat = (void *)blkg_to_pd(pos_blkg, pol) + off; + else + stat = (void *)blkg + off; + + sum += bfq_stat_read(stat) + atomic64_read(&stat->aux_cnt); + } + rcu_read_unlock(); + + return sum; +} + +/** + * blkg_prfill_stat - prfill callback for bfq_stat + * @sf: seq_file to print to + * @pd: policy private data of interest + * @off: offset to the bfq_stat in @pd + * + * prfill callback for printing a bfq_stat. + */ +static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, + int off) +{ + return __blkg_prfill_u64(sf, pd, bfq_stat_read((void *)pd + off)); +} + /* bfqg stats flags */ enum bfqg_stats_flags { BFQG_stats_waiting = 0, @@ -53,7 +171,7 @@ static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats) now = ktime_get_ns(); if (now > stats->start_group_wait_time) - blkg_stat_add(&stats->group_wait_time, + bfq_stat_add(&stats->group_wait_time, now - stats->start_group_wait_time); bfqg_stats_clear_waiting(stats); } @@ -82,14 +200,14 @@ static void bfqg_stats_end_empty_time(struct bfqg_stats *stats) now = ktime_get_ns(); if (now > stats->start_empty_time) - blkg_stat_add(&stats->empty_time, + bfq_stat_add(&stats->empty_time, now - stats->start_empty_time); bfqg_stats_clear_empty(stats); } void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { - blkg_stat_add(&bfqg->stats.dequeue, 1); + bfq_stat_add(&bfqg->stats.dequeue, 1); } void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) @@ -119,7 +237,7 @@ void bfqg_stats_update_idle_time(struct bfq_group *bfqg) u64 now = ktime_get_ns(); if (now > stats->start_idle_time) - blkg_stat_add(&stats->idle_time, + bfq_stat_add(&stats->idle_time, now - stats->start_idle_time); bfqg_stats_clear_idling(stats); } @@ -137,9 +255,9 @@ void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { struct bfqg_stats *stats = &bfqg->stats; - blkg_stat_add(&stats->avg_queue_size_sum, + bfq_stat_add(&stats->avg_queue_size_sum, blkg_rwstat_total(&stats->queued)); - blkg_stat_add(&stats->avg_queue_size_samples, 1); + bfq_stat_add(&stats->avg_queue_size_samples, 1); bfqg_stats_update_group_wait_time(stats); } @@ -279,13 +397,13 @@ static void bfqg_stats_reset(struct bfqg_stats *stats) blkg_rwstat_reset(&stats->merged); blkg_rwstat_reset(&stats->service_time); blkg_rwstat_reset(&stats->wait_time); - blkg_stat_reset(&stats->time); - blkg_stat_reset(&stats->avg_queue_size_sum); - blkg_stat_reset(&stats->avg_queue_size_samples); - blkg_stat_reset(&stats->dequeue); - blkg_stat_reset(&stats->group_wait_time); - blkg_stat_reset(&stats->idle_time); - blkg_stat_reset(&stats->empty_time); + bfq_stat_reset(&stats->time); + bfq_stat_reset(&stats->avg_queue_size_sum); + bfq_stat_reset(&stats->avg_queue_size_samples); + bfq_stat_reset(&stats->dequeue); + bfq_stat_reset(&stats->group_wait_time); + bfq_stat_reset(&stats->idle_time); + bfq_stat_reset(&stats->empty_time); #endif } @@ -300,14 +418,14 @@ static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from) blkg_rwstat_add_aux(&to->merged, &from->merged); blkg_rwstat_add_aux(&to->service_time, &from->service_time); blkg_rwstat_add_aux(&to->wait_time, &from->wait_time); - blkg_stat_add_aux(&from->time, &from->time); - blkg_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum); - blkg_stat_add_aux(&to->avg_queue_size_samples, + bfq_stat_add_aux(&from->time, &from->time); + bfq_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum); + bfq_stat_add_aux(&to->avg_queue_size_samples, &from->avg_queue_size_samples); - blkg_stat_add_aux(&to->dequeue, &from->dequeue); - blkg_stat_add_aux(&to->group_wait_time, &from->group_wait_time); - blkg_stat_add_aux(&to->idle_time, &from->idle_time); - blkg_stat_add_aux(&to->empty_time, &from->empty_time); + bfq_stat_add_aux(&to->dequeue, &from->dequeue); + bfq_stat_add_aux(&to->group_wait_time, &from->group_wait_time); + bfq_stat_add_aux(&to->idle_time, &from->idle_time); + bfq_stat_add_aux(&to->empty_time, &from->empty_time); #endif } @@ -360,13 +478,13 @@ static void bfqg_stats_exit(struct bfqg_stats *stats) blkg_rwstat_exit(&stats->service_time); blkg_rwstat_exit(&stats->wait_time); blkg_rwstat_exit(&stats->queued); - blkg_stat_exit(&stats->time); - blkg_stat_exit(&stats->avg_queue_size_sum); - blkg_stat_exit(&stats->avg_queue_size_samples); - blkg_stat_exit(&stats->dequeue); - blkg_stat_exit(&stats->group_wait_time); - blkg_stat_exit(&stats->idle_time); - blkg_stat_exit(&stats->empty_time); + bfq_stat_exit(&stats->time); + bfq_stat_exit(&stats->avg_queue_size_sum); + bfq_stat_exit(&stats->avg_queue_size_samples); + bfq_stat_exit(&stats->dequeue); + bfq_stat_exit(&stats->group_wait_time); + bfq_stat_exit(&stats->idle_time); + bfq_stat_exit(&stats->empty_time); #endif } @@ -377,13 +495,13 @@ static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp) blkg_rwstat_init(&stats->service_time, gfp) || blkg_rwstat_init(&stats->wait_time, gfp) || blkg_rwstat_init(&stats->queued, gfp) || - blkg_stat_init(&stats->time, gfp) || - blkg_stat_init(&stats->avg_queue_size_sum, gfp) || - blkg_stat_init(&stats->avg_queue_size_samples, gfp) || - blkg_stat_init(&stats->dequeue, gfp) || - blkg_stat_init(&stats->group_wait_time, gfp) || - blkg_stat_init(&stats->idle_time, gfp) || - blkg_stat_init(&stats->empty_time, gfp)) { + bfq_stat_init(&stats->time, gfp) || + bfq_stat_init(&stats->avg_queue_size_sum, gfp) || + bfq_stat_init(&stats->avg_queue_size_samples, gfp) || + bfq_stat_init(&stats->dequeue, gfp) || + bfq_stat_init(&stats->group_wait_time, gfp) || + bfq_stat_init(&stats->idle_time, gfp) || + bfq_stat_init(&stats->empty_time, gfp)) { bfqg_stats_exit(stats); return -ENOMEM; } @@ -927,7 +1045,7 @@ static int bfqg_print_rwstat(struct seq_file *sf, void *v) static u64 bfqg_prfill_stat_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - u64 sum = blkg_stat_recursive_sum(pd_to_blkg(pd), + u64 sum = bfq_stat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off); return __blkg_prfill_u64(sf, pd, sum); } @@ -996,11 +1114,11 @@ static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct bfq_group *bfqg = pd_to_bfqg(pd); - u64 samples = blkg_stat_read(&bfqg->stats.avg_queue_size_samples); + u64 samples = bfq_stat_read(&bfqg->stats.avg_queue_size_samples); u64 v = 0; if (samples) { - v = blkg_stat_read(&bfqg->stats.avg_queue_size_sum); + v = bfq_stat_read(&bfqg->stats.avg_queue_size_sum); v = div64_u64(v, samples); } __blkg_prfill_u64(sf, pd, v); diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index c2faa77824f8..aef4fa0046b8 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -777,6 +777,11 @@ enum bfqq_expiration { BFQQE_PREEMPTED /* preemption in progress */ }; +struct bfq_stat { + struct percpu_counter cpu_cnt; + atomic64_t aux_cnt; +}; + struct bfqg_stats { #if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) /* number of ios merged */ @@ -788,19 +793,19 @@ struct bfqg_stats { /* number of IOs queued up */ struct blkg_rwstat queued; /* total disk time and nr sectors dispatched by this group */ - struct blkg_stat time; + struct bfq_stat time; /* sum of number of ios queued across all samples */ - struct blkg_stat avg_queue_size_sum; + struct bfq_stat avg_queue_size_sum; /* count of samples taken for average */ - struct blkg_stat avg_queue_size_samples; + struct bfq_stat avg_queue_size_samples; /* how many times this group has been removed from service tree */ - struct blkg_stat dequeue; + struct bfq_stat dequeue; /* total time spent waiting for it to be assigned a timeslice. */ - struct blkg_stat group_wait_time; + struct bfq_stat group_wait_time; /* time spent idling for this blkcg_gq */ - struct blkg_stat idle_time; + struct bfq_stat idle_time; /* total time with empty current active q with other requests queued */ - struct blkg_stat empty_time; + struct bfq_stat empty_time; /* fields after this shouldn't be cleared on stat reset */ u64 start_group_wait_time; u64 start_idle_time; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 664c09866839..53b7bd4c7000 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -572,20 +572,6 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, } EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat); -/** - * blkg_prfill_stat - prfill callback for blkg_stat - * @sf: seq_file to print to - * @pd: policy private data of interest - * @off: offset to the blkg_stat in @pd - * - * prfill callback for printing a blkg_stat. - */ -u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off) -{ - return __blkg_prfill_u64(sf, pd, blkg_stat_read((void *)pd + off)); -} -EXPORT_SYMBOL_GPL(blkg_prfill_stat); - /** * blkg_prfill_rwstat - prfill callback for blkg_rwstat * @sf: seq_file to print to @@ -687,48 +673,6 @@ int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v) } EXPORT_SYMBOL_GPL(blkg_print_stat_ios_recursive); -/** - * blkg_stat_recursive_sum - collect hierarchical blkg_stat - * @blkg: blkg of interest - * @pol: blkcg_policy which contains the blkg_stat - * @off: offset to the blkg_stat in blkg_policy_data or @blkg - * - * Collect the blkg_stat specified by @blkg, @pol and @off and all its - * online descendants and their aux counts. The caller must be holding the - * queue lock for online tests. - * - * If @pol is NULL, blkg_stat is at @off bytes into @blkg; otherwise, it is - * at @off bytes into @blkg's blkg_policy_data of the policy. - */ -u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg, - struct blkcg_policy *pol, int off) -{ - struct blkcg_gq *pos_blkg; - struct cgroup_subsys_state *pos_css; - u64 sum = 0; - - lockdep_assert_held(&blkg->q->queue_lock); - - rcu_read_lock(); - blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) { - struct blkg_stat *stat; - - if (!pos_blkg->online) - continue; - - if (pol) - stat = (void *)blkg_to_pd(pos_blkg, pol) + off; - else - stat = (void *)blkg + off; - - sum += blkg_stat_read(stat) + atomic64_read(&stat->aux_cnt); - } - rcu_read_unlock(); - - return sum; -} -EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum); - /** * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat * @blkg: blkg of interest diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index e4a81767e111..33f23a858438 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -65,11 +65,6 @@ struct blkcg { * blkg_[rw]stat->aux_cnt is excluded for local stats but included for * recursive. Used to carry stats of dead children. */ -struct blkg_stat { - struct percpu_counter cpu_cnt; - atomic64_t aux_cnt; -}; - struct blkg_rwstat { struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR]; atomic64_t aux_cnt[BLKG_RWSTAT_NR]; @@ -217,7 +212,6 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, const struct blkg_rwstat_sample *rwstat); -u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, int off); int blkg_print_stat_bytes(struct seq_file *sf, void *v); @@ -225,8 +219,6 @@ int blkg_print_stat_ios(struct seq_file *sf, void *v); int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v); int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v); -u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg, - struct blkcg_policy *pol, int off); void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, int off, struct blkg_rwstat_sample *sum); @@ -579,69 +571,6 @@ static inline void blkg_put(struct blkcg_gq *blkg) if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ (p_blkg)->q, false))) -static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp) -{ - int ret; - - ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp); - if (ret) - return ret; - - atomic64_set(&stat->aux_cnt, 0); - return 0; -} - -static inline void blkg_stat_exit(struct blkg_stat *stat) -{ - percpu_counter_destroy(&stat->cpu_cnt); -} - -/** - * blkg_stat_add - add a value to a blkg_stat - * @stat: target blkg_stat - * @val: value to add - * - * Add @val to @stat. The caller must ensure that IRQ on the same CPU - * don't re-enter this function for the same counter. - */ -static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val) -{ - percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH); -} - -/** - * blkg_stat_read - read the current value of a blkg_stat - * @stat: blkg_stat to read - */ -static inline uint64_t blkg_stat_read(struct blkg_stat *stat) -{ - return percpu_counter_sum_positive(&stat->cpu_cnt); -} - -/** - * blkg_stat_reset - reset a blkg_stat - * @stat: blkg_stat to reset - */ -static inline void blkg_stat_reset(struct blkg_stat *stat) -{ - percpu_counter_set(&stat->cpu_cnt, 0); - atomic64_set(&stat->aux_cnt, 0); -} - -/** - * blkg_stat_add_aux - add a blkg_stat into another's aux count - * @to: the destination blkg_stat - * @from: the source - * - * Add @from's count including the aux one to @to's aux count. - */ -static inline void blkg_stat_add_aux(struct blkg_stat *to, - struct blkg_stat *from) -{ - atomic64_add(blkg_stat_read(from) + atomic64_read(&from->aux_cnt), - &to->aux_cnt); -} - static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp) { int i, ret; -- cgit v1.2.3 From e47bc4eda953928644109101d07c9c95dc29a458 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 20 Jun 2019 10:59:16 -0700 Subject: block: add centralize REQ_OP_XXX to string helper In order to centralize the REQ_OP_XXX to string conversion which can be used in the block layer and different places in the kernel like f2fs, this patch adds a new helper function along with an array similar to the one present in the blk-mq-debugfs.c. We keep this helper functionality centralize under blk-core.c instead of blk-mq-debugfs.c since blk-core.c is configured using CONFIG_BLOCK and it will not be dependent on blk-mq-debugfs.c which is configured using CONFIG_BLK_DEBUG_FS. Next patch adjusts the code in the blk-mq-debugfs.c with newly introduced helper. Reviewed-by: Bart Van Assche Signed-off-by: Chaitanya Kulkarni Signed-off-by: Jens Axboe --- block/blk-core.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 3 +++ 2 files changed, 39 insertions(+) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index c97da29ddc07..129204dd3bae 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -120,6 +120,42 @@ void blk_rq_init(struct request_queue *q, struct request *rq) } EXPORT_SYMBOL(blk_rq_init); +#define REQ_OP_NAME(name) [REQ_OP_##name] = #name +static const char *const blk_op_name[] = { + REQ_OP_NAME(READ), + REQ_OP_NAME(WRITE), + REQ_OP_NAME(FLUSH), + REQ_OP_NAME(DISCARD), + REQ_OP_NAME(SECURE_ERASE), + REQ_OP_NAME(ZONE_RESET), + REQ_OP_NAME(WRITE_SAME), + REQ_OP_NAME(WRITE_ZEROES), + REQ_OP_NAME(SCSI_IN), + REQ_OP_NAME(SCSI_OUT), + REQ_OP_NAME(DRV_IN), + REQ_OP_NAME(DRV_OUT), +}; +#undef REQ_OP_NAME + +/** + * blk_op_str - Return string XXX in the REQ_OP_XXX. + * @op: REQ_OP_XXX. + * + * Description: Centralize block layer function to convert REQ_OP_XXX into + * string format. Useful in the debugging and tracing bio or request. For + * invalid REQ_OP_XXX it returns string "UNKNOWN". + */ +inline const char *blk_op_str(unsigned int op) +{ + const char *op_str = "UNKNOWN"; + + if (op < ARRAY_SIZE(blk_op_name) && blk_op_name[op]) + op_str = blk_op_name[op]; + + return op_str; +} +EXPORT_SYMBOL_GPL(blk_op_str); + static const struct { int errno; const char *name; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d5d3bb45dfb6..0c482371c8b3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -865,6 +865,9 @@ extern void blk_execute_rq(struct request_queue *, struct gendisk *, extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); +/* Helper to convert REQ_OP_XXX to its string format XXX */ +extern const char *blk_op_str(unsigned int op); + int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); -- cgit v1.2.3 From 4635873c561ac57b66adfcc2487c38106b1c916c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 28 Apr 2019 15:39:30 +0800 Subject: scsi: lib/sg_pool.c: improve APIs for allocating sg pool sg_alloc_table_chained() currently allows the caller to provide one preallocated SGL and returns if the requested number isn't bigger than size of that SGL. This is used to inline an SGL for an IO request. However, scattergather code only allows that size of the 1st preallocated SGL to be SG_CHUNK_SIZE(128). This means a substantial amount of memory (4KB) is claimed for the SGL for each IO request. If the I/O is small, it would be prudent to allocate a smaller SGL. Introduce an extra parameter to sg_alloc_table_chained() and sg_free_table_chained() for specifying size of the preallocated SGL. Both __sg_free_table() and __sg_alloc_table() assume that each SGL has the same size except for the last one. Change the code to allow both functions to accept a variable size for the 1st preallocated SGL. [mkp: attempted to clarify commit desc] Cc: Christoph Hellwig Cc: Bart Van Assche Cc: Ewan D. Milne Cc: Hannes Reinecke Cc: Sagi Grimberg Cc: Chuck Lever Cc: netdev@vger.kernel.org Cc: linux-nvme@lists.infradead.org Suggested-by: Christoph Hellwig Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/nvme/host/fc.c | 7 ++++--- drivers/nvme/host/rdma.c | 7 ++++--- drivers/nvme/target/loop.c | 4 ++-- drivers/scsi/scsi_lib.c | 10 ++++++---- include/linux/scatterlist.h | 11 +++++++---- lib/scatterlist.c | 36 +++++++++++++++++++++++------------- lib/sg_pool.c | 37 +++++++++++++++++++++++++++---------- net/sunrpc/xprtrdma/svc_rdma_rw.c | 5 +++-- 8 files changed, 76 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index dd8169bbf0d2..46811caac9d2 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2112,7 +2112,8 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq, freq->sg_table.sgl = freq->first_sgl; ret = sg_alloc_table_chained(&freq->sg_table, - blk_rq_nr_phys_segments(rq), freq->sg_table.sgl); + blk_rq_nr_phys_segments(rq), freq->sg_table.sgl, + SG_CHUNK_SIZE); if (ret) return -ENOMEM; @@ -2122,7 +2123,7 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq, freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents, dir); if (unlikely(freq->sg_cnt <= 0)) { - sg_free_table_chained(&freq->sg_table, true); + sg_free_table_chained(&freq->sg_table, SG_CHUNK_SIZE); freq->sg_cnt = 0; return -EFAULT; } @@ -2148,7 +2149,7 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq, nvme_cleanup_cmd(rq); - sg_free_table_chained(&freq->sg_table, true); + sg_free_table_chained(&freq->sg_table, SG_CHUNK_SIZE); freq->sg_cnt = 0; } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index f383146e7d0f..f7ea19b45798 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1133,7 +1133,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE); nvme_cleanup_cmd(rq); - sg_free_table_chained(&req->sg_table, true); + sg_free_table_chained(&req->sg_table, SG_CHUNK_SIZE); } static int nvme_rdma_set_sg_null(struct nvme_command *c) @@ -1248,7 +1248,8 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, req->sg_table.sgl = req->first_sgl; ret = sg_alloc_table_chained(&req->sg_table, - blk_rq_nr_phys_segments(rq), req->sg_table.sgl); + blk_rq_nr_phys_segments(rq), req->sg_table.sgl, + SG_CHUNK_SIZE); if (ret) return -ENOMEM; @@ -1288,7 +1289,7 @@ out_unmap_sg: req->nents, rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE); out_free_table: - sg_free_table_chained(&req->sg_table, true); + sg_free_table_chained(&req->sg_table, SG_CHUNK_SIZE); return ret; } diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 9e211ad6bdd3..b16dc3981c69 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -77,7 +77,7 @@ static void nvme_loop_complete_rq(struct request *req) struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); nvme_cleanup_cmd(req); - sg_free_table_chained(&iod->sg_table, true); + sg_free_table_chained(&iod->sg_table, SG_CHUNK_SIZE); nvme_complete_rq(req); } @@ -157,7 +157,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, iod->sg_table.sgl = iod->first_sgl; if (sg_alloc_table_chained(&iod->sg_table, blk_rq_nr_phys_segments(req), - iod->sg_table.sgl)) + iod->sg_table.sgl, SG_CHUNK_SIZE)) return BLK_STS_RESOURCE; iod->req.sg = iod->sg_table.sgl; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 0916bd6d22b0..acc0f7080f18 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -541,9 +541,9 @@ static void scsi_uninit_cmd(struct scsi_cmnd *cmd) static void scsi_mq_free_sgtables(struct scsi_cmnd *cmd) { if (cmd->sdb.table.nents) - sg_free_table_chained(&cmd->sdb.table, true); + sg_free_table_chained(&cmd->sdb.table, SG_CHUNK_SIZE); if (scsi_prot_sg_count(cmd)) - sg_free_table_chained(&cmd->prot_sdb->table, true); + sg_free_table_chained(&cmd->prot_sdb->table, SG_CHUNK_SIZE); } static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd) @@ -976,7 +976,8 @@ static blk_status_t scsi_init_sgtable(struct request *req, * If sg table allocation fails, requeue request later. */ if (unlikely(sg_alloc_table_chained(&sdb->table, - blk_rq_nr_phys_segments(req), sdb->table.sgl))) + blk_rq_nr_phys_segments(req), sdb->table.sgl, + SG_CHUNK_SIZE))) return BLK_STS_RESOURCE; /* @@ -1030,7 +1031,8 @@ blk_status_t scsi_init_io(struct scsi_cmnd *cmd) ivecs = blk_rq_count_integrity_sg(rq->q, rq->bio); if (sg_alloc_table_chained(&prot_sdb->table, ivecs, - prot_sdb->table.sgl)) { + prot_sdb->table.sgl, + SG_CHUNK_SIZE)) { ret = BLK_STS_RESOURCE; goto out_free_sgtables; } diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 30a9a55c28ba..6eec50fb36c8 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -266,10 +266,11 @@ int sg_split(struct scatterlist *in, const int in_mapped_nents, typedef struct scatterlist *(sg_alloc_fn)(unsigned int, gfp_t); typedef void (sg_free_fn)(struct scatterlist *, unsigned int); -void __sg_free_table(struct sg_table *, unsigned int, bool, sg_free_fn *); +void __sg_free_table(struct sg_table *, unsigned int, unsigned int, + sg_free_fn *); void sg_free_table(struct sg_table *); int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int, - struct scatterlist *, gfp_t, sg_alloc_fn *); + struct scatterlist *, unsigned int, gfp_t, sg_alloc_fn *); int sg_alloc_table(struct sg_table *, unsigned int, gfp_t); int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, @@ -331,9 +332,11 @@ size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents, #endif #ifdef CONFIG_SG_POOL -void sg_free_table_chained(struct sg_table *table, bool first_chunk); +void sg_free_table_chained(struct sg_table *table, + unsigned nents_first_chunk); int sg_alloc_table_chained(struct sg_table *table, int nents, - struct scatterlist *first_chunk); + struct scatterlist *first_chunk, + unsigned nents_first_chunk); #endif /* diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 739dc9fe2c55..77ec8eec3fd0 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -181,7 +181,8 @@ static void sg_kfree(struct scatterlist *sg, unsigned int nents) * __sg_free_table - Free a previously mapped sg table * @table: The sg table header to use * @max_ents: The maximum number of entries per single scatterlist - * @skip_first_chunk: don't free the (preallocated) first scatterlist chunk + * @nents_first_chunk: Number of entries int the (preallocated) first + * scatterlist chunk, 0 means no such preallocated first chunk * @free_fn: Free function * * Description: @@ -191,9 +192,10 @@ static void sg_kfree(struct scatterlist *sg, unsigned int nents) * **/ void __sg_free_table(struct sg_table *table, unsigned int max_ents, - bool skip_first_chunk, sg_free_fn *free_fn) + unsigned int nents_first_chunk, sg_free_fn *free_fn) { struct scatterlist *sgl, *next; + unsigned curr_max_ents = nents_first_chunk ?: max_ents; if (unlikely(!table->sgl)) return; @@ -209,9 +211,9 @@ void __sg_free_table(struct sg_table *table, unsigned int max_ents, * sg_size is then one less than alloc size, since the last * element is the chain pointer. */ - if (alloc_size > max_ents) { - next = sg_chain_ptr(&sgl[max_ents - 1]); - alloc_size = max_ents; + if (alloc_size > curr_max_ents) { + next = sg_chain_ptr(&sgl[curr_max_ents - 1]); + alloc_size = curr_max_ents; sg_size = alloc_size - 1; } else { sg_size = alloc_size; @@ -219,11 +221,12 @@ void __sg_free_table(struct sg_table *table, unsigned int max_ents, } table->orig_nents -= sg_size; - if (skip_first_chunk) - skip_first_chunk = false; + if (nents_first_chunk) + nents_first_chunk = 0; else free_fn(sgl, alloc_size); sgl = next; + curr_max_ents = max_ents; } table->sgl = NULL; @@ -246,6 +249,8 @@ EXPORT_SYMBOL(sg_free_table); * @table: The sg table header to use * @nents: Number of entries in sg list * @max_ents: The maximum number of entries the allocator returns per call + * @nents_first_chunk: Number of entries int the (preallocated) first + * scatterlist chunk, 0 means no such preallocated chunk provided by user * @gfp_mask: GFP allocation mask * @alloc_fn: Allocator to use * @@ -262,10 +267,13 @@ EXPORT_SYMBOL(sg_free_table); **/ int __sg_alloc_table(struct sg_table *table, unsigned int nents, unsigned int max_ents, struct scatterlist *first_chunk, - gfp_t gfp_mask, sg_alloc_fn *alloc_fn) + unsigned int nents_first_chunk, gfp_t gfp_mask, + sg_alloc_fn *alloc_fn) { struct scatterlist *sg, *prv; unsigned int left; + unsigned curr_max_ents = nents_first_chunk ?: max_ents; + unsigned prv_max_ents; memset(table, 0, sizeof(*table)); @@ -281,8 +289,8 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, do { unsigned int sg_size, alloc_size = left; - if (alloc_size > max_ents) { - alloc_size = max_ents; + if (alloc_size > curr_max_ents) { + alloc_size = curr_max_ents; sg_size = alloc_size - 1; } else sg_size = alloc_size; @@ -316,7 +324,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, * If this is not the first mapping, chain previous part. */ if (prv) - sg_chain(prv, max_ents, sg); + sg_chain(prv, prv_max_ents, sg); else table->sgl = sg; @@ -327,6 +335,8 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, sg_mark_end(&sg[sg_size - 1]); prv = sg; + prv_max_ents = curr_max_ents; + curr_max_ents = max_ents; } while (left); return 0; @@ -349,9 +359,9 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask) int ret; ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC, - NULL, gfp_mask, sg_kmalloc); + NULL, 0, gfp_mask, sg_kmalloc); if (unlikely(ret)) - __sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree); + __sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree); return ret; } diff --git a/lib/sg_pool.c b/lib/sg_pool.c index d1c1e6388eaa..b3b8cf62ff49 100644 --- a/lib/sg_pool.c +++ b/lib/sg_pool.c @@ -69,18 +69,27 @@ static struct scatterlist *sg_pool_alloc(unsigned int nents, gfp_t gfp_mask) /** * sg_free_table_chained - Free a previously mapped sg table * @table: The sg table header to use - * @first_chunk: was first_chunk not NULL in sg_alloc_table_chained? + * @nents_first_chunk: size of the first_chunk SGL passed to + * sg_alloc_table_chained * * Description: * Free an sg table previously allocated and setup with * sg_alloc_table_chained(). * + * @nents_first_chunk has to be same with that same parameter passed + * to sg_alloc_table_chained(). + * **/ -void sg_free_table_chained(struct sg_table *table, bool first_chunk) +void sg_free_table_chained(struct sg_table *table, + unsigned nents_first_chunk) { - if (first_chunk && table->orig_nents <= SG_CHUNK_SIZE) + if (table->orig_nents <= nents_first_chunk) return; - __sg_free_table(table, SG_CHUNK_SIZE, first_chunk, sg_pool_free); + + if (nents_first_chunk == 1) + nents_first_chunk = 0; + + __sg_free_table(table, SG_CHUNK_SIZE, nents_first_chunk, sg_pool_free); } EXPORT_SYMBOL_GPL(sg_free_table_chained); @@ -89,31 +98,39 @@ EXPORT_SYMBOL_GPL(sg_free_table_chained); * @table: The sg table header to use * @nents: Number of entries in sg list * @first_chunk: first SGL + * @nents_first_chunk: number of the SGL of @first_chunk * * Description: * Allocate and chain SGLs in an sg table. If @nents@ is larger than - * SG_CHUNK_SIZE a chained sg table will be setup. + * @nents_first_chunk a chained sg table will be setup. * **/ int sg_alloc_table_chained(struct sg_table *table, int nents, - struct scatterlist *first_chunk) + struct scatterlist *first_chunk, unsigned nents_first_chunk) { int ret; BUG_ON(!nents); - if (first_chunk) { - if (nents <= SG_CHUNK_SIZE) { + if (first_chunk && nents_first_chunk) { + if (nents <= nents_first_chunk) { table->nents = table->orig_nents = nents; sg_init_table(table->sgl, nents); return 0; } } + /* User supposes that the 1st SGL includes real entry */ + if (nents_first_chunk == 1) { + first_chunk = NULL; + nents_first_chunk = 0; + } + ret = __sg_alloc_table(table, nents, SG_CHUNK_SIZE, - first_chunk, GFP_ATOMIC, sg_pool_alloc); + first_chunk, nents_first_chunk, + GFP_ATOMIC, sg_pool_alloc); if (unlikely(ret)) - sg_free_table_chained(table, (bool)first_chunk); + sg_free_table_chained(table, nents_first_chunk); return ret; } EXPORT_SYMBOL_GPL(sg_alloc_table_chained); diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 2121c9b4d275..48fe3b16b0d9 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -73,7 +73,8 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges) ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl; if (sg_alloc_table_chained(&ctxt->rw_sg_table, sges, - ctxt->rw_sg_table.sgl)) { + ctxt->rw_sg_table.sgl, + SG_CHUNK_SIZE)) { kfree(ctxt); ctxt = NULL; } @@ -84,7 +85,7 @@ out: static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma, struct svc_rdma_rw_ctxt *ctxt) { - sg_free_table_chained(&ctxt->rw_sg_table, true); + sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE); spin_lock(&rdma->sc_rw_ctxt_lock); list_add(&ctxt->rw_list, &rdma->sc_rw_ctxts); -- cgit v1.2.3 From aa0bfcd939c30617385ffa28682c062d78050eba Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Thu, 20 Jun 2019 17:05:37 -0400 Subject: mm: add filemap_fdatawait_range_keep_errors() In the spirit of filemap_fdatawait_range() and filemap_fdatawait_keep_errors(), introduce filemap_fdatawait_range_keep_errors() which both takes a range upon which to wait and does not clear errors from the address space. Signed-off-by: Ross Zwisler Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@vger.kernel.org --- include/linux/fs.h | 2 ++ mm/filemap.c | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index f7fdfe93e25d..79fec8a8413f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2712,6 +2712,8 @@ extern int filemap_flush(struct address_space *); extern int filemap_fdatawait_keep_errors(struct address_space *mapping); extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend); +extern int filemap_fdatawait_range_keep_errors(struct address_space *mapping, + loff_t start_byte, loff_t end_byte); static inline int filemap_fdatawait(struct address_space *mapping) { diff --git a/mm/filemap.c b/mm/filemap.c index df2006ba0cfa..e87252ca0835 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -553,6 +553,28 @@ int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte, } EXPORT_SYMBOL(filemap_fdatawait_range); +/** + * filemap_fdatawait_range_keep_errors - wait for writeback to complete + * @mapping: address space structure to wait for + * @start_byte: offset in bytes where the range starts + * @end_byte: offset in bytes where the range ends (inclusive) + * + * Walk the list of under-writeback pages of the given address space in the + * given range and wait for all of them. Unlike filemap_fdatawait_range(), + * this function does not clear error status of the address space. + * + * Use this function if callers don't handle errors themselves. Expected + * call sites are system-wide / filesystem-wide data flushers: e.g. sync(2), + * fsfreeze(8) + */ +int filemap_fdatawait_range_keep_errors(struct address_space *mapping, + loff_t start_byte, loff_t end_byte) +{ + __filemap_fdatawait_range(mapping, start_byte, end_byte); + return filemap_check_and_keep_errors(mapping); +} +EXPORT_SYMBOL(filemap_fdatawait_range_keep_errors); + /** * file_fdatawait_range - wait for writeback to complete * @file: file pointing to address space structure to wait for -- cgit v1.2.3 From 6ba0e7dc64a5adcda2fbe65adc466891795d639e Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Thu, 20 Jun 2019 17:24:56 -0400 Subject: jbd2: introduce jbd2_inode dirty range scoping Currently both journal_submit_inode_data_buffers() and journal_finish_inode_data_buffers() operate on the entire address space of each of the inodes associated with a given journal entry. The consequence of this is that if we have an inode where we are constantly appending dirty pages we can end up waiting for an indefinite amount of time in journal_finish_inode_data_buffers() while we wait for all the pages under writeback to be written out. The easiest way to cause this type of workload is do just dd from /dev/zero to a file until it fills the entire filesystem. This can cause journal_finish_inode_data_buffers() to wait for the duration of the entire dd operation. We can improve this situation by scoping each of the inode dirty ranges associated with a given transaction. We do this via the jbd2_inode structure so that the scoping is contained within jbd2 and so that it follows the lifetime and locking rules for that structure. This allows us to limit the writeback & wait in journal_submit_inode_data_buffers() and journal_finish_inode_data_buffers() respectively to the dirty range for a given struct jdb2_inode, keeping us from waiting forever if the inode in question is still being appended to. Signed-off-by: Ross Zwisler Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@vger.kernel.org --- fs/jbd2/commit.c | 23 +++++++++++++++++------ fs/jbd2/journal.c | 4 ++++ fs/jbd2/transaction.c | 49 ++++++++++++++++++++++++++++--------------------- include/linux/jbd2.h | 22 ++++++++++++++++++++++ 4 files changed, 71 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index c8c1d6cc6e5d..132fb92098c7 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -187,14 +187,15 @@ static int journal_wait_on_commit_record(journal_t *journal, * use writepages() because with delayed allocation we may be doing * block allocation in writepages(). */ -static int journal_submit_inode_data_buffers(struct address_space *mapping) +static int journal_submit_inode_data_buffers(struct address_space *mapping, + loff_t dirty_start, loff_t dirty_end) { int ret; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = mapping->nrpages * 2, - .range_start = 0, - .range_end = i_size_read(mapping->host), + .range_start = dirty_start, + .range_end = dirty_end, }; ret = generic_writepages(mapping, &wbc); @@ -218,6 +219,9 @@ static int journal_submit_data_buffers(journal_t *journal, spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { + loff_t dirty_start = jinode->i_dirty_start; + loff_t dirty_end = jinode->i_dirty_end; + if (!(jinode->i_flags & JI_WRITE_DATA)) continue; mapping = jinode->i_vfs_inode->i_mapping; @@ -230,7 +234,8 @@ static int journal_submit_data_buffers(journal_t *journal, * only allocated blocks here. */ trace_jbd2_submit_inode_data(jinode->i_vfs_inode); - err = journal_submit_inode_data_buffers(mapping); + err = journal_submit_inode_data_buffers(mapping, dirty_start, + dirty_end); if (!ret) ret = err; spin_lock(&journal->j_list_lock); @@ -257,12 +262,16 @@ static int journal_finish_inode_data_buffers(journal_t *journal, /* For locking, see the comment in journal_submit_data_buffers() */ spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { + loff_t dirty_start = jinode->i_dirty_start; + loff_t dirty_end = jinode->i_dirty_end; + if (!(jinode->i_flags & JI_WAIT_DATA)) continue; jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); - err = filemap_fdatawait_keep_errors( - jinode->i_vfs_inode->i_mapping); + err = filemap_fdatawait_range_keep_errors( + jinode->i_vfs_inode->i_mapping, dirty_start, + dirty_end); if (!ret) ret = err; spin_lock(&journal->j_list_lock); @@ -282,6 +291,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal, &jinode->i_transaction->t_inode_list); } else { jinode->i_transaction = NULL; + jinode->i_dirty_start = 0; + jinode->i_dirty_end = 0; } } spin_unlock(&journal->j_list_lock); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 38b426c5ed03..17f679aeba7c 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -94,6 +94,8 @@ EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); EXPORT_SYMBOL(jbd2_journal_force_commit); EXPORT_SYMBOL(jbd2_journal_inode_add_write); EXPORT_SYMBOL(jbd2_journal_inode_add_wait); +EXPORT_SYMBOL(jbd2_journal_inode_ranged_write); +EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait); EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); @@ -2574,6 +2576,8 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode) jinode->i_next_transaction = NULL; jinode->i_vfs_inode = inode; jinode->i_flags = 0; + jinode->i_dirty_start = 0; + jinode->i_dirty_end = 0; INIT_LIST_HEAD(&jinode->i_list); } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 8ca4fddc705f..990e7b5062e7 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2565,7 +2565,7 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) * File inode in the inode list of the handle's transaction */ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode, - unsigned long flags) + unsigned long flags, loff_t start_byte, loff_t end_byte) { transaction_t *transaction = handle->h_transaction; journal_t *journal; @@ -2577,26 +2577,17 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode, jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, transaction->t_tid); - /* - * First check whether inode isn't already on the transaction's - * lists without taking the lock. Note that this check is safe - * without the lock as we cannot race with somebody removing inode - * from the transaction. The reason is that we remove inode from the - * transaction only in journal_release_jbd_inode() and when we commit - * the transaction. We are guarded from the first case by holding - * a reference to the inode. We are safe against the second case - * because if jinode->i_transaction == transaction, commit code - * cannot touch the transaction because we hold reference to it, - * and if jinode->i_next_transaction == transaction, commit code - * will only file the inode where we want it. - */ - if ((jinode->i_transaction == transaction || - jinode->i_next_transaction == transaction) && - (jinode->i_flags & flags) == flags) - return 0; - spin_lock(&journal->j_list_lock); jinode->i_flags |= flags; + + if (jinode->i_dirty_end) { + jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte); + jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte); + } else { + jinode->i_dirty_start = start_byte; + jinode->i_dirty_end = end_byte; + } + /* Is inode already attached where we need it? */ if (jinode->i_transaction == transaction || jinode->i_next_transaction == transaction) @@ -2631,12 +2622,28 @@ done: int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode) { return jbd2_journal_file_inode(handle, jinode, - JI_WRITE_DATA | JI_WAIT_DATA); + JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX); } int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode) { - return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA); + return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0, + LLONG_MAX); +} + +int jbd2_journal_inode_ranged_write(handle_t *handle, + struct jbd2_inode *jinode, loff_t start_byte, loff_t length) +{ + return jbd2_journal_file_inode(handle, jinode, + JI_WRITE_DATA | JI_WAIT_DATA, start_byte, + start_byte + length - 1); +} + +int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *jinode, + loff_t start_byte, loff_t length) +{ + return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, + start_byte, start_byte + length - 1); } /* diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 5c04181b7c6d..0e0393e7f41a 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -451,6 +451,22 @@ struct jbd2_inode { * @i_flags: Flags of inode [j_list_lock] */ unsigned long i_flags; + + /** + * @i_dirty_start: + * + * Offset in bytes where the dirty range for this inode starts. + * [j_list_lock] + */ + loff_t i_dirty_start; + + /** + * @i_dirty_end: + * + * Inclusive offset in bytes where the dirty range for this inode + * ends. [j_list_lock] + */ + loff_t i_dirty_end; }; struct jbd2_revoke_table_s; @@ -1397,6 +1413,12 @@ extern int jbd2_journal_force_commit(journal_t *); extern int jbd2_journal_force_commit_nested(journal_t *); extern int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode); extern int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode); +extern int jbd2_journal_inode_ranged_write(handle_t *handle, + struct jbd2_inode *inode, loff_t start_byte, + loff_t length); +extern int jbd2_journal_inode_ranged_wait(handle_t *handle, + struct jbd2_inode *inode, loff_t start_byte, + loff_t length); extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, struct jbd2_inode *inode, loff_t new_size); extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); -- cgit v1.2.3 From 9382cde8cd8fb941fc333b644a5772d02e1ff924 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 20 Jun 2019 17:32:21 -0400 Subject: jbd2: drop declaration of journal_sync_buffer() The journal_sync_buffer() function was never carried over from jbd to jbd2. So get rid of the vestigal declaration of this (non-existent) function. Signed-off-by: Theodore Ts'o Reviewed-by: Darrick J. Wong --- fs/jbd2/journal.c | 3 --- include/linux/jbd2.h | 1 - 2 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 17f679aeba7c..953990eb70a9 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -66,9 +66,6 @@ EXPORT_SYMBOL(jbd2_journal_get_undo_access); EXPORT_SYMBOL(jbd2_journal_set_triggers); EXPORT_SYMBOL(jbd2_journal_dirty_metadata); EXPORT_SYMBOL(jbd2_journal_forget); -#if 0 -EXPORT_SYMBOL(journal_sync_buffer); -#endif EXPORT_SYMBOL(jbd2_journal_flush); EXPORT_SYMBOL(jbd2_journal_revoke); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 0e0393e7f41a..df03825ad1a1 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1373,7 +1373,6 @@ void jbd2_journal_set_triggers(struct buffer_head *, struct jbd2_buffer_trigger_type *type); extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); extern int jbd2_journal_forget (handle_t *, struct buffer_head *); -extern void journal_sync_buffer (struct buffer_head *); extern int jbd2_journal_invalidatepage(journal_t *, struct page *, unsigned int, unsigned int); extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); -- cgit v1.2.3 From 4a96895f74c9633b51427fd080ab70fa62b65bc4 Mon Sep 17 00:00:00 2001 From: Yegor Yefremov Date: Thu, 20 Jun 2019 08:24:20 +0200 Subject: tty/serial/8250: use mctrl_gpio helpers This patch permits the usage for GPIOs to control the CTS/RTS/DTR/DSR/DCD/RI signals. Changed by Stefan: Only call mctrl_gpio_init(), if the device has no ACPI companion device to not break existing ACPI based systems. Also only use the mctrl_gpio_ functions when "gpios" is available. Use MSR / MCR <-> TIOCM wrapper functions. Signed-off-by: Yegor Yefremov Signed-off-by: Stefan Roese Reviewed-by: Andy Shevchenko Reviewed-by: Mika Westerberg Tested-by: Yegor Yefremov Cc: Mika Westerberg Cc: Andy Shevchenko Cc: Giulio Benetti Cc: Yegor Yefremov Cc: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/8250.txt | 19 +++++++++++++++ drivers/tty/serial/8250/8250.h | 18 +++++++++++++- drivers/tty/serial/8250/8250_core.c | 17 +++++++++++++ drivers/tty/serial/8250/8250_omap.c | 29 +++++++++++++---------- drivers/tty/serial/8250/8250_port.c | 11 ++++++++- drivers/tty/serial/8250/Kconfig | 1 + include/linux/serial_8250.h | 1 + 7 files changed, 81 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/serial/8250.txt b/Documentation/devicetree/bindings/serial/8250.txt index 3cba12f855b7..20d351f268ef 100644 --- a/Documentation/devicetree/bindings/serial/8250.txt +++ b/Documentation/devicetree/bindings/serial/8250.txt @@ -53,6 +53,9 @@ Optional properties: programmable TX FIFO thresholds. - resets : phandle + reset specifier pairs - overrun-throttle-ms : how long to pause uart rx when input overrun is encountered. +- {rts,cts,dtr,dsr,rng,dcd}-gpios: specify a GPIO for RTS/CTS/DTR/DSR/RI/DCD + line respectively. It will use specified GPIO instead of the peripheral + function pin for the UART feature. If unsure, don't specify this property. Note: * fsl,ns16550: @@ -74,3 +77,19 @@ Example: interrupts = <10>; reg-shift = <2>; }; + +Example for OMAP UART using GPIO-based modem control signals: + + uart4: serial@49042000 { + compatible = "ti,omap3-uart"; + reg = <0x49042000 0x400>; + interrupts = <80>; + ti,hwmods = "uart4"; + clock-frequency = <48000000>; + cts-gpios = <&gpio3 5 GPIO_ACTIVE_LOW>; + rts-gpios = <&gpio3 6 GPIO_ACTIVE_LOW>; + dtr-gpios = <&gpio1 12 GPIO_ACTIVE_LOW>; + dsr-gpios = <&gpio1 13 GPIO_ACTIVE_LOW>; + dcd-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>; + rng-gpios = <&gpio1 15 GPIO_ACTIVE_LOW>; + }; diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 57db8c1689af..33ad9d6de532 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -11,6 +11,8 @@ #include #include +#include "../serial_mctrl_gpio.h" + struct uart_8250_dma { int (*tx_dma)(struct uart_8250_port *p); int (*rx_dma)(struct uart_8250_port *p); @@ -214,11 +216,25 @@ static inline int serial8250_MSR_to_TIOCM(int msr) static inline void serial8250_out_MCR(struct uart_8250_port *up, int value) { serial_out(up, UART_MCR, value); + + if (up->gpios) + mctrl_gpio_set(up->gpios, serial8250_MCR_to_TIOCM(value)); } static inline int serial8250_in_MCR(struct uart_8250_port *up) { - return serial_in(up, UART_MCR); + int mctrl; + + mctrl = serial_in(up, UART_MCR); + + if (up->gpios) { + unsigned int mctrl_gpio = 0; + + mctrl_gpio = mctrl_gpio_get_outputs(up->gpios, &mctrl_gpio); + mctrl |= serial8250_TIOCM_to_MCR(mctrl_gpio); + } + + return mctrl; } #if defined(__alpha__) && !defined(CONFIG_PCI) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index e441221e04b9..a4470771005f 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -14,6 +14,7 @@ * serial8250_register_8250_port() ports */ +#include #include #include #include @@ -982,6 +983,8 @@ int serial8250_register_8250_port(struct uart_8250_port *up) uart = serial8250_find_match_or_unused(&up->port); if (uart && uart->port.type != PORT_8250_CIR) { + struct mctrl_gpios *gpios; + if (uart->port.dev) uart_remove_one_port(&serial8250_reg, &uart->port); @@ -1016,6 +1019,20 @@ int serial8250_register_8250_port(struct uart_8250_port *up) if (up->port.flags & UPF_FIXED_TYPE) uart->port.type = up->port.type; + /* + * Only call mctrl_gpio_init(), if the device has no ACPI + * companion device + */ + if (!has_acpi_companion(uart->port.dev)) { + gpios = mctrl_gpio_init(&uart->port, 0); + if (IS_ERR(gpios)) { + if (PTR_ERR(gpios) != -ENOSYS) + return PTR_ERR(gpios); + } else { + uart->gpios = gpios; + } + } + serial8250_set_defaults(uart); /* Possibly override default I/O functions. */ diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index ed25cfc3be13..3ef65cbd2478 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -141,18 +141,20 @@ static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) serial8250_do_set_mctrl(port, mctrl); - /* - * Turn off autoRTS if RTS is lowered and restore autoRTS setting - * if RTS is raised - */ - lcr = serial_in(up, UART_LCR); - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - if ((mctrl & TIOCM_RTS) && (port->status & UPSTAT_AUTORTS)) - priv->efr |= UART_EFR_RTS; - else - priv->efr &= ~UART_EFR_RTS; - serial_out(up, UART_EFR, priv->efr); - serial_out(up, UART_LCR, lcr); + if (!up->gpios) { + /* + * Turn off autoRTS if RTS is lowered and restore autoRTS + * setting if RTS is raised + */ + lcr = serial_in(up, UART_LCR); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); + if ((mctrl & TIOCM_RTS) && (port->status & UPSTAT_AUTORTS)) + priv->efr |= UART_EFR_RTS; + else + priv->efr &= ~UART_EFR_RTS; + serial_out(up, UART_EFR, priv->efr); + serial_out(up, UART_LCR, lcr); + } } /* @@ -453,7 +455,8 @@ static void omap_8250_set_termios(struct uart_port *port, priv->efr = 0; up->port.status &= ~(UPSTAT_AUTOCTS | UPSTAT_AUTORTS | UPSTAT_AUTOXOFF); - if (termios->c_cflag & CRTSCTS && up->port.flags & UPF_HARD_FLOW) { + if (termios->c_cflag & CRTSCTS && up->port.flags & UPF_HARD_FLOW && + !up->gpios) { /* Enable AUTOCTS (autoRTS is enabled when RTS is raised) */ up->port.status |= UPSTAT_AUTOCTS | UPSTAT_AUTORTS; priv->efr |= UART_EFR_CTS; diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 74d81f1701ed..a6fabc7e3b13 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1656,6 +1656,8 @@ static void serial8250_disable_ms(struct uart_port *port) if (up->bugs & UART_BUG_NOMSR) return; + mctrl_gpio_disable_ms(up->gpios); + up->ier &= ~UART_IER_MSI; serial_port_out(port, UART_IER, up->ier); } @@ -1668,6 +1670,8 @@ static void serial8250_enable_ms(struct uart_port *port) if (up->bugs & UART_BUG_NOMSR) return; + mctrl_gpio_enable_ms(up->gpios); + up->ier |= UART_IER_MSI; serial8250_rpm_get(up); @@ -1939,12 +1943,17 @@ unsigned int serial8250_do_get_mctrl(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned int status; + unsigned int val; serial8250_rpm_get(up); status = serial8250_modem_status(up); serial8250_rpm_put(up); - return serial8250_MSR_to_TIOCM(status); + val = serial8250_MSR_to_TIOCM(status); + if (up->gpios) + return mctrl_gpio_get(up->gpios, &val); + + return val; } EXPORT_SYMBOL_GPL(serial8250_do_get_mctrl); diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig index 296115f6a4d8..509f6a3bb9ff 100644 --- a/drivers/tty/serial/8250/Kconfig +++ b/drivers/tty/serial/8250/Kconfig @@ -8,6 +8,7 @@ config SERIAL_8250 tristate "8250/16550 and compatible serial support" depends on !S390 select SERIAL_CORE + select SERIAL_MCTRL_GPIO if GPIOLIB ---help--- This selects whether you want to include the driver for the standard serial ports. The standard answer is Y. People who might say N diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 5e0b59422a68..bb2bc99388ca 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -110,6 +110,7 @@ struct uart_8250_port { * if no_console_suspend */ unsigned char probe; + struct mctrl_gpios *gpios; #define UART_PROBE_RSA (1 << 0) /* -- cgit v1.2.3 From 150d71f725fd2f5a0015b7fa8df0816a207d4e4b Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 14 May 2019 14:58:03 -0700 Subject: nvmet-fc: add transport discovery change event callback support This patch adds support for the nvmet discovery_change transport op. In turn, the transport adds it's own LLDD api callback discovery_event op to request the LLDD to generate an RSCN for the discovery change. Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Arun Easi Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 11 +++++++++++ include/linux/nvme-fc-driver.h | 6 ++++++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 508661af0f50..1f252c9a953a 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -2549,6 +2549,16 @@ nvmet_fc_remove_port(struct nvmet_port *port) kfree(pe); } +static void +nvmet_fc_discovery_chg(struct nvmet_port *port) +{ + struct nvmet_fc_port_entry *pe = port->priv; + struct nvmet_fc_tgtport *tgtport = pe->tgtport; + + if (tgtport && tgtport->ops->discovery_event) + tgtport->ops->discovery_event(&tgtport->fc_target_port); +} + static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = { .owner = THIS_MODULE, .type = NVMF_TRTYPE_FC, @@ -2557,6 +2567,7 @@ static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = { .remove_port = nvmet_fc_remove_port, .queue_response = nvmet_fc_fcp_nvme_cmd_done, .delete_ctrl = nvmet_fc_delete_ctrl, + .discovery_chg = nvmet_fc_discovery_chg, }; static int __init nvmet_fc_init_module(void) diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index c48e96436f56..98d904961b33 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -791,6 +791,11 @@ struct nvmet_fc_target_port { * nvmefc_tgt_fcp_req. * Entrypoint is Optional. * + * @discovery_event: Called by the transport to generate an RSCN + * change notifications to NVME initiators. The RSCN notifications + * should cause the initiator to rescan the discovery controller + * on the targetport. + * * @max_hw_queues: indicates the maximum number of hw queues the LLDD * supports for cpu affinitization. * Value is Mandatory. Must be at least 1. @@ -832,6 +837,7 @@ struct nvmet_fc_target_template { struct nvmefc_tgt_fcp_req *fcpreq); void (*defer_rcv)(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *fcpreq); + void (*discovery_event)(struct nvmet_fc_target_port *tgtport); u32 max_hw_queues; u16 max_sgl_segments; -- cgit v1.2.3 From 7a1f46e3f75cff5042dfa1bb80c9929a0e412abc Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Thu, 6 Jun 2019 14:30:14 +0900 Subject: nvme: introduce nvme_is_fabrics to check fabrics cmd This patch introduces a nvme_is_fabrics() inline function to check whether or not the given command structure is for fabrics. Signed-off-by: Minwoo Im Reviewed-by: Sagi Grimberg Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 2 +- drivers/nvme/target/core.c | 2 +- drivers/nvme/target/fabrics-cmd.c | 2 +- drivers/nvme/target/fc.c | 2 +- include/linux/nvme.h | 7 ++++++- 5 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5838f7cd53ac..1994d5b42f94 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -578,7 +578,7 @@ bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, switch (ctrl->state) { case NVME_CTRL_NEW: case NVME_CTRL_CONNECTING: - if (req->cmd->common.opcode == nvme_fabrics_command && + if (nvme_is_fabrics(req->cmd) && req->cmd->fabrics.fctype == nvme_fabrics_type_connect) return true; break; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 43e8c4adc1f4..0587707b1a25 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -873,7 +873,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, status = nvmet_parse_connect_cmd(req); else if (likely(req->sq->qid != 0)) status = nvmet_parse_io_cmd(req); - else if (req->cmd->common.opcode == nvme_fabrics_command) + else if (nvme_is_fabrics(req->cmd)) status = nvmet_parse_fabrics_cmd(req); else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC) status = nvmet_parse_discovery_cmd(req); diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 3b9f79aba98f..d16b55ffe79f 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -268,7 +268,7 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; - if (cmd->common.opcode != nvme_fabrics_command) { + if (!nvme_is_fabrics(cmd)) { pr_err("invalid command 0x%x on unconnected queue.\n", cmd->fabrics.opcode); req->error_loc = offsetof(struct nvme_common_command, opcode); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 1f252c9a953a..ce8d819f86cc 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1806,7 +1806,7 @@ nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport, */ rspcnt = atomic_inc_return(&fod->queue->zrspcnt); if (!(rspcnt % fod->queue->ersp_ratio) || - sqe->opcode == nvme_fabrics_command || + nvme_is_fabrics((struct nvme_command *) sqe) || xfr_length != fod->req.transfer_len || (le16_to_cpu(cqe->status) & 0xFFFE) || cqewd[0] || cqewd[1] || (sqe->flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND)) || diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 8028adacaff3..7080923e78d1 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1165,6 +1165,11 @@ struct nvme_command { }; }; +static inline bool nvme_is_fabrics(struct nvme_command *cmd) +{ + return cmd->common.opcode == nvme_fabrics_command; +} + struct nvme_error_slot { __le64 error_count; __le16 sqid; @@ -1186,7 +1191,7 @@ static inline bool nvme_is_write(struct nvme_command *cmd) * * Why can't we simply have a Fabrics In and Fabrics out command? */ - if (unlikely(cmd->common.opcode == nvme_fabrics_command)) + if (unlikely(nvme_is_fabrics(cmd))) return cmd->fabrics.fctype & 1; return cmd->common.opcode & 1; } -- cgit v1.2.3 From 26f2990d85838caa650744a0ded9e38988a2bd7f Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 12 Jun 2019 21:45:30 +0900 Subject: nvme-trace: move opcode symbol print to nvme.h The following patches are going to provide the target-side trace which might need these kind of macros. It would be great if it can be shared between host and target side both. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/trace.h | 44 -------------------------------------------- include/linux/nvme.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index e71502d141ed..62ee29107c32 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -16,50 +16,6 @@ #include "nvme.h" -#define nvme_admin_opcode_name(opcode) { opcode, #opcode } -#define show_admin_opcode_name(val) \ - __print_symbolic(val, \ - nvme_admin_opcode_name(nvme_admin_delete_sq), \ - nvme_admin_opcode_name(nvme_admin_create_sq), \ - nvme_admin_opcode_name(nvme_admin_get_log_page), \ - nvme_admin_opcode_name(nvme_admin_delete_cq), \ - nvme_admin_opcode_name(nvme_admin_create_cq), \ - nvme_admin_opcode_name(nvme_admin_identify), \ - nvme_admin_opcode_name(nvme_admin_abort_cmd), \ - nvme_admin_opcode_name(nvme_admin_set_features), \ - nvme_admin_opcode_name(nvme_admin_get_features), \ - nvme_admin_opcode_name(nvme_admin_async_event), \ - nvme_admin_opcode_name(nvme_admin_ns_mgmt), \ - nvme_admin_opcode_name(nvme_admin_activate_fw), \ - nvme_admin_opcode_name(nvme_admin_download_fw), \ - nvme_admin_opcode_name(nvme_admin_ns_attach), \ - nvme_admin_opcode_name(nvme_admin_keep_alive), \ - nvme_admin_opcode_name(nvme_admin_directive_send), \ - nvme_admin_opcode_name(nvme_admin_directive_recv), \ - nvme_admin_opcode_name(nvme_admin_dbbuf), \ - nvme_admin_opcode_name(nvme_admin_format_nvm), \ - nvme_admin_opcode_name(nvme_admin_security_send), \ - nvme_admin_opcode_name(nvme_admin_security_recv), \ - nvme_admin_opcode_name(nvme_admin_sanitize_nvm)) - -#define nvme_opcode_name(opcode) { opcode, #opcode } -#define show_nvm_opcode_name(val) \ - __print_symbolic(val, \ - nvme_opcode_name(nvme_cmd_flush), \ - nvme_opcode_name(nvme_cmd_write), \ - nvme_opcode_name(nvme_cmd_read), \ - nvme_opcode_name(nvme_cmd_write_uncor), \ - nvme_opcode_name(nvme_cmd_compare), \ - nvme_opcode_name(nvme_cmd_write_zeroes), \ - nvme_opcode_name(nvme_cmd_dsm), \ - nvme_opcode_name(nvme_cmd_resv_register), \ - nvme_opcode_name(nvme_cmd_resv_report), \ - nvme_opcode_name(nvme_cmd_resv_acquire), \ - nvme_opcode_name(nvme_cmd_resv_release)) - -#define show_opcode_name(qid, opcode) \ - (qid ? show_nvm_opcode_name(opcode) : show_admin_opcode_name(opcode)) - const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode, u8 *cdw10); const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, u8 opcode, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 7080923e78d1..86b3d04baf20 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -562,6 +562,22 @@ enum nvme_opcode { nvme_cmd_resv_release = 0x15, }; +#define nvme_opcode_name(opcode) { opcode, #opcode } +#define show_nvm_opcode_name(val) \ + __print_symbolic(val, \ + nvme_opcode_name(nvme_cmd_flush), \ + nvme_opcode_name(nvme_cmd_write), \ + nvme_opcode_name(nvme_cmd_read), \ + nvme_opcode_name(nvme_cmd_write_uncor), \ + nvme_opcode_name(nvme_cmd_compare), \ + nvme_opcode_name(nvme_cmd_write_zeroes), \ + nvme_opcode_name(nvme_cmd_dsm), \ + nvme_opcode_name(nvme_cmd_resv_register), \ + nvme_opcode_name(nvme_cmd_resv_report), \ + nvme_opcode_name(nvme_cmd_resv_acquire), \ + nvme_opcode_name(nvme_cmd_resv_release)) + + /* * Descriptor subtype - lower 4 bits of nvme_(keyed_)sgl_desc identifier * @@ -794,6 +810,35 @@ enum nvme_admin_opcode { nvme_admin_sanitize_nvm = 0x84, }; +#define nvme_admin_opcode_name(opcode) { opcode, #opcode } +#define show_admin_opcode_name(val) \ + __print_symbolic(val, \ + nvme_admin_opcode_name(nvme_admin_delete_sq), \ + nvme_admin_opcode_name(nvme_admin_create_sq), \ + nvme_admin_opcode_name(nvme_admin_get_log_page), \ + nvme_admin_opcode_name(nvme_admin_delete_cq), \ + nvme_admin_opcode_name(nvme_admin_create_cq), \ + nvme_admin_opcode_name(nvme_admin_identify), \ + nvme_admin_opcode_name(nvme_admin_abort_cmd), \ + nvme_admin_opcode_name(nvme_admin_set_features), \ + nvme_admin_opcode_name(nvme_admin_get_features), \ + nvme_admin_opcode_name(nvme_admin_async_event), \ + nvme_admin_opcode_name(nvme_admin_ns_mgmt), \ + nvme_admin_opcode_name(nvme_admin_activate_fw), \ + nvme_admin_opcode_name(nvme_admin_download_fw), \ + nvme_admin_opcode_name(nvme_admin_ns_attach), \ + nvme_admin_opcode_name(nvme_admin_keep_alive), \ + nvme_admin_opcode_name(nvme_admin_directive_send), \ + nvme_admin_opcode_name(nvme_admin_directive_recv), \ + nvme_admin_opcode_name(nvme_admin_dbbuf), \ + nvme_admin_opcode_name(nvme_admin_format_nvm), \ + nvme_admin_opcode_name(nvme_admin_security_send), \ + nvme_admin_opcode_name(nvme_admin_security_recv), \ + nvme_admin_opcode_name(nvme_admin_sanitize_nvm)) + +#define show_opcode_name(qid, opcode) \ + (qid ? show_nvm_opcode_name(opcode) : show_admin_opcode_name(opcode)) + enum { NVME_QUEUE_PHYS_CONTIG = (1 << 0), NVME_CQ_IRQ_ENABLED = (1 << 1), -- cgit v1.2.3 From ad795e47cdef078bfd9e48745040d12104005aab Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 12 Jun 2019 21:45:31 +0900 Subject: nvme-trace: support for fabrics commands in host-side This patch introduces fabrics commands tracing feature from host-side. This patch does not include any changes for the previous host-side tracing, but just add fabrics commands parsing in cmd=() format. Signed-off-by: Minwoo Im [hch: fixed some whitespace damage] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/trace.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/trace.h | 20 ++++++++++----- include/linux/nvme.h | 20 ++++++++++++--- 3 files changed, 94 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c index 14b0d2993cbe..f01ad0fd60bb 100644 --- a/drivers/nvme/host/trace.c +++ b/drivers/nvme/host/trace.c @@ -135,6 +135,69 @@ const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, } } +static const char *nvme_trace_fabrics_property_set(struct trace_seq *p, u8 *spc) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 attrib = spc[0]; + u32 ofst = get_unaligned_le32(spc + 4); + u64 value = get_unaligned_le64(spc + 8); + + trace_seq_printf(p, "attrib=%u, ofst=0x%x, value=0x%llx", + attrib, ofst, value); + trace_seq_putc(p, 0); + return ret; +} + +static const char *nvme_trace_fabrics_connect(struct trace_seq *p, u8 *spc) +{ + const char *ret = trace_seq_buffer_ptr(p); + u16 recfmt = get_unaligned_le16(spc); + u16 qid = get_unaligned_le16(spc + 2); + u16 sqsize = get_unaligned_le16(spc + 4); + u8 cattr = spc[6]; + u32 kato = get_unaligned_le32(spc + 8); + + trace_seq_printf(p, "recfmt=%u, qid=%u, sqsize=%u, cattr=%u, kato=%u", + recfmt, qid, sqsize, cattr, kato); + trace_seq_putc(p, 0); + return ret; +} + +static const char *nvme_trace_fabrics_property_get(struct trace_seq *p, u8 *spc) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 attrib = spc[0]; + u32 ofst = get_unaligned_le32(spc + 4); + + trace_seq_printf(p, "attrib=%u, ofst=0x%x", attrib, ofst); + trace_seq_putc(p, 0); + return ret; +} + +static const char *nvme_trace_fabrics_common(struct trace_seq *p, u8 *spc) +{ + const char *ret = trace_seq_buffer_ptr(p); + + trace_seq_printf(p, "spcecific=%*ph", 24, spc); + trace_seq_putc(p, 0); + return ret; +} + +const char *nvme_trace_parse_fabrics_cmd(struct trace_seq *p, + u8 fctype, u8 *spc) +{ + switch (fctype) { + case nvme_fabrics_type_property_set: + return nvme_trace_fabrics_property_set(p, spc); + case nvme_fabrics_type_connect: + return nvme_trace_fabrics_connect(p, spc); + case nvme_fabrics_type_property_get: + return nvme_trace_fabrics_property_get(p, spc); + default: + return nvme_trace_fabrics_common(p, spc); + } +} + const char *nvme_trace_disk_name(struct trace_seq *p, char *name) { const char *ret = trace_seq_buffer_ptr(p); diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index 62ee29107c32..19a18c87fb7b 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -20,11 +20,15 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode, u8 *cdw10); const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, u8 opcode, u8 *cdw10); +const char *nvme_trace_parse_fabrics_cmd(struct trace_seq *p, u8 fctype, + u8 *spc); -#define parse_nvme_cmd(qid, opcode, cdw10) \ - (qid ? \ - nvme_trace_parse_nvm_cmd(p, opcode, cdw10) : \ - nvme_trace_parse_admin_cmd(p, opcode, cdw10)) +#define parse_nvme_cmd(qid, opcode, fctype, cdw10) \ + ((opcode) == nvme_fabrics_command ? \ + nvme_trace_parse_fabrics_cmd(p, fctype, cdw10) : \ + ((qid) ? \ + nvme_trace_parse_nvm_cmd(p, opcode, cdw10) : \ + nvme_trace_parse_admin_cmd(p, opcode, cdw10))) const char *nvme_trace_disk_name(struct trace_seq *p, char *name); #define __print_disk_name(name) \ @@ -49,6 +53,7 @@ TRACE_EVENT(nvme_setup_cmd, __field(int, qid) __field(u8, opcode) __field(u8, flags) + __field(u8, fctype) __field(u16, cid) __field(u32, nsid) __field(u64, metadata) @@ -62,6 +67,7 @@ TRACE_EVENT(nvme_setup_cmd, __entry->cid = cmd->common.command_id; __entry->nsid = le32_to_cpu(cmd->common.nsid); __entry->metadata = le64_to_cpu(cmd->common.metadata); + __entry->fctype = cmd->fabrics.fctype; __assign_disk_name(__entry->disk, req->rq_disk); memcpy(__entry->cdw10, &cmd->common.cdw10, sizeof(__entry->cdw10)); @@ -70,8 +76,10 @@ TRACE_EVENT(nvme_setup_cmd, __entry->ctrl_id, __print_disk_name(__entry->disk), __entry->qid, __entry->cid, __entry->nsid, __entry->flags, __entry->metadata, - show_opcode_name(__entry->qid, __entry->opcode), - parse_nvme_cmd(__entry->qid, __entry->opcode, __entry->cdw10)) + show_opcode_name(__entry->qid, __entry->opcode, + __entry->fctype), + parse_nvme_cmd(__entry->qid, __entry->opcode, + __entry->fctype, __entry->cdw10)) ); TRACE_EVENT(nvme_complete_rq, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 86b3d04baf20..d98b2d8baf4e 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -836,9 +836,6 @@ enum nvme_admin_opcode { nvme_admin_opcode_name(nvme_admin_security_recv), \ nvme_admin_opcode_name(nvme_admin_sanitize_nvm)) -#define show_opcode_name(qid, opcode) \ - (qid ? show_nvm_opcode_name(opcode) : show_admin_opcode_name(opcode)) - enum { NVME_QUEUE_PHYS_CONTIG = (1 << 0), NVME_CQ_IRQ_ENABLED = (1 << 1), @@ -1053,6 +1050,23 @@ enum nvmf_capsule_command { nvme_fabrics_type_property_get = 0x04, }; +#define nvme_fabrics_type_name(type) { type, #type } +#define show_fabrics_type_name(type) \ + __print_symbolic(type, \ + nvme_fabrics_type_name(nvme_fabrics_type_property_set), \ + nvme_fabrics_type_name(nvme_fabrics_type_connect), \ + nvme_fabrics_type_name(nvme_fabrics_type_property_get)) + +/* + * If not fabrics command, fctype will be ignored. + */ +#define show_opcode_name(qid, opcode, fctype) \ + ((opcode) == nvme_fabrics_command ? \ + show_fabrics_type_name(fctype) : \ + ((qid) ? \ + show_nvm_opcode_name(opcode) : \ + show_admin_opcode_name(opcode))) + struct nvmf_common_command { __u8 opcode; __u8 resv1; -- cgit v1.2.3 From 84705f9f8c64cc3d8409ac63e5dd06ed97886fb7 Mon Sep 17 00:00:00 2001 From: Jolly Shah Date: Wed, 19 Jun 2019 13:59:34 -0700 Subject: firmware: xilinx: zynqmp: Remove unused macro ZYNQMP_PM_CAPABILITY_POWER capability is not supported by firmware and hence needs to be removed Signed-off-by: Tejas Patel Signed-off-by: Michal Simek Signed-off-by: Jolly Shah Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware/xlnx-zynqmp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 1262ea6a1f4b..778abbbc7d94 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -46,7 +46,6 @@ #define ZYNQMP_PM_CAPABILITY_ACCESS 0x1U #define ZYNQMP_PM_CAPABILITY_CONTEXT 0x2U #define ZYNQMP_PM_CAPABILITY_WAKEUP 0x4U -#define ZYNQMP_PM_CAPABILITY_POWER 0x8U /* * Firmware FPGA Manager flags -- cgit v1.2.3 From 1c2eb5b2853c9f513690ba6b71072d8eb65da16a Mon Sep 17 00:00:00 2001 From: Vishnu DASA Date: Fri, 24 May 2019 15:13:10 +0000 Subject: VMCI: Fix integer overflow in VMCI handle arrays The VMCI handle array has an integer overflow in vmci_handle_arr_append_entry when it tries to expand the array. This can be triggered from a guest, since the doorbell link hypercall doesn't impose a limit on the number of doorbell handles that a VM can create in the hypervisor, and these handles are stored in a handle array. In this change, we introduce a mandatory max capacity for handle arrays/lists to avoid excessive memory usage. Signed-off-by: Vishnu Dasa Reviewed-by: Adit Ranadive Reviewed-by: Jorgen Hansen Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_vmci/vmci_context.c | 80 +++++++++++++++++-------------- drivers/misc/vmw_vmci/vmci_handle_array.c | 38 ++++++++++----- drivers/misc/vmw_vmci/vmci_handle_array.h | 29 +++++++---- include/linux/vmw_vmci_defs.h | 11 ++++- 4 files changed, 99 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/vmw_vmci/vmci_context.c b/drivers/misc/vmw_vmci/vmci_context.c index 300ed69fe2c7..16695366ec92 100644 --- a/drivers/misc/vmw_vmci/vmci_context.c +++ b/drivers/misc/vmw_vmci/vmci_context.c @@ -21,6 +21,9 @@ #include "vmci_driver.h" #include "vmci_event.h" +/* Use a wide upper bound for the maximum contexts. */ +#define VMCI_MAX_CONTEXTS 2000 + /* * List of current VMCI contexts. Contexts can be added by * vmci_ctx_create() and removed via vmci_ctx_destroy(). @@ -117,19 +120,22 @@ struct vmci_ctx *vmci_ctx_create(u32 cid, u32 priv_flags, /* Initialize host-specific VMCI context. */ init_waitqueue_head(&context->host_context.wait_queue); - context->queue_pair_array = vmci_handle_arr_create(0); + context->queue_pair_array = + vmci_handle_arr_create(0, VMCI_MAX_GUEST_QP_COUNT); if (!context->queue_pair_array) { error = -ENOMEM; goto err_free_ctx; } - context->doorbell_array = vmci_handle_arr_create(0); + context->doorbell_array = + vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT); if (!context->doorbell_array) { error = -ENOMEM; goto err_free_qp_array; } - context->pending_doorbell_array = vmci_handle_arr_create(0); + context->pending_doorbell_array = + vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT); if (!context->pending_doorbell_array) { error = -ENOMEM; goto err_free_db_array; @@ -204,7 +210,7 @@ static int ctx_fire_notification(u32 context_id, u32 priv_flags) * We create an array to hold the subscribers we find when * scanning through all contexts. */ - subscriber_array = vmci_handle_arr_create(0); + subscriber_array = vmci_handle_arr_create(0, VMCI_MAX_CONTEXTS); if (subscriber_array == NULL) return VMCI_ERROR_NO_MEM; @@ -623,20 +629,26 @@ int vmci_ctx_add_notification(u32 context_id, u32 remote_cid) spin_lock(&context->lock); - list_for_each_entry(n, &context->notifier_list, node) { - if (vmci_handle_is_equal(n->handle, notifier->handle)) { - exists = true; - break; + if (context->n_notifiers < VMCI_MAX_CONTEXTS) { + list_for_each_entry(n, &context->notifier_list, node) { + if (vmci_handle_is_equal(n->handle, notifier->handle)) { + exists = true; + break; + } } - } - if (exists) { - kfree(notifier); - result = VMCI_ERROR_ALREADY_EXISTS; + if (exists) { + kfree(notifier); + result = VMCI_ERROR_ALREADY_EXISTS; + } else { + list_add_tail_rcu(¬ifier->node, + &context->notifier_list); + context->n_notifiers++; + result = VMCI_SUCCESS; + } } else { - list_add_tail_rcu(¬ifier->node, &context->notifier_list); - context->n_notifiers++; - result = VMCI_SUCCESS; + kfree(notifier); + result = VMCI_ERROR_NO_MEM; } spin_unlock(&context->lock); @@ -721,8 +733,7 @@ static int vmci_ctx_get_chkpt_doorbells(struct vmci_ctx *context, u32 *buf_size, void **pbuf) { struct dbell_cpt_state *dbells; - size_t n_doorbells; - int i; + u32 i, n_doorbells; n_doorbells = vmci_handle_arr_get_size(context->doorbell_array); if (n_doorbells > 0) { @@ -860,7 +871,8 @@ int vmci_ctx_rcv_notifications_get(u32 context_id, spin_lock(&context->lock); *db_handle_array = context->pending_doorbell_array; - context->pending_doorbell_array = vmci_handle_arr_create(0); + context->pending_doorbell_array = + vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT); if (!context->pending_doorbell_array) { context->pending_doorbell_array = *db_handle_array; *db_handle_array = NULL; @@ -942,12 +954,11 @@ int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle) return VMCI_ERROR_NOT_FOUND; spin_lock(&context->lock); - if (!vmci_handle_arr_has_entry(context->doorbell_array, handle)) { - vmci_handle_arr_append_entry(&context->doorbell_array, handle); - result = VMCI_SUCCESS; - } else { + if (!vmci_handle_arr_has_entry(context->doorbell_array, handle)) + result = vmci_handle_arr_append_entry(&context->doorbell_array, + handle); + else result = VMCI_ERROR_DUPLICATE_ENTRY; - } spin_unlock(&context->lock); vmci_ctx_put(context); @@ -1083,15 +1094,16 @@ int vmci_ctx_notify_dbell(u32 src_cid, if (!vmci_handle_arr_has_entry( dst_context->pending_doorbell_array, handle)) { - vmci_handle_arr_append_entry( + result = vmci_handle_arr_append_entry( &dst_context->pending_doorbell_array, handle); - - ctx_signal_notify(dst_context); - wake_up(&dst_context->host_context.wait_queue); - + if (result == VMCI_SUCCESS) { + ctx_signal_notify(dst_context); + wake_up(&dst_context->host_context.wait_queue); + } + } else { + result = VMCI_SUCCESS; } - result = VMCI_SUCCESS; } spin_unlock(&dst_context->lock); } @@ -1118,13 +1130,11 @@ int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle) if (context == NULL || vmci_handle_is_invalid(handle)) return VMCI_ERROR_INVALID_ARGS; - if (!vmci_handle_arr_has_entry(context->queue_pair_array, handle)) { - vmci_handle_arr_append_entry(&context->queue_pair_array, - handle); - result = VMCI_SUCCESS; - } else { + if (!vmci_handle_arr_has_entry(context->queue_pair_array, handle)) + result = vmci_handle_arr_append_entry( + &context->queue_pair_array, handle); + else result = VMCI_ERROR_DUPLICATE_ENTRY; - } return result; } diff --git a/drivers/misc/vmw_vmci/vmci_handle_array.c b/drivers/misc/vmw_vmci/vmci_handle_array.c index c527388f5d7b..de7fee7ead1b 100644 --- a/drivers/misc/vmw_vmci/vmci_handle_array.c +++ b/drivers/misc/vmw_vmci/vmci_handle_array.c @@ -8,24 +8,29 @@ #include #include "vmci_handle_array.h" -static size_t handle_arr_calc_size(size_t capacity) +static size_t handle_arr_calc_size(u32 capacity) { - return sizeof(struct vmci_handle_arr) + + return VMCI_HANDLE_ARRAY_HEADER_SIZE + capacity * sizeof(struct vmci_handle); } -struct vmci_handle_arr *vmci_handle_arr_create(size_t capacity) +struct vmci_handle_arr *vmci_handle_arr_create(u32 capacity, u32 max_capacity) { struct vmci_handle_arr *array; + if (max_capacity == 0 || capacity > max_capacity) + return NULL; + if (capacity == 0) - capacity = VMCI_HANDLE_ARRAY_DEFAULT_SIZE; + capacity = min((u32)VMCI_HANDLE_ARRAY_DEFAULT_CAPACITY, + max_capacity); array = kmalloc(handle_arr_calc_size(capacity), GFP_ATOMIC); if (!array) return NULL; array->capacity = capacity; + array->max_capacity = max_capacity; array->size = 0; return array; @@ -36,27 +41,34 @@ void vmci_handle_arr_destroy(struct vmci_handle_arr *array) kfree(array); } -void vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr, - struct vmci_handle handle) +int vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr, + struct vmci_handle handle) { struct vmci_handle_arr *array = *array_ptr; if (unlikely(array->size >= array->capacity)) { /* reallocate. */ struct vmci_handle_arr *new_array; - size_t new_capacity = array->capacity * VMCI_ARR_CAP_MULT; - size_t new_size = handle_arr_calc_size(new_capacity); + u32 capacity_bump = min(array->max_capacity - array->capacity, + array->capacity); + size_t new_size = handle_arr_calc_size(array->capacity + + capacity_bump); + + if (array->size >= array->max_capacity) + return VMCI_ERROR_NO_MEM; new_array = krealloc(array, new_size, GFP_ATOMIC); if (!new_array) - return; + return VMCI_ERROR_NO_MEM; - new_array->capacity = new_capacity; + new_array->capacity += capacity_bump; *array_ptr = array = new_array; } array->entries[array->size] = handle; array->size++; + + return VMCI_SUCCESS; } /* @@ -66,7 +78,7 @@ struct vmci_handle vmci_handle_arr_remove_entry(struct vmci_handle_arr *array, struct vmci_handle entry_handle) { struct vmci_handle handle = VMCI_INVALID_HANDLE; - size_t i; + u32 i; for (i = 0; i < array->size; i++) { if (vmci_handle_is_equal(array->entries[i], entry_handle)) { @@ -101,7 +113,7 @@ struct vmci_handle vmci_handle_arr_remove_tail(struct vmci_handle_arr *array) * Handle at given index, VMCI_INVALID_HANDLE if invalid index. */ struct vmci_handle -vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, size_t index) +vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, u32 index) { if (unlikely(index >= array->size)) return VMCI_INVALID_HANDLE; @@ -112,7 +124,7 @@ vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, size_t index) bool vmci_handle_arr_has_entry(const struct vmci_handle_arr *array, struct vmci_handle entry_handle) { - size_t i; + u32 i; for (i = 0; i < array->size; i++) if (vmci_handle_is_equal(array->entries[i], entry_handle)) diff --git a/drivers/misc/vmw_vmci/vmci_handle_array.h b/drivers/misc/vmw_vmci/vmci_handle_array.h index bd1559a548e9..96193f85be5b 100644 --- a/drivers/misc/vmw_vmci/vmci_handle_array.h +++ b/drivers/misc/vmw_vmci/vmci_handle_array.h @@ -9,32 +9,41 @@ #define _VMCI_HANDLE_ARRAY_H_ #include +#include #include -#define VMCI_HANDLE_ARRAY_DEFAULT_SIZE 4 -#define VMCI_ARR_CAP_MULT 2 /* Array capacity multiplier */ - struct vmci_handle_arr { - size_t capacity; - size_t size; + u32 capacity; + u32 max_capacity; + u32 size; + u32 pad; struct vmci_handle entries[]; }; -struct vmci_handle_arr *vmci_handle_arr_create(size_t capacity); +#define VMCI_HANDLE_ARRAY_HEADER_SIZE \ + offsetof(struct vmci_handle_arr, entries) +/* Select a default capacity that results in a 64 byte sized array */ +#define VMCI_HANDLE_ARRAY_DEFAULT_CAPACITY 6 +/* Make sure that the max array size can be expressed by a u32 */ +#define VMCI_HANDLE_ARRAY_MAX_CAPACITY \ + ((U32_MAX - VMCI_HANDLE_ARRAY_HEADER_SIZE - 1) / \ + sizeof(struct vmci_handle)) + +struct vmci_handle_arr *vmci_handle_arr_create(u32 capacity, u32 max_capacity); void vmci_handle_arr_destroy(struct vmci_handle_arr *array); -void vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr, - struct vmci_handle handle); +int vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr, + struct vmci_handle handle); struct vmci_handle vmci_handle_arr_remove_entry(struct vmci_handle_arr *array, struct vmci_handle entry_handle); struct vmci_handle vmci_handle_arr_remove_tail(struct vmci_handle_arr *array); struct vmci_handle -vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, size_t index); +vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, u32 index); bool vmci_handle_arr_has_entry(const struct vmci_handle_arr *array, struct vmci_handle entry_handle); struct vmci_handle *vmci_handle_arr_get_handles(struct vmci_handle_arr *array); -static inline size_t vmci_handle_arr_get_size( +static inline u32 vmci_handle_arr_get_size( const struct vmci_handle_arr *array) { return array->size; diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index 606504bf376a..fefb5292403b 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -62,9 +62,18 @@ enum { /* * A single VMCI device has an upper limit of 128MB on the amount of - * memory that can be used for queue pairs. + * memory that can be used for queue pairs. Since each queue pair + * consists of at least two pages, the memory limit also dictates the + * number of queue pairs a guest can create. */ #define VMCI_MAX_GUEST_QP_MEMORY (128 * 1024 * 1024) +#define VMCI_MAX_GUEST_QP_COUNT (VMCI_MAX_GUEST_QP_MEMORY / PAGE_SIZE / 2) + +/* + * There can be at most PAGE_SIZE doorbells since there is one doorbell + * per byte in the doorbell bitmap page. + */ +#define VMCI_MAX_GUEST_DOORBELL_COUNT PAGE_SIZE /* * Queues with pre-mapped data pages must be small, so that we don't pin -- cgit v1.2.3 From 43a38c3f318082839d7e613352d4dae7bbdfcdec Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 17 Jun 2019 15:15:04 +0200 Subject: netfilter: fix nf_conntrack_bridge/ipv6 link error When CONFIG_IPV6 is disabled, the bridge netfilter code produces a link error: ERROR: "br_ip6_fragment" [net/bridge/netfilter/nf_conntrack_bridge.ko] undefined! ERROR: "nf_ct_frag6_gather" [net/bridge/netfilter/nf_conntrack_bridge.ko] undefined! The problem is that it assumes that whenever IPV6 is not a loadable module, we can call the functions direction. This is clearly not true when IPV6 is disabled. There are two other functions defined like this in linux/netfilter_ipv6.h, so change them all the same way. Fixes: 764dd163ac92 ("netfilter: nf_conntrack_bridge: add support for IPv6") Signed-off-by: Arnd Bergmann Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 22e6398bc482..7beb681e1ce5 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -75,8 +75,10 @@ static inline int nf_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, return 1; return v6_ops->chk_addr(net, addr, dev, strict); -#else +#elif IS_BUILTIN(CONFIG_IPV6) return ipv6_chk_addr(net, addr, dev, strict); +#else + return 1; #endif } @@ -113,8 +115,10 @@ static inline int nf_ipv6_br_defrag(struct net *net, struct sk_buff *skb, return 1; return v6_ops->br_defrag(net, skb, user); -#else +#elif IS_BUILTIN(CONFIG_IPV6) return nf_ct_frag6_gather(net, skb, user); +#else + return 1; #endif } @@ -138,8 +142,10 @@ static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk, return 1; return v6_ops->br_fragment(net, sk, skb, data, output); -#else +#elif IS_BUILTIN(CONFIG_IPV6) return br_ip6_fragment(net, sk, skb, data, output); +#else + return 1; #endif } @@ -154,8 +160,10 @@ static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb) return -EHOSTUNREACH; return v6_ops->route_me_harder(net, skb); -#else +#elif IS_BUILTIN(CONFIG_IPV6) return ip6_route_me_harder(net, skb); +#else + return -EHOSTUNREACH; #endif } -- cgit v1.2.3 From a78cf9657ba5426f54aa93a067c10d097944c082 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sat, 15 Jun 2019 10:23:57 +1000 Subject: PCI/ACPI: Evaluate PCI Boot Configuration _DSM Evaluate _DSM Function #5, the "PCI Boot Configuration" function. If the result is 0, the OS should preserve any resource assignments made by the firmware. Link: https://lore.kernel.org/r/20190615002359.29577-2-benh@kernel.crashing.org Signed-off-by: Benjamin Herrenschmidt [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas --- drivers/acpi/pci_root.c | 12 ++++++++++++ include/linux/pci-acpi.h | 7 ++++--- include/linux/pci.h | 2 ++ 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index c36781a9b493..0d57f817ef1e 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -894,6 +894,7 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root, int node = acpi_get_node(device->handle); struct pci_bus *bus; struct pci_host_bridge *host_bridge; + union acpi_object *obj; info->root = root; info->bridge = device; @@ -930,6 +931,17 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root, if (!(root->osc_control_set & OSC_PCI_EXPRESS_LTR_CONTROL)) host_bridge->native_ltr = 0; + /* + * Evaluate the "PCI Boot Configuration" _DSM Function. If it + * exists and returns 0, we must preserve any PCI resource + * assignments made by firmware for this host bridge. + */ + obj = acpi_evaluate_dsm(ACPI_HANDLE(bus->bridge), &pci_acpi_dsm_guid, 1, + IGNORE_PCI_BOOT_CONFIG_DSM, NULL); + if (obj && obj->type == ACPI_TYPE_INTEGER && obj->integer.value == 0) + host_bridge->preserve_config = 1; + ACPI_FREE(obj); + pci_scan_child_bus(bus); pci_set_host_bridge_release(host_bridge, acpi_pci_root_release_info, info); diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 8082b612f561..62b7fdcc661c 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -107,9 +107,10 @@ static inline void acpiphp_check_host_bridge(struct acpi_device *adev) { } #endif extern const guid_t pci_acpi_dsm_guid; -#define DEVICE_LABEL_DSM 0x07 -#define RESET_DELAY_DSM 0x08 -#define FUNCTION_DELAY_DSM 0x09 +#define IGNORE_PCI_BOOT_CONFIG_DSM 0x05 +#define DEVICE_LABEL_DSM 0x07 +#define RESET_DELAY_DSM 0x08 +#define FUNCTION_DELAY_DSM 0x09 #else /* CONFIG_ACPI */ static inline void acpi_pci_add_bus(struct pci_bus *bus) { } diff --git a/include/linux/pci.h b/include/linux/pci.h index 4a5a84d7bdd4..5e2b309363a3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -505,6 +505,8 @@ struct pci_host_bridge { unsigned int native_shpc_hotplug:1; /* OS may use SHPC hotplug */ unsigned int native_pme:1; /* OS may use PCIe PME */ unsigned int native_ltr:1; /* OS may use PCIe LTR */ + unsigned int preserve_config:1; /* Preserve FW resource setup */ + /* Resource alignment requirements */ resource_size_t (*align_resource)(struct pci_dev *dev, const struct resource *res, -- cgit v1.2.3 From 4cfd218855923a07dc02a5bec3d3bb37a118ebc2 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 18 Jun 2019 23:13:48 +0200 Subject: PCI: let pci_disable_link_state propagate errors Drivers may rely on pci_disable_link_state() having disabled certain ASPM link states. If OS can't control ASPM then pci_disable_link_state() turns into a no-op w/o informing the caller. The driver therefore may falsely assume the respective ASPM link states are disabled. Let pci_disable_link_state() propagate errors to the caller, enabling the caller to react accordingly. Signed-off-by: Heiner Kallweit Acked-by: Bjorn Helgaas Signed-off-by: David S. Miller --- drivers/pci/pcie/aspm.c | 20 +++++++++++--------- include/linux/pci-aspm.h | 7 ++++--- 2 files changed, 15 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index fd4cb75088f9..e44af7f4d37f 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -1062,18 +1062,18 @@ void pcie_aspm_powersave_config_link(struct pci_dev *pdev) up_read(&pci_bus_sem); } -static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) +static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) { struct pci_dev *parent = pdev->bus->self; struct pcie_link_state *link; if (!pci_is_pcie(pdev)) - return; + return 0; if (pdev->has_secondary_link) parent = pdev; if (!parent || !parent->link_state) - return; + return -EINVAL; /* * A driver requested that ASPM be disabled on this device, but @@ -1085,7 +1085,7 @@ static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) */ if (aspm_disabled) { pci_warn(pdev, "can't disable ASPM; OS doesn't have ASPM control\n"); - return; + return -EPERM; } if (sem) @@ -1105,11 +1105,13 @@ static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) mutex_unlock(&aspm_lock); if (sem) up_read(&pci_bus_sem); + + return 0; } -void pci_disable_link_state_locked(struct pci_dev *pdev, int state) +int pci_disable_link_state_locked(struct pci_dev *pdev, int state) { - __pci_disable_link_state(pdev, state, false); + return __pci_disable_link_state(pdev, state, false); } EXPORT_SYMBOL(pci_disable_link_state_locked); @@ -1117,14 +1119,14 @@ EXPORT_SYMBOL(pci_disable_link_state_locked); * pci_disable_link_state - Disable device's link state, so the link will * never enter specific states. Note that if the BIOS didn't grant ASPM * control to the OS, this does nothing because we can't touch the LNKCTL - * register. + * register. Returns 0 or a negative errno. * * @pdev: PCI device * @state: ASPM link state to disable */ -void pci_disable_link_state(struct pci_dev *pdev, int state) +int pci_disable_link_state(struct pci_dev *pdev, int state) { - __pci_disable_link_state(pdev, state, true); + return __pci_disable_link_state(pdev, state, true); } EXPORT_SYMBOL(pci_disable_link_state); diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h index df28af5cef21..67064145d76e 100644 --- a/include/linux/pci-aspm.h +++ b/include/linux/pci-aspm.h @@ -24,11 +24,12 @@ #define PCIE_LINK_STATE_CLKPM 4 #ifdef CONFIG_PCIEASPM -void pci_disable_link_state(struct pci_dev *pdev, int state); -void pci_disable_link_state_locked(struct pci_dev *pdev, int state); +int pci_disable_link_state(struct pci_dev *pdev, int state); +int pci_disable_link_state_locked(struct pci_dev *pdev, int state); void pcie_no_aspm(void); #else -static inline void pci_disable_link_state(struct pci_dev *pdev, int state) { } +static inline int pci_disable_link_state(struct pci_dev *pdev, int state) +{ return 0; } static inline void pcie_no_aspm(void) { } #endif -- cgit v1.2.3 From d308dfbf62eff897d71968d764f21a78678ee0a5 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 18 Jun 2019 12:58:33 +0200 Subject: i2c: mux/i801: Switch to use descriptor passing This switches the i801 GPIO mux to use GPIO descriptors for handling the GPIO lines. The previous hack which was reaching inside the GPIO chips etc cannot live on. We pass descriptors along with the GPIO mux device at creation instead. The GPIO mux was only used by way of platform data with a platform device from one place in the kernel: the i801 i2c bus driver. Let's just associate the GPIO descriptor table with the actual device like everyone else and dynamically create a descriptor table passed along with the GPIO i2c mux. This enables simplification of the GPIO i2c mux driver to use only the descriptor API and the OF probe path gets simplified in the process. The i801 driver was registering the GPIO i2c mux with PLATFORM_DEVID_AUTO which would make it hard to predict the device name and assign the descriptor table properly, but this seems to be a mistake to begin with: all of the GPIO mux devices are hardcoded to look up GPIO lines from the "gpio_ich" GPIO chip. If there are more than one mux, there is certainly more than one gpio chip as well, and then we have more serious problems. Switch to PLATFORM_DEVID_NONE instead. There can be only one. Cc: Mika Westerberg Cc: Andy Shevchenko Cc: Peter Rosin Cc: Jean Delvare Signed-off-by: Serge Semin Signed-off-by: Linus Walleij Reviewed-by: Andy Shevchenko Reviewed-by: Mika Westerberg [Removed a newline, suggested by Andy. /Peter] Signed-off-by: Peter Rosin --- drivers/i2c/busses/i2c-i801.c | 37 +++++++-- drivers/i2c/muxes/i2c-mux-gpio.c | 116 ++++++++--------------------- include/linux/platform_data/i2c-mux-gpio.h | 7 -- 3 files changed, 60 insertions(+), 100 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 679c6c41f64b..bf484cd775ec 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -107,7 +107,7 @@ #include #if IS_ENABLED(CONFIG_I2C_MUX_GPIO) && defined CONFIG_DMI -#include +#include #include #endif @@ -274,6 +274,7 @@ struct i801_priv { #if IS_ENABLED(CONFIG_I2C_MUX_GPIO) && defined CONFIG_DMI const struct i801_mux_config *mux_drvdata; struct platform_device *mux_pdev; + struct gpiod_lookup_table *lookup; #endif struct platform_device *tco_pdev; @@ -1258,7 +1259,8 @@ static int i801_add_mux(struct i801_priv *priv) struct device *dev = &priv->adapter.dev; const struct i801_mux_config *mux_config; struct i2c_mux_gpio_platform_data gpio_data; - int err; + struct gpiod_lookup_table *lookup; + int err, i; if (!priv->mux_drvdata) return 0; @@ -1270,17 +1272,36 @@ static int i801_add_mux(struct i801_priv *priv) gpio_data.values = mux_config->values; gpio_data.n_values = mux_config->n_values; gpio_data.classes = mux_config->classes; - gpio_data.gpio_chip = mux_config->gpio_chip; - gpio_data.gpios = mux_config->gpios; - gpio_data.n_gpios = mux_config->n_gpios; gpio_data.idle = I2C_MUX_GPIO_NO_IDLE; - /* Register the mux device */ + /* Register GPIO descriptor lookup table */ + lookup = devm_kzalloc(dev, + struct_size(lookup, table, mux_config->n_gpios), + GFP_KERNEL); + if (!lookup) + return -ENOMEM; + lookup->dev_id = "i2c-mux-gpio"; + for (i = 0; i < mux_config->n_gpios; i++) { + lookup->table[i].chip_label = mux_config->gpio_chip; + lookup->table[i].chip_hwnum = mux_config->gpios[i]; + lookup->table[i].con_id = "mux"; + } + gpiod_add_lookup_table(lookup); + priv->lookup = lookup; + + /* + * Register the mux device, we use PLATFORM_DEVID_NONE here + * because since we are referring to the GPIO chip by name we are + * anyways in deep trouble if there is more than one of these + * devices, and there should likely only be one platform controller + * hub. + */ priv->mux_pdev = platform_device_register_data(dev, "i2c-mux-gpio", - PLATFORM_DEVID_AUTO, &gpio_data, + PLATFORM_DEVID_NONE, &gpio_data, sizeof(struct i2c_mux_gpio_platform_data)); if (IS_ERR(priv->mux_pdev)) { err = PTR_ERR(priv->mux_pdev); + gpiod_remove_lookup_table(lookup); priv->mux_pdev = NULL; dev_err(dev, "Failed to register i2c-mux-gpio device\n"); return err; @@ -1293,6 +1314,8 @@ static void i801_del_mux(struct i801_priv *priv) { if (priv->mux_pdev) platform_device_unregister(priv->mux_pdev); + if (priv->lookup) + gpiod_remove_lookup_table(priv->lookup); } static unsigned int i801_get_adapter_class(struct i801_priv *priv) diff --git a/drivers/i2c/muxes/i2c-mux-gpio.c b/drivers/i2c/muxes/i2c-mux-gpio.c index 13882a2a4f60..fd482feafb19 100644 --- a/drivers/i2c/muxes/i2c-mux-gpio.c +++ b/drivers/i2c/muxes/i2c-mux-gpio.c @@ -14,13 +14,14 @@ #include #include #include -#include +#include +#include +/* FIXME: stop poking around inside gpiolib */ #include "../../gpio/gpiolib.h" -#include struct gpiomux { struct i2c_mux_gpio_platform_data data; - unsigned gpio_base; + int ngpios; struct gpio_desc **gpios; }; @@ -30,8 +31,7 @@ static void i2c_mux_gpio_set(const struct gpiomux *mux, unsigned val) values[0] = val; - gpiod_set_array_value_cansleep(mux->data.n_gpios, mux->gpios, NULL, - values); + gpiod_set_array_value_cansleep(mux->ngpios, mux->gpios, NULL, values); } static int i2c_mux_gpio_select(struct i2c_mux_core *muxc, u32 chan) @@ -52,12 +52,6 @@ static int i2c_mux_gpio_deselect(struct i2c_mux_core *muxc, u32 chan) return 0; } -static int match_gpio_chip_by_label(struct gpio_chip *chip, - void *data) -{ - return !strcmp(chip->label, data); -} - #ifdef CONFIG_OF static int i2c_mux_gpio_probe_dt(struct gpiomux *mux, struct platform_device *pdev) @@ -65,8 +59,8 @@ static int i2c_mux_gpio_probe_dt(struct gpiomux *mux, struct device_node *np = pdev->dev.of_node; struct device_node *adapter_np, *child; struct i2c_adapter *adapter; - unsigned *values, *gpios; - int i = 0, ret; + unsigned *values; + int i = 0; if (!np) return -ENODEV; @@ -103,29 +97,6 @@ static int i2c_mux_gpio_probe_dt(struct gpiomux *mux, if (of_property_read_u32(np, "idle-state", &mux->data.idle)) mux->data.idle = I2C_MUX_GPIO_NO_IDLE; - mux->data.n_gpios = of_gpio_named_count(np, "mux-gpios"); - if (mux->data.n_gpios < 0) { - dev_err(&pdev->dev, "Missing mux-gpios property in the DT.\n"); - return -EINVAL; - } - - gpios = devm_kcalloc(&pdev->dev, - mux->data.n_gpios, sizeof(*mux->data.gpios), - GFP_KERNEL); - if (!gpios) { - dev_err(&pdev->dev, "Cannot allocate gpios array"); - return -ENOMEM; - } - - for (i = 0; i < mux->data.n_gpios; i++) { - ret = of_get_named_gpio(np, "mux-gpios", i); - if (ret < 0) - return ret; - gpios[i] = ret; - } - - mux->data.gpios = gpios; - return 0; } #else @@ -142,8 +113,8 @@ static int i2c_mux_gpio_probe(struct platform_device *pdev) struct gpiomux *mux; struct i2c_adapter *parent; struct i2c_adapter *root; - unsigned initial_state, gpio_base; - int i, ret; + unsigned initial_state; + int i, ngpios, ret; mux = devm_kzalloc(&pdev->dev, sizeof(*mux), GFP_KERNEL); if (!mux) @@ -158,29 +129,19 @@ static int i2c_mux_gpio_probe(struct platform_device *pdev) sizeof(mux->data)); } - /* - * If a GPIO chip name is provided, the GPIO pin numbers provided are - * relative to its base GPIO number. Otherwise they are absolute. - */ - if (mux->data.gpio_chip) { - struct gpio_chip *gpio; - - gpio = gpiochip_find(mux->data.gpio_chip, - match_gpio_chip_by_label); - if (!gpio) - return -EPROBE_DEFER; - - gpio_base = gpio->base; - } else { - gpio_base = 0; + ngpios = gpiod_count(&pdev->dev, "mux"); + if (ngpios <= 0) { + dev_err(&pdev->dev, "no valid gpios provided\n"); + return ngpios ?: -EINVAL; } + mux->ngpios = ngpios; parent = i2c_get_adapter(mux->data.parent); if (!parent) return -EPROBE_DEFER; muxc = i2c_mux_alloc(parent, &pdev->dev, mux->data.n_values, - mux->data.n_gpios * sizeof(*mux->gpios), 0, + ngpios * sizeof(*mux->gpios), 0, i2c_mux_gpio_select, NULL); if (!muxc) { ret = -ENOMEM; @@ -194,7 +155,6 @@ static int i2c_mux_gpio_probe(struct platform_device *pdev) root = i2c_root_adapter(&parent->dev); muxc->mux_locked = true; - mux->gpio_base = gpio_base; if (mux->data.idle != I2C_MUX_GPIO_NO_IDLE) { initial_state = mux->data.idle; @@ -203,34 +163,28 @@ static int i2c_mux_gpio_probe(struct platform_device *pdev) initial_state = mux->data.values[0]; } - for (i = 0; i < mux->data.n_gpios; i++) { + for (i = 0; i < ngpios; i++) { struct device *gpio_dev; - struct gpio_desc *gpio_desc; - - ret = gpio_request(gpio_base + mux->data.gpios[i], "i2c-mux-gpio"); - if (ret) { - dev_err(&pdev->dev, "Failed to request GPIO %d\n", - mux->data.gpios[i]); - goto err_request_gpio; + struct gpio_desc *gpiod; + enum gpiod_flags flag; + + if (initial_state & BIT(i)) + flag = GPIOD_OUT_HIGH; + else + flag = GPIOD_OUT_LOW; + gpiod = devm_gpiod_get_index(&pdev->dev, "mux", i, flag); + if (IS_ERR(gpiod)) { + ret = PTR_ERR(gpiod); + goto alloc_failed; } - ret = gpio_direction_output(gpio_base + mux->data.gpios[i], - initial_state & (1 << i)); - if (ret) { - dev_err(&pdev->dev, - "Failed to set direction of GPIO %d to output\n", - mux->data.gpios[i]); - i++; /* gpio_request above succeeded, so must free */ - goto err_request_gpio; - } - - gpio_desc = gpio_to_desc(gpio_base + mux->data.gpios[i]); - mux->gpios[i] = gpio_desc; + mux->gpios[i] = gpiod; if (!muxc->mux_locked) continue; - gpio_dev = &gpio_desc->gdev->dev; + /* FIXME: find a proper way to access the GPIO device */ + gpio_dev = &gpiod->gdev->dev; muxc->mux_locked = i2c_root_adapter(gpio_dev) == root; } @@ -253,10 +207,6 @@ static int i2c_mux_gpio_probe(struct platform_device *pdev) add_adapter_failed: i2c_mux_del_adapters(muxc); - i = mux->data.n_gpios; -err_request_gpio: - for (; i > 0; i--) - gpio_free(gpio_base + mux->data.gpios[i - 1]); alloc_failed: i2c_put_adapter(parent); @@ -266,14 +216,8 @@ alloc_failed: static int i2c_mux_gpio_remove(struct platform_device *pdev) { struct i2c_mux_core *muxc = platform_get_drvdata(pdev); - struct gpiomux *mux = i2c_mux_priv(muxc); - int i; i2c_mux_del_adapters(muxc); - - for (i = 0; i < mux->data.n_gpios; i++) - gpio_free(mux->gpio_base + mux->data.gpios[i]); - i2c_put_adapter(muxc->parent); return 0; diff --git a/include/linux/platform_data/i2c-mux-gpio.h b/include/linux/platform_data/i2c-mux-gpio.h index 4406108201fe..28f288eed652 100644 --- a/include/linux/platform_data/i2c-mux-gpio.h +++ b/include/linux/platform_data/i2c-mux-gpio.h @@ -22,10 +22,6 @@ * position * @n_values: Number of multiplexer positions (busses to instantiate) * @classes: Optional I2C auto-detection classes - * @gpio_chip: Optional GPIO chip name; if set, GPIO pin numbers are given - * relative to the base GPIO number of that chip - * @gpios: Array of GPIO numbers used to control MUX - * @n_gpios: Number of GPIOs used to control MUX * @idle: Bitmask to write to MUX when idle or GPIO_I2CMUX_NO_IDLE if not used */ struct i2c_mux_gpio_platform_data { @@ -34,9 +30,6 @@ struct i2c_mux_gpio_platform_data { const unsigned *values; int n_values; const unsigned *classes; - char *gpio_chip; - const unsigned *gpios; - int n_gpios; unsigned idle; }; -- cgit v1.2.3 From e9bea8f98a539080070e3eff70a1731ce0ffdc8d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 19 Jun 2019 00:48:15 +0200 Subject: PM: sleep: Update struct wakeup_source documentation The kerneldoc comment for struct wakeup_source has become outdated, so fix that. Signed-off-by: Rafael J. Wysocki Reviewed-by: Greg Kroah-Hartman --- include/linux/pm_wakeup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index ce57771fab9b..91027602d137 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -36,7 +36,7 @@ struct wake_irq; * @expire_count: Number of times the wakeup source's timeout has expired. * @wakeup_count: Number of times the wakeup source might abort suspend. * @active: Status of the wakeup source. - * @has_timeout: The wakeup source has been activated with a timeout. + * @autosleep_enabled: Autosleep is active, so update @prevent_sleep_time. */ struct wakeup_source { const char *name; -- cgit v1.2.3 From 9285ec4c8b61d4930a575081abeba2cd4f449a74 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 21 Jun 2019 22:32:48 +0200 Subject: timekeeping: Use proper clock specifier names in functions This makes boot uniformly boottime and tai uniformly clocktai, to address the remaining oversights. Signed-off-by: Jason A. Donenfeld Signed-off-by: Thomas Gleixner Reviewed-by: Arnd Bergmann Link: https://lkml.kernel.org/r/20190621203249.3909-2-Jason@zx2c4.com --- Documentation/core-api/timekeeping.rst | 2 +- arch/x86/kvm/pmu.c | 4 ++-- arch/x86/kvm/x86.c | 12 ++++++------ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +- drivers/iio/humidity/dht11.c | 8 ++++---- drivers/iio/industrialio-core.c | 4 ++-- drivers/infiniband/hw/mlx4/alias_GUID.c | 6 +++--- drivers/leds/trigger/ledtrig-activity.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/rx.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 2 +- drivers/net/wireless/mac80211_hwsim.c | 2 +- drivers/net/wireless/ti/wlcore/main.c | 2 +- drivers/net/wireless/ti/wlcore/rx.c | 2 +- drivers/net/wireless/ti/wlcore/tx.c | 2 +- drivers/net/wireless/virt_wifi.c | 2 +- include/linux/timekeeping.h | 4 ++-- include/net/cfg80211.h | 2 +- kernel/bpf/syscall.c | 2 +- kernel/events/core.c | 4 ++-- kernel/fork.c | 2 +- 22 files changed, 36 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/Documentation/core-api/timekeeping.rst b/Documentation/core-api/timekeeping.rst index 93cbeb9daec0..4d92b1ac8024 100644 --- a/Documentation/core-api/timekeeping.rst +++ b/Documentation/core-api/timekeeping.rst @@ -65,7 +65,7 @@ different format depending on what is required by the user: .. c:function:: u64 ktime_get_ns( void ) u64 ktime_get_boottime_ns( void ) u64 ktime_get_real_ns( void ) - u64 ktime_get_tai_ns( void ) + u64 ktime_get_clocktai_ns( void ) u64 ktime_get_raw_ns( void ) Same as the plain ktime_get functions, but returning a u64 number diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index dd745b58ffd8..1aea628ef6b8 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -264,10 +264,10 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) ctr_val = rdtsc(); break; case VMWARE_BACKDOOR_PMC_REAL_TIME: - ctr_val = ktime_get_boot_ns(); + ctr_val = ktime_get_boottime_ns(); break; case VMWARE_BACKDOOR_PMC_APPARENT_TIME: - ctr_val = ktime_get_boot_ns() + + ctr_val = ktime_get_boottime_ns() + vcpu->kvm->arch.kvmclock_offset; break; default: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 83aefd759846..81a0914a1ec1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1731,7 +1731,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); offset = kvm_compute_tsc_offset(vcpu, data); - ns = ktime_get_boot_ns(); + ns = ktime_get_boottime_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; if (vcpu->arch.virtual_tsc_khz) { @@ -2073,7 +2073,7 @@ u64 get_kvmclock_ns(struct kvm *kvm) spin_lock(&ka->pvclock_gtod_sync_lock); if (!ka->use_master_clock) { spin_unlock(&ka->pvclock_gtod_sync_lock); - return ktime_get_boot_ns() + ka->kvmclock_offset; + return ktime_get_boottime_ns() + ka->kvmclock_offset; } hv_clock.tsc_timestamp = ka->master_cycle_now; @@ -2089,7 +2089,7 @@ u64 get_kvmclock_ns(struct kvm *kvm) &hv_clock.tsc_to_system_mul); ret = __pvclock_read_cycles(&hv_clock, rdtsc()); } else - ret = ktime_get_boot_ns() + ka->kvmclock_offset; + ret = ktime_get_boottime_ns() + ka->kvmclock_offset; put_cpu(); @@ -2188,7 +2188,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) } if (!use_master_clock) { host_tsc = rdtsc(); - kernel_ns = ktime_get_boot_ns(); + kernel_ns = ktime_get_boottime_ns(); } tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); @@ -9018,7 +9018,7 @@ int kvm_arch_hardware_enable(void) * before any KVM threads can be running. Unfortunately, we can't * bring the TSCs fully up to date with real time, as we aren't yet far * enough into CPU bringup that we know how much real time has actually - * elapsed; our helper function, ktime_get_boot_ns() will be using boot + * elapsed; our helper function, ktime_get_boottime_ns() will be using boot * variables that haven't been updated yet. * * So we simply find the maximum observed TSC above, then record the @@ -9246,7 +9246,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) mutex_init(&kvm->arch.apic_map_lock); spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); - kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); + kvm->arch.kvmclock_offset = -ktime_get_boottime_ns(); pvclock_update_vm_gtod_copy(kvm); kvm->arch.guest_can_read_msr_platform_info = true; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 083bd8114db1..dd6b4b0b5f30 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -837,7 +837,7 @@ static int kfd_ioctl_get_clock_counters(struct file *filep, /* No access to rdtsc. Using raw monotonic time */ args->cpu_clock_counter = ktime_get_raw_ns(); - args->system_clock_counter = ktime_get_boot_ns(); + args->system_clock_counter = ktime_get_boottime_ns(); /* Since the counter is in nano-seconds we use 1GHz frequency */ args->system_clock_freq = 1000000000; diff --git a/drivers/iio/humidity/dht11.c b/drivers/iio/humidity/dht11.c index c8159205c77d..4e22b3c3e488 100644 --- a/drivers/iio/humidity/dht11.c +++ b/drivers/iio/humidity/dht11.c @@ -149,7 +149,7 @@ static int dht11_decode(struct dht11 *dht11, int offset) return -EIO; } - dht11->timestamp = ktime_get_boot_ns(); + dht11->timestamp = ktime_get_boottime_ns(); if (hum_int < 4) { /* DHT22: 100000 = (3*256+232)*100 */ dht11->temperature = (((temp_int & 0x7f) << 8) + temp_dec) * ((temp_int & 0x80) ? -100 : 100); @@ -177,7 +177,7 @@ static irqreturn_t dht11_handle_irq(int irq, void *data) /* TODO: Consider making the handler safe for IRQ sharing */ if (dht11->num_edges < DHT11_EDGES_PER_READ && dht11->num_edges >= 0) { - dht11->edges[dht11->num_edges].ts = ktime_get_boot_ns(); + dht11->edges[dht11->num_edges].ts = ktime_get_boottime_ns(); dht11->edges[dht11->num_edges++].value = gpio_get_value(dht11->gpio); @@ -196,7 +196,7 @@ static int dht11_read_raw(struct iio_dev *iio_dev, int ret, timeres, offset; mutex_lock(&dht11->lock); - if (dht11->timestamp + DHT11_DATA_VALID_TIME < ktime_get_boot_ns()) { + if (dht11->timestamp + DHT11_DATA_VALID_TIME < ktime_get_boottime_ns()) { timeres = ktime_get_resolution_ns(); dev_dbg(dht11->dev, "current timeresolution: %dns\n", timeres); if (timeres > DHT11_MIN_TIMERES) { @@ -322,7 +322,7 @@ static int dht11_probe(struct platform_device *pdev) return -EINVAL; } - dht11->timestamp = ktime_get_boot_ns() - DHT11_DATA_VALID_TIME - 1; + dht11->timestamp = ktime_get_boottime_ns() - DHT11_DATA_VALID_TIME - 1; dht11->num_edges = -1; platform_set_drvdata(pdev, iio); diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index f5a4581302f4..16008f862d19 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -231,9 +231,9 @@ s64 iio_get_time_ns(const struct iio_dev *indio_dev) ktime_get_coarse_ts64(&tp); return timespec64_to_ns(&tp); case CLOCK_BOOTTIME: - return ktime_get_boot_ns(); + return ktime_get_boottime_ns(); case CLOCK_TAI: - return ktime_get_tai_ns(); + return ktime_get_clocktai_ns(); default: BUG(); } diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 2a0b59a4b6eb..cca414ecfcd5 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -310,7 +310,7 @@ static void aliasguid_query_handler(int status, if (status) { pr_debug("(port: %d) failed: status = %d\n", cb_ctx->port, status); - rec->time_to_run = ktime_get_boot_ns() + 1 * NSEC_PER_SEC; + rec->time_to_run = ktime_get_boottime_ns() + 1 * NSEC_PER_SEC; goto out; } @@ -416,7 +416,7 @@ next_entry: be64_to_cpu((__force __be64)rec->guid_indexes), be64_to_cpu((__force __be64)applied_guid_indexes), be64_to_cpu((__force __be64)declined_guid_indexes)); - rec->time_to_run = ktime_get_boot_ns() + + rec->time_to_run = ktime_get_boottime_ns() + resched_delay_sec * NSEC_PER_SEC; } else { rec->status = MLX4_GUID_INFO_STATUS_SET; @@ -709,7 +709,7 @@ static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port, } } if (resched_delay_sec) { - u64 curr_time = ktime_get_boot_ns(); + u64 curr_time = ktime_get_boottime_ns(); *resched_delay_sec = (low_record_time < curr_time) ? 0 : div_u64((low_record_time - curr_time), NSEC_PER_SEC); diff --git a/drivers/leds/trigger/ledtrig-activity.c b/drivers/leds/trigger/ledtrig-activity.c index bcbf41c90c30..0f130dd998b3 100644 --- a/drivers/leds/trigger/ledtrig-activity.c +++ b/drivers/leds/trigger/ledtrig-activity.c @@ -73,7 +73,7 @@ static void led_activity_function(struct timer_list *t) * down to 16us, ensuring we won't overflow 32-bit computations below * even up to 3k CPUs, while keeping divides cheap on smaller systems. */ - curr_boot = ktime_get_boot_ns() * cpus; + curr_boot = ktime_get_boottime_ns() * cpus; diff_boot = (curr_boot - activity_data->last_boot) >> 16; diff_used = (curr_used - activity_data->last_used) >> 16; activity_data->last_boot = curr_boot; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c index fec38a47696e..9f4b117db9d7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c @@ -93,7 +93,7 @@ void iwl_mvm_ftm_restart(struct iwl_mvm *mvm) struct cfg80211_pmsr_result result = { .status = NL80211_PMSR_STATUS_FAILURE, .final = 1, - .host_time = ktime_get_boot_ns(), + .host_time = ktime_get_boottime_ns(), .type = NL80211_PMSR_TYPE_FTM, }; int i; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c index fbd3014e8b82..160b0db27103 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c @@ -555,7 +555,7 @@ void iwl_mvm_rx_rx_mpdu(struct iwl_mvm *mvm, struct napi_struct *napi, if (unlikely(ieee80211_is_beacon(hdr->frame_control) || ieee80211_is_probe_resp(hdr->frame_control))) - rx_status->boottime_ns = ktime_get_boot_ns(); + rx_status->boottime_ns = ktime_get_boottime_ns(); /* Take a reference briefly to kick off a d0i3 entry delay so * we can handle bursts of RX packets without toggling the diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 1824566d08fc..64f950501287 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -1684,7 +1684,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, if (unlikely(ieee80211_is_beacon(hdr->frame_control) || ieee80211_is_probe_resp(hdr->frame_control))) - rx_status->boottime_ns = ktime_get_boot_ns(); + rx_status->boottime_ns = ktime_get_boottime_ns(); } if (iwl_mvm_create_skb(mvm, skb, hdr, len, crypt_len, rxb)) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index b9914efc55c4..724a25ab32f2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -1443,7 +1443,7 @@ void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime) } *gp2 = iwl_mvm_get_systime(mvm); - *boottime = ktime_get_boot_ns(); + *boottime = ktime_get_boottime_ns(); if (!ps_disabled) { mvm->ps_disabled = ps_disabled; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 60ca13e0f15b..52ee165d6f1d 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1274,7 +1274,7 @@ static bool mac80211_hwsim_tx_frame_no_nl(struct ieee80211_hw *hw, */ if (ieee80211_is_beacon(hdr->frame_control) || ieee80211_is_probe_resp(hdr->frame_control)) { - rx_status.boottime_ns = ktime_get_boot_ns(); + rx_status.boottime_ns = ktime_get_boottime_ns(); now = data->abs_bcn_ts; } else { now = mac80211_hwsim_get_tsf_raw(); diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index c9a485ecee7b..b74dc8bc9755 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -483,7 +483,7 @@ static int wlcore_fw_status(struct wl1271 *wl, struct wl_fw_status *status) } /* update the host-chipset time offset */ - wl->time_offset = (ktime_get_boot_ns() >> 10) - + wl->time_offset = (ktime_get_boottime_ns() >> 10) - (s64)(status->fw_localtime); wl->fw_fast_lnk_map = status->link_fast_bitmap; diff --git a/drivers/net/wireless/ti/wlcore/rx.c b/drivers/net/wireless/ti/wlcore/rx.c index d96bb602fae6..307fab21050b 100644 --- a/drivers/net/wireless/ti/wlcore/rx.c +++ b/drivers/net/wireless/ti/wlcore/rx.c @@ -93,7 +93,7 @@ static void wl1271_rx_status(struct wl1271 *wl, } if (beacon || probe_rsp) - status->boottime_ns = ktime_get_boot_ns(); + status->boottime_ns = ktime_get_boottime_ns(); if (beacon) wlcore_set_pending_regdomain_ch(wl, (u16)desc->channel, diff --git a/drivers/net/wireless/ti/wlcore/tx.c b/drivers/net/wireless/ti/wlcore/tx.c index 057c6be330e7..90e56d4c3df3 100644 --- a/drivers/net/wireless/ti/wlcore/tx.c +++ b/drivers/net/wireless/ti/wlcore/tx.c @@ -273,7 +273,7 @@ static void wl1271_tx_fill_hdr(struct wl1271 *wl, struct wl12xx_vif *wlvif, } /* configure packet life time */ - hosttime = (ktime_get_boot_ns() >> 10); + hosttime = (ktime_get_boottime_ns() >> 10); desc->start_time = cpu_to_le32(hosttime - wl->time_offset); is_dummy = wl12xx_is_dummy_packet(wl, skb); diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c index 606999f102eb..be92e1220284 100644 --- a/drivers/net/wireless/virt_wifi.c +++ b/drivers/net/wireless/virt_wifi.c @@ -172,7 +172,7 @@ static void virt_wifi_scan_result(struct work_struct *work) informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz, CFG80211_BSS_FTYPE_PRESP, fake_router_bssid, - ktime_get_boot_ns(), + ktime_get_boottime_ns(), WLAN_CAPABILITY_ESS, 0, (void *)&ssid, sizeof(ssid), DBM_TO_MBM(-50), GFP_KERNEL); diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index a8ab0f143ac4..fd6123722ea8 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -131,12 +131,12 @@ static inline u64 ktime_get_real_ns(void) return ktime_to_ns(ktime_get_real()); } -static inline u64 ktime_get_boot_ns(void) +static inline u64 ktime_get_boottime_ns(void) { return ktime_to_ns(ktime_get_boottime()); } -static inline u64 ktime_get_tai_ns(void) +static inline u64 ktime_get_clocktai_ns(void) { return ktime_to_ns(ktime_get_clocktai()); } diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 87dae868707e..f8058e92f59d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2010,7 +2010,7 @@ enum cfg80211_signal_type { * received by the device (not just by the host, in case it was * buffered on the device) and be accurate to about 10ms. * If the frame isn't buffered, just passing the return value of - * ktime_get_boot_ns() is likely appropriate. + * ktime_get_boottime_ns() is likely appropriate. * @parent_tsf: the time at the start of reception of the first octet of the * timestamp field of the frame. The time is the TSF of the BSS specified * by %parent_bssid. diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ef63d26622f2..96c8928b468b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1666,7 +1666,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) if (err < 0) goto free_prog; - prog->aux->load_time = ktime_get_boot_ns(); + prog->aux->load_time = ktime_get_boottime_ns(); err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name); if (err) goto free_prog; diff --git a/kernel/events/core.c b/kernel/events/core.c index abbd4b3b96c2..e2d014395fc6 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10680,11 +10680,11 @@ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) break; case CLOCK_BOOTTIME: - event->clock = &ktime_get_boot_ns; + event->clock = &ktime_get_boottime_ns; break; case CLOCK_TAI: - event->clock = &ktime_get_tai_ns; + event->clock = &ktime_get_clocktai_ns; break; default: diff --git a/kernel/fork.c b/kernel/fork.c index 75675b9bf6df..4722f1a320bf 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2139,7 +2139,7 @@ static __latent_entropy struct task_struct *copy_process( */ p->start_time = ktime_get_ns(); - p->real_start_time = ktime_get_boot_ns(); + p->real_start_time = ktime_get_boottime_ns(); /* * Make it visible to the rest of the system, but dont wake it up yet. -- cgit v1.2.3 From 4c54294d01e605a9f992361b924c5d8b12822a6d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 21 Jun 2019 22:32:49 +0200 Subject: timekeeping: Add missing _ns functions for coarse accessors This further unifies the accessors for the fast and coarse functions, so that the same types of functions are available for each. There was also a bit of confusion with the documentation, which prior advertised a function that has never existed. Finally, the vanilla ktime_get_coarse() was omitted from the API originally, so this fills this oversight. Signed-off-by: Jason A. Donenfeld Signed-off-by: Thomas Gleixner Reviewed-by: Arnd Bergmann Link: https://lkml.kernel.org/r/20190621203249.3909-3-Jason@zx2c4.com --- Documentation/core-api/timekeeping.rst | 10 +++++++--- include/linux/timekeeping.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/core-api/timekeeping.rst b/Documentation/core-api/timekeeping.rst index 4d92b1ac8024..15fc58e85ef9 100644 --- a/Documentation/core-api/timekeeping.rst +++ b/Documentation/core-api/timekeeping.rst @@ -99,16 +99,20 @@ Coarse and fast_ns access Some additional variants exist for more specialized cases: -.. c:function:: ktime_t ktime_get_coarse_boottime( void ) +.. c:function:: ktime_t ktime_get_coarse( void ) + ktime_t ktime_get_coarse_boottime( void ) ktime_t ktime_get_coarse_real( void ) ktime_t ktime_get_coarse_clocktai( void ) - ktime_t ktime_get_coarse_raw( void ) + +.. c:function:: u64 ktime_get_coarse_ns( void ) + u64 ktime_get_coarse_boot_ns( void ) + u64 ktime_get_coarse_real_ns( void ) + u64 ktime_get_coarse_clocktai_ns( void ) .. c:function:: void ktime_get_coarse_ts64( struct timespec64 * ) void ktime_get_coarse_boottime_ts64( struct timespec64 * ) void ktime_get_coarse_real_ts64( struct timespec64 * ) void ktime_get_coarse_clocktai_ts64( struct timespec64 * ) - void ktime_get_coarse_raw_ts64( struct timespec64 * ) These are quicker than the non-coarse versions, but less accurate, corresponding to CLOCK_MONONOTNIC_COARSE and CLOCK_REALTIME_COARSE diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index fd6123722ea8..dcffc00755f2 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -113,6 +113,34 @@ static inline ktime_t ktime_get_coarse_clocktai(void) return ktime_get_coarse_with_offset(TK_OFFS_TAI); } +static inline ktime_t ktime_get_coarse(void) +{ + struct timespec64 ts; + + ktime_get_coarse_ts64(&ts); + return timespec64_to_ktime(ts); +} + +static inline u64 ktime_get_coarse_ns(void) +{ + return ktime_to_ns(ktime_get_coarse()); +} + +static inline u64 ktime_get_coarse_real_ns(void) +{ + return ktime_to_ns(ktime_get_coarse_real()); +} + +static inline u64 ktime_get_coarse_boot_ns(void) +{ + return ktime_to_ns(ktime_get_coarse_boottime()); +} + +static inline u64 ktime_get_coarse_clocktai_ns(void) +{ + return ktime_to_ns(ktime_get_coarse_clocktai()); +} + /** * ktime_mono_to_real - Convert monotonic time to clock realtime */ -- cgit v1.2.3 From 62de37da9f382455b983f2f92b10012109005278 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 20 Jun 2019 15:26:29 +0300 Subject: mtd: spi-nor: intel-spi: Convert to use SPDX identifier This gets rid of the license boilerplate duplicated in each file. No functional changes intended. Signed-off-by: Mika Westerberg Signed-off-by: Tudor Ambarus --- drivers/mtd/spi-nor/intel-spi-pci.c | 5 +---- drivers/mtd/spi-nor/intel-spi-platform.c | 5 +---- drivers/mtd/spi-nor/intel-spi.c | 5 +---- drivers/mtd/spi-nor/intel-spi.h | 5 +---- include/linux/platform_data/intel-spi.h | 5 +---- 5 files changed, 5 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/spi-nor/intel-spi-pci.c b/drivers/mtd/spi-nor/intel-spi-pci.c index 578f0c74e536..1b9c2d99ba38 100644 --- a/drivers/mtd/spi-nor/intel-spi-pci.c +++ b/drivers/mtd/spi-nor/intel-spi-pci.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Intel PCH/PCU SPI flash PCI driver. * * Copyright (C) 2016, Intel Corporation * Author: Mika Westerberg - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/drivers/mtd/spi-nor/intel-spi-platform.c b/drivers/mtd/spi-nor/intel-spi-platform.c index 5c943df9398f..25b18804e9bb 100644 --- a/drivers/mtd/spi-nor/intel-spi-platform.c +++ b/drivers/mtd/spi-nor/intel-spi-platform.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Intel PCH/PCU SPI flash platform driver. * * Copyright (C) 2016, Intel Corporation * Author: Mika Westerberg - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/drivers/mtd/spi-nor/intel-spi.c b/drivers/mtd/spi-nor/intel-spi.c index d60cbf23d9aa..021cef930f9f 100644 --- a/drivers/mtd/spi-nor/intel-spi.c +++ b/drivers/mtd/spi-nor/intel-spi.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Intel PCH/PCU SPI flash driver. * * Copyright (C) 2016, Intel Corporation * Author: Mika Westerberg - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/drivers/mtd/spi-nor/intel-spi.h b/drivers/mtd/spi-nor/intel-spi.h index 5ab7dc250050..b03bf296fda3 100644 --- a/drivers/mtd/spi-nor/intel-spi.h +++ b/drivers/mtd/spi-nor/intel-spi.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Intel PCH/PCU SPI flash driver. * * Copyright (C) 2016, Intel Corporation * Author: Mika Westerberg - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef INTEL_SPI_H diff --git a/include/linux/platform_data/intel-spi.h b/include/linux/platform_data/intel-spi.h index 942b0c3f8f08..001f377fb5ef 100644 --- a/include/linux/platform_data/intel-spi.h +++ b/include/linux/platform_data/intel-spi.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Intel PCH/PCU SPI flash driver. * * Copyright (C) 2016, Intel Corporation * Author: Mika Westerberg - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef INTEL_SPI_PDATA_H -- cgit v1.2.3 From 32e29396f00e7849ea0b1aeebae097fc1de6e979 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Sat, 22 Jun 2019 15:02:07 +0200 Subject: hrtimer: Split out hrtimer defines into separate header To avoid include dependency hell split out the hrtimer defines which are required in the upcoming VDSO library into a separate header file. [ tglx: Split out from the VDSO library patch and included ktime.h as the new header depends on it. ] Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Tested-by: Shijith Thotton Tested-by: Andre Przywara Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Link: https://lkml.kernel.org/r/20190621095252.32307-3-vincenzo.frascino@arm.com --- include/linux/hrtimer.h | 16 +--------------- include/linux/hrtimer_defs.h | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 15 deletions(-) create mode 100644 include/linux/hrtimer_defs.h (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2e8957eac4d4..4971100a8cab 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -12,8 +12,8 @@ #ifndef _LINUX_HRTIMER_H #define _LINUX_HRTIMER_H +#include #include -#include #include #include #include @@ -298,26 +298,12 @@ struct clock_event_device; extern void hrtimer_interrupt(struct clock_event_device *dev); -/* - * The resolution of the clocks. The resolution value is returned in - * the clock_getres() system call to give application programmers an - * idea of the (in)accuracy of timers. Timer values are rounded up to - * this resolution values. - */ -# define HIGH_RES_NSEC 1 -# define KTIME_HIGH_RES (HIGH_RES_NSEC) -# define MONOTONIC_RES_NSEC HIGH_RES_NSEC -# define KTIME_MONOTONIC_RES KTIME_HIGH_RES - extern void clock_was_set_delayed(void); extern unsigned int hrtimer_resolution; #else -# define MONOTONIC_RES_NSEC LOW_RES_NSEC -# define KTIME_MONOTONIC_RES KTIME_LOW_RES - #define hrtimer_resolution (unsigned int)LOW_RES_NSEC static inline void clock_was_set_delayed(void) { } diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h new file mode 100644 index 000000000000..2d3e3c5fb946 --- /dev/null +++ b/include/linux/hrtimer_defs.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_HRTIMER_DEFS_H +#define _LINUX_HRTIMER_DEFS_H + +#include + +#ifdef CONFIG_HIGH_RES_TIMERS + +/* + * The resolution of the clocks. The resolution value is returned in + * the clock_getres() system call to give application programmers an + * idea of the (in)accuracy of timers. Timer values are rounded up to + * this resolution values. + */ +# define HIGH_RES_NSEC 1 +# define KTIME_HIGH_RES (HIGH_RES_NSEC) +# define MONOTONIC_RES_NSEC HIGH_RES_NSEC +# define KTIME_MONOTONIC_RES KTIME_HIGH_RES + +#else + +# define MONOTONIC_RES_NSEC LOW_RES_NSEC +# define KTIME_MONOTONIC_RES KTIME_LOW_RES + +#endif + +#endif -- cgit v1.2.3 From 438ac88009bcb10f9ced07fbb4b32d5377ee936b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 19 Jun 2019 23:46:28 +0200 Subject: net: fastopen: robustness and endianness fixes for SipHash Some changes to the TCP fastopen code to make it more robust against future changes in the choice of key/cookie size, etc. - Instead of keeping the SipHash key in an untyped u8[] buffer and casting it to the right type upon use, use the correct type directly. This ensures that the key will appear at the correct alignment if we ever change the way these data structures are allocated. (Currently, they are only allocated via kmalloc so they always appear at the correct alignment) - Use DIV_ROUND_UP when sizing the u64[] array to hold the cookie, so it is always of sufficient size, even if TCP_FASTOPEN_COOKIE_MAX is no longer a multiple of 8. - Drop the 'len' parameter from the tcp_fastopen_reset_cipher() function, which is no longer used. - Add endian swabbing when setting the keys and calculating the hash, to ensure that cookie values are the same for a given key and source/destination address pair regardless of the endianness of the server. Note that none of these are functional changes wrt the current state of the code, with the exception of the swabbing, which only affects big endian systems. Signed-off-by: Ard Biesheuvel Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- include/net/tcp.h | 8 ++++---- net/ipv4/sysctl_net_ipv4.c | 3 +-- net/ipv4/tcp.c | 3 +-- net/ipv4/tcp_fastopen.c | 35 +++++++++++++++++------------------ 5 files changed, 24 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2689b0b0b68a..f3a85a7fb4b1 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -58,7 +58,7 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) /* TCP Fast Open Cookie as stored in memory */ struct tcp_fastopen_cookie { - u64 val[TCP_FASTOPEN_COOKIE_MAX / sizeof(u64)]; + __le64 val[DIV_ROUND_UP(TCP_FASTOPEN_COOKIE_MAX, sizeof(u64))]; s8 len; bool exp; /* In RFC6994 experimental option format */ }; diff --git a/include/net/tcp.h b/include/net/tcp.h index 573c9e9b0d72..9d36cc88d043 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -43,6 +43,7 @@ #include #include #include +#include extern struct inet_hashinfo tcp_hashinfo; @@ -1612,8 +1613,7 @@ void tcp_free_fastopen_req(struct tcp_sock *tp); void tcp_fastopen_destroy_cipher(struct sock *sk); void tcp_fastopen_ctx_destroy(struct net *net); int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, - void *primary_key, void *backup_key, - unsigned int len); + void *primary_key, void *backup_key); void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, @@ -1623,14 +1623,14 @@ void tcp_fastopen_init_key_once(struct net *net); bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); bool tcp_fastopen_defer_connect(struct sock *sk, int *err); -#define TCP_FASTOPEN_KEY_LENGTH 16 +#define TCP_FASTOPEN_KEY_LENGTH sizeof(siphash_key_t) #define TCP_FASTOPEN_KEY_MAX 2 #define TCP_FASTOPEN_KEY_BUF_LENGTH \ (TCP_FASTOPEN_KEY_LENGTH * TCP_FASTOPEN_KEY_MAX) /* Fastopen key context */ struct tcp_fastopen_context { - __u8 key[TCP_FASTOPEN_KEY_MAX][TCP_FASTOPEN_KEY_LENGTH]; + siphash_key_t key[TCP_FASTOPEN_KEY_MAX]; int num; struct rcu_head rcu; }; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7d802acde040..7d66306b5f39 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -365,8 +365,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write, } } tcp_fastopen_reset_cipher(net, NULL, key, - backup_data ? key + 4 : NULL, - TCP_FASTOPEN_KEY_LENGTH); + backup_data ? key + 4 : NULL); } bad_key: diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index efd7f2b1d1f0..47c217905864 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2822,8 +2822,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, if (optlen == TCP_FASTOPEN_KEY_BUF_LENGTH) backup_key = key + TCP_FASTOPEN_KEY_LENGTH; - return tcp_fastopen_reset_cipher(net, sk, key, backup_key, - TCP_FASTOPEN_KEY_LENGTH); + return tcp_fastopen_reset_cipher(net, sk, key, backup_key); } default: /* fallthru */ diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index f918599181dd..3fd451271a70 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -31,7 +30,7 @@ void tcp_fastopen_init_key_once(struct net *net) * for a valid cookie, so this is an acceptable risk. */ get_random_bytes(key, sizeof(key)); - tcp_fastopen_reset_cipher(net, NULL, key, NULL, sizeof(key)); + tcp_fastopen_reset_cipher(net, NULL, key, NULL); } static void tcp_fastopen_ctx_free(struct rcu_head *head) @@ -68,8 +67,7 @@ void tcp_fastopen_ctx_destroy(struct net *net) } int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, - void *primary_key, void *backup_key, - unsigned int len) + void *primary_key, void *backup_key) { struct tcp_fastopen_context *ctx, *octx; struct fastopen_queue *q; @@ -81,9 +79,11 @@ int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, goto out; } - memcpy(ctx->key[0], primary_key, len); + ctx->key[0].key[0] = get_unaligned_le64(primary_key); + ctx->key[0].key[1] = get_unaligned_le64(primary_key + 8); if (backup_key) { - memcpy(ctx->key[1], backup_key, len); + ctx->key[1].key[0] = get_unaligned_le64(backup_key); + ctx->key[1].key[1] = get_unaligned_le64(backup_key + 8); ctx->num = 2; } else { ctx->num = 1; @@ -110,19 +110,18 @@ out: static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req, struct sk_buff *syn, - const u8 *key, + const siphash_key_t *key, struct tcp_fastopen_cookie *foc) { - BUILD_BUG_ON(TCP_FASTOPEN_KEY_LENGTH != sizeof(siphash_key_t)); BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u64)); if (req->rsk_ops->family == AF_INET) { const struct iphdr *iph = ip_hdr(syn); - foc->val[0] = siphash(&iph->saddr, - sizeof(iph->saddr) + - sizeof(iph->daddr), - (const siphash_key_t *)key); + foc->val[0] = cpu_to_le64(siphash(&iph->saddr, + sizeof(iph->saddr) + + sizeof(iph->daddr), + key)); foc->len = TCP_FASTOPEN_COOKIE_SIZE; return true; } @@ -130,10 +129,10 @@ static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req, if (req->rsk_ops->family == AF_INET6) { const struct ipv6hdr *ip6h = ipv6_hdr(syn); - foc->val[0] = siphash(&ip6h->saddr, - sizeof(ip6h->saddr) + - sizeof(ip6h->daddr), - (const siphash_key_t *)key); + foc->val[0] = cpu_to_le64(siphash(&ip6h->saddr, + sizeof(ip6h->saddr) + + sizeof(ip6h->daddr), + key)); foc->len = TCP_FASTOPEN_COOKIE_SIZE; return true; } @@ -154,7 +153,7 @@ static void tcp_fastopen_cookie_gen(struct sock *sk, rcu_read_lock(); ctx = tcp_fastopen_get_ctx(sk); if (ctx) - __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->key[0], foc); + __tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[0], foc); rcu_read_unlock(); } @@ -218,7 +217,7 @@ static int tcp_fastopen_cookie_gen_check(struct sock *sk, if (!ctx) goto out; for (i = 0; i < tcp_fastopen_context_len(ctx); i++) { - __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->key[i], foc); + __tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[i], foc); if (tcp_fastopen_cookie_match(foc, orig)) { ret = i + 1; goto out; -- cgit v1.2.3 From caa759323c73676b3e48c8d9c86093c88b4aba97 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 12 Jun 2019 23:48:05 -0700 Subject: smp: Remove smp_call_function() and on_each_cpu() return values The return value is fixed. Remove it and amend the callers. [ tglx: Fixup arm/bL_switcher and powerpc/rtas ] Signed-off-by: Nadav Amit Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Tony Luck Cc: Fenghua Yu Cc: Andrew Morton Link: https://lkml.kernel.org/r/20190613064813.8102-2-namit@vmware.com --- arch/alpha/kernel/smp.c | 19 +++++-------------- arch/alpha/oprofile/common.c | 6 +++--- arch/arm/common/bL_switcher.c | 6 ++---- arch/ia64/kernel/perfmon.c | 12 ++---------- arch/ia64/kernel/uncached.c | 8 ++++---- arch/powerpc/kernel/rtas.c | 3 +-- arch/x86/lib/cache-smp.c | 3 ++- drivers/char/agp/generic.c | 3 +-- include/linux/smp.h | 7 +++---- kernel/smp.c | 10 +++------- kernel/up.c | 3 +-- 11 files changed, 27 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index d0dccae53ba9..5f90df30be20 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -614,8 +614,7 @@ void smp_imb(void) { /* Must wait other processors to flush their icache before continue. */ - if (on_each_cpu(ipi_imb, NULL, 1)) - printk(KERN_CRIT "smp_imb: timed out\n"); + on_each_cpu(ipi_imb, NULL, 1); } EXPORT_SYMBOL(smp_imb); @@ -630,9 +629,7 @@ flush_tlb_all(void) { /* Although we don't have any data to pass, we do want to synchronize with the other processors. */ - if (on_each_cpu(ipi_flush_tlb_all, NULL, 1)) { - printk(KERN_CRIT "flush_tlb_all: timed out\n"); - } + on_each_cpu(ipi_flush_tlb_all, NULL, 1); } #define asn_locked() (cpu_data[smp_processor_id()].asn_lock) @@ -667,9 +664,7 @@ flush_tlb_mm(struct mm_struct *mm) } } - if (smp_call_function(ipi_flush_tlb_mm, mm, 1)) { - printk(KERN_CRIT "flush_tlb_mm: timed out\n"); - } + smp_call_function(ipi_flush_tlb_mm, mm, 1); preempt_enable(); } @@ -720,9 +715,7 @@ flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) data.mm = mm; data.addr = addr; - if (smp_call_function(ipi_flush_tlb_page, &data, 1)) { - printk(KERN_CRIT "flush_tlb_page: timed out\n"); - } + smp_call_function(ipi_flush_tlb_page, &data, 1); preempt_enable(); } @@ -772,9 +765,7 @@ flush_icache_user_range(struct vm_area_struct *vma, struct page *page, } } - if (smp_call_function(ipi_flush_icache_page, mm, 1)) { - printk(KERN_CRIT "flush_icache_page: timed out\n"); - } + smp_call_function(ipi_flush_icache_page, mm, 1); preempt_enable(); } diff --git a/arch/alpha/oprofile/common.c b/arch/alpha/oprofile/common.c index 310a4ce1dccc..1b1259c7d7d1 100644 --- a/arch/alpha/oprofile/common.c +++ b/arch/alpha/oprofile/common.c @@ -65,7 +65,7 @@ op_axp_setup(void) model->reg_setup(®, ctr, &sys); /* Configure the registers on all cpus. */ - (void)smp_call_function(model->cpu_setup, ®, 1); + smp_call_function(model->cpu_setup, ®, 1); model->cpu_setup(®); return 0; } @@ -86,7 +86,7 @@ op_axp_cpu_start(void *dummy) static int op_axp_start(void) { - (void)smp_call_function(op_axp_cpu_start, NULL, 1); + smp_call_function(op_axp_cpu_start, NULL, 1); op_axp_cpu_start(NULL); return 0; } @@ -101,7 +101,7 @@ op_axp_cpu_stop(void *dummy) static void op_axp_stop(void) { - (void)smp_call_function(op_axp_cpu_stop, NULL, 1); + smp_call_function(op_axp_cpu_stop, NULL, 1); op_axp_cpu_stop(NULL); } diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index 57f3b7512636..17bc259729e2 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -542,16 +542,14 @@ static void bL_switcher_trace_trigger_cpu(void *__always_unused info) int bL_switcher_trace_trigger(void) { - int ret; - preempt_disable(); bL_switcher_trace_trigger_cpu(NULL); - ret = smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true); + smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true); preempt_enable(); - return ret; + return 0; } EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger); diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 58a6337c0690..7c52bd2695a2 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -6390,11 +6390,7 @@ pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) } /* save the current system wide pmu states */ - ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 1); - if (ret) { - DPRINT(("on_each_cpu() failed: %d\n", ret)); - goto cleanup_reserve; - } + on_each_cpu(pfm_alt_save_pmu_state, NULL, 1); /* officially change to the alternate interrupt handler */ pfm_alt_intr_handler = hdl; @@ -6421,7 +6417,6 @@ int pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) { int i; - int ret; if (hdl == NULL) return -EINVAL; @@ -6435,10 +6430,7 @@ pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) pfm_alt_intr_handler = NULL; - ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1); - if (ret) { - DPRINT(("on_each_cpu() failed: %d\n", ret)); - } + on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1); for_each_online_cpu(i) { pfm_unreserve_session(NULL, 1, i); diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index 583f7ff6b589..c618d0745e22 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -124,8 +124,8 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid) status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) { atomic_set(&uc_pool->status, 0); - status = smp_call_function(uncached_ipi_visibility, uc_pool, 1); - if (status || atomic_read(&uc_pool->status)) + smp_call_function(uncached_ipi_visibility, uc_pool, 1); + if (atomic_read(&uc_pool->status)) goto failed; } else if (status != PAL_VISIBILITY_OK) goto failed; @@ -146,8 +146,8 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid) if (status != PAL_STATUS_SUCCESS) goto failed; atomic_set(&uc_pool->status, 0); - status = smp_call_function(uncached_ipi_mc_drain, uc_pool, 1); - if (status || atomic_read(&uc_pool->status)) + smp_call_function(uncached_ipi_mc_drain, uc_pool, 1); + if (atomic_read(&uc_pool->status)) goto failed; /* diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index fbc676160adf..64d95eb6ffff 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -994,8 +994,7 @@ int rtas_ibm_suspend_me(u64 handle) /* Call function on all CPUs. One of us will make the * rtas call */ - if (on_each_cpu(rtas_percpu_suspend_me, &data, 0)) - atomic_set(&data.error, -EINVAL); + on_each_cpu(rtas_percpu_suspend_me, &data, 0); wait_for_completion(&done); diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c index 1811fa4a1b1a..7c48ff4ae8d1 100644 --- a/arch/x86/lib/cache-smp.c +++ b/arch/x86/lib/cache-smp.c @@ -15,6 +15,7 @@ EXPORT_SYMBOL(wbinvd_on_cpu); int wbinvd_on_all_cpus(void) { - return on_each_cpu(__wbinvd, NULL, 1); + on_each_cpu(__wbinvd, NULL, 1); + return 0; } EXPORT_SYMBOL(wbinvd_on_all_cpus); diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c index 658664a5a5aa..df1edb5ec0ad 100644 --- a/drivers/char/agp/generic.c +++ b/drivers/char/agp/generic.c @@ -1311,8 +1311,7 @@ static void ipi_handler(void *null) void global_cache_flush(void) { - if (on_each_cpu(ipi_handler, NULL, 1) != 0) - panic(PFX "timed out waiting for the other CPUs!\n"); + on_each_cpu(ipi_handler, NULL, 1); } EXPORT_SYMBOL(global_cache_flush); diff --git a/include/linux/smp.h b/include/linux/smp.h index a56f08ff3097..bb8b451ab01f 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -35,7 +35,7 @@ int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, /* * Call a function on all processors */ -int on_each_cpu(smp_call_func_t func, void *info, int wait); +void on_each_cpu(smp_call_func_t func, void *info, int wait); /* * Call a function on processors specified by mask, which might include @@ -101,7 +101,7 @@ extern void smp_cpus_done(unsigned int max_cpus); /* * Call a function on all other processors */ -int smp_call_function(smp_call_func_t func, void *info, int wait); +void smp_call_function(smp_call_func_t func, void *info, int wait); void smp_call_function_many(const struct cpumask *mask, smp_call_func_t func, void *info, bool wait); @@ -144,9 +144,8 @@ static inline void smp_send_stop(void) { } * These macros fold the SMP functionality into a single CPU system */ #define raw_smp_processor_id() 0 -static inline int up_smp_call_function(smp_call_func_t func, void *info) +static inline void up_smp_call_function(smp_call_func_t func, void *info) { - return 0; } #define smp_call_function(func, info, wait) \ (up_smp_call_function(func, info)) diff --git a/kernel/smp.c b/kernel/smp.c index 220ad142f5dd..616d4d114847 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -487,13 +487,11 @@ EXPORT_SYMBOL(smp_call_function_many); * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. */ -int smp_call_function(smp_call_func_t func, void *info, int wait) +void smp_call_function(smp_call_func_t func, void *info, int wait) { preempt_disable(); smp_call_function_many(cpu_online_mask, func, info, wait); preempt_enable(); - - return 0; } EXPORT_SYMBOL(smp_call_function); @@ -594,18 +592,16 @@ void __init smp_init(void) * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead * of local_irq_disable/enable(). */ -int on_each_cpu(void (*func) (void *info), void *info, int wait) +void on_each_cpu(void (*func) (void *info), void *info, int wait) { unsigned long flags; - int ret = 0; preempt_disable(); - ret = smp_call_function(func, info, wait); + smp_call_function(func, info, wait); local_irq_save(flags); func(info); local_irq_restore(flags); preempt_enable(); - return ret; } EXPORT_SYMBOL(on_each_cpu); diff --git a/kernel/up.c b/kernel/up.c index 483c9962c999..862b460ab97a 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -35,14 +35,13 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd) } EXPORT_SYMBOL(smp_call_function_single_async); -int on_each_cpu(smp_call_func_t func, void *info, int wait) +void on_each_cpu(smp_call_func_t func, void *info, int wait) { unsigned long flags; local_irq_save(flags); func(info); local_irq_restore(flags); - return 0; } EXPORT_SYMBOL(on_each_cpu); -- cgit v1.2.3 From e67d4dfc9ff19dbe74b29617cf2592ccc50c3920 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Wed, 12 Jun 2019 01:44:04 -0700 Subject: power: supply: Add HWMON compatibility layer Add code implementing HWMON adapter/compatibility layer to allow expositing various sensors present on power supply devices via HWMON subsystem. This is done in order to allow userspace to use single ABI/library(libsensors) to access/manipulate all of the sensors of the system. Signed-off-by: Andrey Smirnov Reviewed-by: Guenter Roeck Tested-by: Chris Healy Cc: Chris Healy Cc: Cory Tusar Cc: Lucas Stach Cc: Fabio Estevam Cc: Guenter Roeck Cc: Sebastian Reichel Cc: linux-kernel@vger.kernel.org Cc: linux-pm@vger.kernel.org Signed-off-by: Sebastian Reichel --- drivers/power/supply/Kconfig | 14 ++ drivers/power/supply/Makefile | 1 + drivers/power/supply/power_supply_core.c | 7 + drivers/power/supply/power_supply_hwmon.c | 355 ++++++++++++++++++++++++++++++ include/linux/power_supply.h | 13 ++ 5 files changed, 390 insertions(+) create mode 100644 drivers/power/supply/power_supply_hwmon.c (limited to 'include/linux') diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig index 26dacdab03cc..1f2252cb95fd 100644 --- a/drivers/power/supply/Kconfig +++ b/drivers/power/supply/Kconfig @@ -14,6 +14,20 @@ config POWER_SUPPLY_DEBUG Say Y here to enable debugging messages for power supply class and drivers. +config POWER_SUPPLY_HWMON + bool + prompt "Expose power supply sensors as hwmon device" + depends on HWMON=y || HWMON=POWER_SUPPLY + default y + help + This options enables API that allows sensors found on a + power supply device (current, voltage, temperature) to be + exposed as a hwmon device. + + Say 'Y' here if you want power supplies to + have hwmon sysfs interface too. + + config PDA_POWER tristate "Generic PDA/phone power driver" depends on !S390 diff --git a/drivers/power/supply/Makefile b/drivers/power/supply/Makefile index f208273f9686..c47e88ba16b9 100644 --- a/drivers/power/supply/Makefile +++ b/drivers/power/supply/Makefile @@ -6,6 +6,7 @@ power_supply-$(CONFIG_SYSFS) += power_supply_sysfs.o power_supply-$(CONFIG_LEDS_TRIGGERS) += power_supply_leds.o obj-$(CONFIG_POWER_SUPPLY) += power_supply.o +obj-$(CONFIG_POWER_SUPPLY_HWMON) += power_supply_hwmon.o obj-$(CONFIG_GENERIC_ADC_BATTERY) += generic-adc-battery.o obj-$(CONFIG_PDA_POWER) += pda_power.o diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index f7033ecf6d0b..35624193a346 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -1072,6 +1072,10 @@ __power_supply_register(struct device *parent, if (rc) goto create_triggers_failed; + rc = power_supply_add_hwmon_sysfs(psy); + if (rc) + goto add_hwmon_sysfs_failed; + /* * Update use_cnt after any uevents (most notably from device_add()). * We are here still during driver's probe but @@ -1090,6 +1094,8 @@ __power_supply_register(struct device *parent, return psy; +add_hwmon_sysfs_failed: + power_supply_remove_triggers(psy); create_triggers_failed: psy_unregister_cooler(psy); register_cooler_failed: @@ -1242,6 +1248,7 @@ void power_supply_unregister(struct power_supply *psy) cancel_work_sync(&psy->changed_work); cancel_delayed_work_sync(&psy->deferred_register_work); sysfs_remove_link(&psy->dev.kobj, "powers"); + power_supply_remove_hwmon_sysfs(psy); power_supply_remove_triggers(psy); psy_unregister_cooler(psy); psy_unregister_thermal(psy); diff --git a/drivers/power/supply/power_supply_hwmon.c b/drivers/power/supply/power_supply_hwmon.c new file mode 100644 index 000000000000..51fe60440d12 --- /dev/null +++ b/drivers/power/supply/power_supply_hwmon.c @@ -0,0 +1,355 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * power_supply_hwmon.c - power supply hwmon support. + */ + +#include +#include +#include +#include + +struct power_supply_hwmon { + struct power_supply *psy; + unsigned long *props; +}; + +static int power_supply_hwmon_in_to_property(u32 attr) +{ + switch (attr) { + case hwmon_in_average: + return POWER_SUPPLY_PROP_VOLTAGE_AVG; + case hwmon_in_min: + return POWER_SUPPLY_PROP_VOLTAGE_MIN; + case hwmon_in_max: + return POWER_SUPPLY_PROP_VOLTAGE_MAX; + case hwmon_in_input: + return POWER_SUPPLY_PROP_VOLTAGE_NOW; + default: + return -EINVAL; + } +} + +static int power_supply_hwmon_curr_to_property(u32 attr) +{ + switch (attr) { + case hwmon_curr_average: + return POWER_SUPPLY_PROP_CURRENT_AVG; + case hwmon_curr_max: + return POWER_SUPPLY_PROP_CURRENT_MAX; + case hwmon_curr_input: + return POWER_SUPPLY_PROP_CURRENT_NOW; + default: + return -EINVAL; + } +} + +static int power_supply_hwmon_temp_to_property(u32 attr, int channel) +{ + if (channel) { + switch (attr) { + case hwmon_temp_input: + return POWER_SUPPLY_PROP_TEMP_AMBIENT; + case hwmon_temp_min_alarm: + return POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MIN; + case hwmon_temp_max_alarm: + return POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MAX; + default: + break; + } + } else { + switch (attr) { + case hwmon_temp_input: + return POWER_SUPPLY_PROP_TEMP; + case hwmon_temp_max: + return POWER_SUPPLY_PROP_TEMP_MAX; + case hwmon_temp_min: + return POWER_SUPPLY_PROP_TEMP_MIN; + case hwmon_temp_min_alarm: + return POWER_SUPPLY_PROP_TEMP_ALERT_MIN; + case hwmon_temp_max_alarm: + return POWER_SUPPLY_PROP_TEMP_ALERT_MAX; + default: + break; + } + } + + return -EINVAL; +} + +static int +power_supply_hwmon_to_property(enum hwmon_sensor_types type, + u32 attr, int channel) +{ + switch (type) { + case hwmon_in: + return power_supply_hwmon_in_to_property(attr); + case hwmon_curr: + return power_supply_hwmon_curr_to_property(attr); + case hwmon_temp: + return power_supply_hwmon_temp_to_property(attr, channel); + default: + return -EINVAL; + } +} + +static bool power_supply_hwmon_is_a_label(enum hwmon_sensor_types type, + u32 attr) +{ + return type == hwmon_temp && attr == hwmon_temp_label; +} + +static bool power_supply_hwmon_is_writable(enum hwmon_sensor_types type, + u32 attr) +{ + switch (type) { + case hwmon_in: + return attr == hwmon_in_min || + attr == hwmon_in_max; + case hwmon_curr: + return attr == hwmon_curr_max; + case hwmon_temp: + return attr == hwmon_temp_max || + attr == hwmon_temp_min || + attr == hwmon_temp_min_alarm || + attr == hwmon_temp_max_alarm; + default: + return false; + } +} + +static umode_t power_supply_hwmon_is_visible(const void *data, + enum hwmon_sensor_types type, + u32 attr, int channel) +{ + const struct power_supply_hwmon *psyhw = data; + int prop; + + + if (power_supply_hwmon_is_a_label(type, attr)) + return 0444; + + prop = power_supply_hwmon_to_property(type, attr, channel); + if (prop < 0 || !test_bit(prop, psyhw->props)) + return 0; + + if (power_supply_property_is_writeable(psyhw->psy, prop) > 0 && + power_supply_hwmon_is_writable(type, attr)) + return 0644; + + return 0444; +} + +static int power_supply_hwmon_read_string(struct device *dev, + enum hwmon_sensor_types type, + u32 attr, int channel, + const char **str) +{ + *str = channel ? "temp" : "temp ambient"; + return 0; +} + +static int +power_supply_hwmon_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) +{ + struct power_supply_hwmon *psyhw = dev_get_drvdata(dev); + struct power_supply *psy = psyhw->psy; + union power_supply_propval pspval; + int ret, prop; + + prop = power_supply_hwmon_to_property(type, attr, channel); + if (prop < 0) + return prop; + + ret = power_supply_get_property(psy, prop, &pspval); + if (ret) + return ret; + + switch (type) { + /* + * Both voltage and current is reported in units of + * microvolts/microamps, so we need to adjust it to + * milliamps(volts) + */ + case hwmon_curr: + case hwmon_in: + pspval.intval = DIV_ROUND_CLOSEST(pspval.intval, 1000); + break; + /* + * Temp needs to be converted from 1/10 C to milli-C + */ + case hwmon_temp: + if (check_mul_overflow(pspval.intval, 100, + &pspval.intval)) + return -EOVERFLOW; + break; + default: + return -EINVAL; + } + + *val = pspval.intval; + + return 0; +} + +static int +power_supply_hwmon_write(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long val) +{ + struct power_supply_hwmon *psyhw = dev_get_drvdata(dev); + struct power_supply *psy = psyhw->psy; + union power_supply_propval pspval; + int prop; + + prop = power_supply_hwmon_to_property(type, attr, channel); + if (prop < 0) + return prop; + + pspval.intval = val; + + switch (type) { + /* + * Both voltage and current is reported in units of + * microvolts/microamps, so we need to adjust it to + * milliamps(volts) + */ + case hwmon_curr: + case hwmon_in: + if (check_mul_overflow(pspval.intval, 1000, + &pspval.intval)) + return -EOVERFLOW; + break; + /* + * Temp needs to be converted from 1/10 C to milli-C + */ + case hwmon_temp: + pspval.intval = DIV_ROUND_CLOSEST(pspval.intval, 100); + break; + default: + return -EINVAL; + } + + return power_supply_set_property(psy, prop, &pspval); +} + +static const struct hwmon_ops power_supply_hwmon_ops = { + .is_visible = power_supply_hwmon_is_visible, + .read = power_supply_hwmon_read, + .write = power_supply_hwmon_write, + .read_string = power_supply_hwmon_read_string, +}; + +static const struct hwmon_channel_info *power_supply_hwmon_info[] = { + HWMON_CHANNEL_INFO(temp, + HWMON_T_LABEL | + HWMON_T_INPUT | + HWMON_T_MAX | + HWMON_T_MIN | + HWMON_T_MIN_ALARM | + HWMON_T_MIN_ALARM, + + HWMON_T_LABEL | + HWMON_T_INPUT | + HWMON_T_MIN_ALARM | + HWMON_T_LABEL | + HWMON_T_MAX_ALARM), + + HWMON_CHANNEL_INFO(curr, + HWMON_C_AVERAGE | + HWMON_C_MAX | + HWMON_C_INPUT), + + HWMON_CHANNEL_INFO(in, + HWMON_I_AVERAGE | + HWMON_I_MIN | + HWMON_I_MAX | + HWMON_I_INPUT), + NULL +}; + +static const struct hwmon_chip_info power_supply_hwmon_chip_info = { + .ops = &power_supply_hwmon_ops, + .info = power_supply_hwmon_info, +}; + +static void power_supply_hwmon_bitmap_free(void *data) +{ + bitmap_free(data); +} + +int power_supply_add_hwmon_sysfs(struct power_supply *psy) +{ + const struct power_supply_desc *desc = psy->desc; + struct power_supply_hwmon *psyhw; + struct device *dev = &psy->dev; + struct device *hwmon; + int ret, i; + + if (!devres_open_group(dev, power_supply_add_hwmon_sysfs, + GFP_KERNEL)) + return -ENOMEM; + + psyhw = devm_kzalloc(dev, sizeof(*psyhw), GFP_KERNEL); + if (!psyhw) { + ret = -ENOMEM; + goto error; + } + + psyhw->psy = psy; + psyhw->props = bitmap_zalloc(POWER_SUPPLY_PROP_TIME_TO_FULL_AVG + 1, + GFP_KERNEL); + if (!psyhw->props) { + ret = -ENOMEM; + goto error; + } + + ret = devm_add_action(dev, power_supply_hwmon_bitmap_free, + psyhw->props); + if (ret) + goto error; + + for (i = 0; i < desc->num_properties; i++) { + const enum power_supply_property prop = desc->properties[i]; + + switch (prop) { + case POWER_SUPPLY_PROP_CURRENT_AVG: + case POWER_SUPPLY_PROP_CURRENT_MAX: + case POWER_SUPPLY_PROP_CURRENT_NOW: + case POWER_SUPPLY_PROP_TEMP: + case POWER_SUPPLY_PROP_TEMP_MAX: + case POWER_SUPPLY_PROP_TEMP_MIN: + case POWER_SUPPLY_PROP_TEMP_ALERT_MIN: + case POWER_SUPPLY_PROP_TEMP_ALERT_MAX: + case POWER_SUPPLY_PROP_TEMP_AMBIENT: + case POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MIN: + case POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MAX: + case POWER_SUPPLY_PROP_VOLTAGE_AVG: + case POWER_SUPPLY_PROP_VOLTAGE_MIN: + case POWER_SUPPLY_PROP_VOLTAGE_MAX: + case POWER_SUPPLY_PROP_VOLTAGE_NOW: + set_bit(prop, psyhw->props); + break; + default: + break; + } + } + + hwmon = devm_hwmon_device_register_with_info(dev, psy->desc->name, + psyhw, + &power_supply_hwmon_chip_info, + NULL); + ret = PTR_ERR_OR_ZERO(hwmon); + if (ret) + goto error; + + devres_close_group(dev, power_supply_add_hwmon_sysfs); + return 0; +error: + devres_release_group(dev, NULL); + return ret; +} + +void power_supply_remove_hwmon_sysfs(struct power_supply *psy) +{ + devres_release_group(&psy->dev, power_supply_add_hwmon_sysfs); +} diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index d9c0c094f8a0..d5b15e039f4f 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -481,4 +481,17 @@ static inline bool power_supply_is_watt_property(enum power_supply_property psp) return 0; } +#ifdef CONFIG_POWER_SUPPLY_HWMON +int power_supply_add_hwmon_sysfs(struct power_supply *psy); +void power_supply_remove_hwmon_sysfs(struct power_supply *psy); +#else +static inline int power_supply_add_hwmon_sysfs(struct power_supply *psy) +{ + return 0; +} + +static inline +void power_supply_remove_hwmon_sysfs(struct power_supply *psy) {} +#endif + #endif /* __LINUX_POWER_SUPPLY_H__ */ -- cgit v1.2.3 From 06d2bfedd147d26af6908e4202466586133e73a7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jun 2019 09:08:52 +0200 Subject: binfmt_flat: remove the uapi header The split between the two flat.h files is completely arbitrary, and the uapi version even contains CONFIG_ ifdefs that can't work in userspace. The only userspace program known to use the header is elf2flt, and it ships with its own version of the combined header. Use the chance to move the inclusion out of this file, as it is in no way needed for the format defintion, but just for the binfmt implementation. Signed-off-by: Christoph Hellwig Tested-by: Vladimir Murzin Reviewed-by: Vladimir Murzin Signed-off-by: Greg Ungerer --- fs/binfmt_flat.c | 1 + include/linux/flat.h | 45 ++++++++++++++++++++++++++++++++---- include/uapi/linux/flat.h | 59 ----------------------------------------------- 3 files changed, 42 insertions(+), 63 deletions(-) delete mode 100644 include/uapi/linux/flat.h (limited to 'include/linux') diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index a15fdd5d95ed..b63c5e63ae3f 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -42,6 +42,7 @@ #include #include #include +#include #ifndef flat_get_relocate_addr #define flat_get_relocate_addr(rel) (rel) diff --git a/include/linux/flat.h b/include/linux/flat.h index 569b67d64d5c..21d901ba191b 100644 --- a/include/linux/flat.h +++ b/include/linux/flat.h @@ -10,8 +10,47 @@ #ifndef _LINUX_FLAT_H #define _LINUX_FLAT_H -#include -#include +#define FLAT_VERSION 0x00000004L + +#ifdef CONFIG_BINFMT_SHARED_FLAT +#define MAX_SHARED_LIBS (4) +#else +#define MAX_SHARED_LIBS (1) +#endif + +/* + * To make everything easier to port and manage cross platform + * development, all fields are in network byte order. + */ + +struct flat_hdr { + char magic[4]; + unsigned long rev; /* version (as above) */ + unsigned long entry; /* Offset of first executable instruction + with text segment from beginning of file */ + unsigned long data_start; /* Offset of data segment from beginning of + file */ + unsigned long data_end; /* Offset of end of data segment + from beginning of file */ + unsigned long bss_end; /* Offset of end of bss segment from beginning + of file */ + + /* (It is assumed that data_end through bss_end forms the bss segment.) */ + + unsigned long stack_size; /* Size of stack, in bytes */ + unsigned long reloc_start; /* Offset of relocation records from + beginning of file */ + unsigned long reloc_count; /* Number of relocation records */ + unsigned long flags; + unsigned long build_date; /* When the program/library was built */ + unsigned long filler[5]; /* Reservered, set to zero */ +}; + +#define FLAT_FLAG_RAM 0x0001 /* load program entirely into RAM */ +#define FLAT_FLAG_GOTPIC 0x0002 /* program is PIC with GOT */ +#define FLAT_FLAG_GZIP 0x0004 /* all but the header is compressed */ +#define FLAT_FLAG_GZDATA 0x0008 /* only data/relocs are compressed (for XIP) */ +#define FLAT_FLAG_KTRACE 0x0010 /* output useful kernel trace for debugging */ /* * While it would be nice to keep this header clean, users of older @@ -22,8 +61,6 @@ * with the format above, except to fix bugs with old format support. */ -#include - #define OLD_FLAT_VERSION 0x00000002L #define OLD_FLAT_RELOC_TYPE_TEXT 0 #define OLD_FLAT_RELOC_TYPE_DATA 1 diff --git a/include/uapi/linux/flat.h b/include/uapi/linux/flat.h deleted file mode 100644 index 27e595e44fb7..000000000000 --- a/include/uapi/linux/flat.h +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (C) 2002-2003 David McCullough - * Copyright (C) 1998 Kenneth Albanowski - * The Silver Hammer Group, Ltd. - * - * This file provides the definitions and structures needed to - * support uClinux flat-format executables. - */ - -#ifndef _UAPI_LINUX_FLAT_H -#define _UAPI_LINUX_FLAT_H - - -#define FLAT_VERSION 0x00000004L - -#ifdef CONFIG_BINFMT_SHARED_FLAT -#define MAX_SHARED_LIBS (4) -#else -#define MAX_SHARED_LIBS (1) -#endif - -/* - * To make everything easier to port and manage cross platform - * development, all fields are in network byte order. - */ - -struct flat_hdr { - char magic[4]; - unsigned long rev; /* version (as above) */ - unsigned long entry; /* Offset of first executable instruction - with text segment from beginning of file */ - unsigned long data_start; /* Offset of data segment from beginning of - file */ - unsigned long data_end; /* Offset of end of data segment - from beginning of file */ - unsigned long bss_end; /* Offset of end of bss segment from beginning - of file */ - - /* (It is assumed that data_end through bss_end forms the bss segment.) */ - - unsigned long stack_size; /* Size of stack, in bytes */ - unsigned long reloc_start; /* Offset of relocation records from - beginning of file */ - unsigned long reloc_count; /* Number of relocation records */ - unsigned long flags; - unsigned long build_date; /* When the program/library was built */ - unsigned long filler[5]; /* Reservered, set to zero */ -}; - -#define FLAT_FLAG_RAM 0x0001 /* load program entirely into RAM */ -#define FLAT_FLAG_GOTPIC 0x0002 /* program is PIC with GOT */ -#define FLAT_FLAG_GZIP 0x0004 /* all but the header is compressed */ -#define FLAT_FLAG_GZDATA 0x0008 /* only data/relocs are compressed (for XIP) */ -#define FLAT_FLAG_KTRACE 0x0010 /* output useful kernel trace for debugging */ - - - -#endif /* _UAPI_LINUX_FLAT_H */ -- cgit v1.2.3 From 38e63483a31747ef8a964ba3f0184c1e5b507749 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jun 2019 09:08:53 +0200 Subject: binfmt_flat: remove the unused OLD_FLAT_FLAG_RAM definition Signed-off-by: Christoph Hellwig Signed-off-by: Greg Ungerer --- include/linux/flat.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/flat.h b/include/linux/flat.h index 21d901ba191b..2b7cda6e9c1b 100644 --- a/include/linux/flat.h +++ b/include/linux/flat.h @@ -72,15 +72,12 @@ typedef union { # if defined(mc68000) && !defined(CONFIG_COLDFIRE) signed long offset : 30; unsigned long type : 2; -# define OLD_FLAT_FLAG_RAM 0x1 /* load program entirely into RAM */ # elif defined(__BIG_ENDIAN_BITFIELD) unsigned long type : 2; signed long offset : 30; -# define OLD_FLAT_FLAG_RAM 0x1 /* load program entirely into RAM */ # elif defined(__LITTLE_ENDIAN_BITFIELD) signed long offset : 30; unsigned long type : 2; -# define OLD_FLAT_FLAG_RAM 0x1 /* load program entirely into RAM */ # else # error "Unknown bitfield order for flat files." # endif -- cgit v1.2.3 From 3f8b76a66e0d49e3afaba595b9762c126448e783 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jun 2019 09:08:54 +0200 Subject: binfmt_flat: consolidate two version of flat_v2_reloc_t Two branches of the ifdef maze actually have the same content, so merge them. Signed-off-by: Christoph Hellwig Signed-off-by: Greg Ungerer --- include/linux/flat.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/flat.h b/include/linux/flat.h index 2b7cda6e9c1b..19c586b74b99 100644 --- a/include/linux/flat.h +++ b/include/linux/flat.h @@ -69,15 +69,13 @@ struct flat_hdr { typedef union { unsigned long value; struct { -# if defined(mc68000) && !defined(CONFIG_COLDFIRE) +#if defined(__LITTLE_ENDIAN_BITFIELD) || \ + (defined(mc68000) && !defined(CONFIG_COLDFIRE)) signed long offset : 30; unsigned long type : 2; # elif defined(__BIG_ENDIAN_BITFIELD) unsigned long type : 2; signed long offset : 30; -# elif defined(__LITTLE_ENDIAN_BITFIELD) - signed long offset : 30; - unsigned long type : 2; # else # error "Unknown bitfield order for flat files." # endif -- cgit v1.2.3 From 34b4664ac4824d6c7a8b29db24b18733df07b2f4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jun 2019 09:08:55 +0200 Subject: binfmt_flat: use fixed size type for the on-disk format So far binfmt_flat has only been supported on 32-bit platforms, so the variable size of the fields didn't matter. But the upcoming RISC-V nommu port supports 64-bit CPUs, and we now have a conflict between the elf2flt creation tool that always uses 32-bit fields and the kernel that uses (unsigned) long field. Switch to the userspace view as the rest of the binfmt_flat format is completely architecture neutral, and binfmt_flat isn't the right binary format for huge executables to start with. While we're at it also ensure these fields are using __be types as they big endian and are byte swapped when loaded. Signed-off-by: Christoph Hellwig Reviewed-by: Vladimir Murzin Signed-off-by: Greg Ungerer --- include/linux/flat.h | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/flat.h b/include/linux/flat.h index 19c586b74b99..d586bb6e64a7 100644 --- a/include/linux/flat.h +++ b/include/linux/flat.h @@ -24,26 +24,26 @@ */ struct flat_hdr { - char magic[4]; - unsigned long rev; /* version (as above) */ - unsigned long entry; /* Offset of first executable instruction - with text segment from beginning of file */ - unsigned long data_start; /* Offset of data segment from beginning of - file */ - unsigned long data_end; /* Offset of end of data segment - from beginning of file */ - unsigned long bss_end; /* Offset of end of bss segment from beginning - of file */ + char magic[4]; + __be32 rev; /* version (as above) */ + __be32 entry; /* Offset of first executable instruction + with text segment from beginning of file */ + __be32 data_start; /* Offset of data segment from beginning of + file */ + __be32 data_end; /* Offset of end of data segment from beginning + of file */ + __be32 bss_end; /* Offset of end of bss segment from beginning + of file */ /* (It is assumed that data_end through bss_end forms the bss segment.) */ - unsigned long stack_size; /* Size of stack, in bytes */ - unsigned long reloc_start; /* Offset of relocation records from - beginning of file */ - unsigned long reloc_count; /* Number of relocation records */ - unsigned long flags; - unsigned long build_date; /* When the program/library was built */ - unsigned long filler[5]; /* Reservered, set to zero */ + __be32 stack_size; /* Size of stack, in bytes */ + __be32 reloc_start; /* Offset of relocation records from beginning of + file */ + __be32 reloc_count; /* Number of relocation records */ + __be32 flags; + __be32 build_date; /* When the program/library was built */ + __u32 filler[5]; /* Reservered, set to zero */ }; #define FLAT_FLAG_RAM 0x0001 /* load program entirely into RAM */ @@ -67,15 +67,15 @@ struct flat_hdr { #define OLD_FLAT_RELOC_TYPE_BSS 2 typedef union { - unsigned long value; + u32 value; struct { #if defined(__LITTLE_ENDIAN_BITFIELD) || \ (defined(mc68000) && !defined(CONFIG_COLDFIRE)) - signed long offset : 30; - unsigned long type : 2; + s32 offset : 30; + u32 type : 2; # elif defined(__BIG_ENDIAN_BITFIELD) - unsigned long type : 2; - signed long offset : 30; + u32 type : 2; + s32 offset : 30; # else # error "Unknown bitfield order for flat files." # endif -- cgit v1.2.3 From a445d988b4790e06bb94e927e740017675d7e700 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jun 2019 09:09:01 +0200 Subject: binfmt_flat: move the MAX_SHARED_LIBS definition to binfmt_flat.c MAX_SHARED_LIBS is an implementation detail of the kernel loader, and should be kept away from the file format definition. Signed-off-by: Christoph Hellwig Signed-off-by: Greg Ungerer --- fs/binfmt_flat.c | 6 ++++++ include/linux/flat.h | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 0ca65d51bb01..ccd9843e979e 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -68,6 +68,12 @@ #define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */ #define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */ +#ifdef CONFIG_BINFMT_SHARED_FLAT +#define MAX_SHARED_LIBS (4) +#else +#define MAX_SHARED_LIBS (1) +#endif + struct lib_info { struct { unsigned long start_code; /* Start of text segment */ diff --git a/include/linux/flat.h b/include/linux/flat.h index d586bb6e64a7..83977c0ce3de 100644 --- a/include/linux/flat.h +++ b/include/linux/flat.h @@ -12,12 +12,6 @@ #define FLAT_VERSION 0x00000004L -#ifdef CONFIG_BINFMT_SHARED_FLAT -#define MAX_SHARED_LIBS (4) -#else -#define MAX_SHARED_LIBS (1) -#endif - /* * To make everything easier to port and manage cross platform * development, all fields are in network byte order. -- cgit v1.2.3 From 29d14b668d2f2e7b692525ee3f69bf12b06be0f0 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 14 Jun 2019 18:53:57 +0100 Subject: mfd: Remove unused helper syscon_regmap_lookup_by_pdevname Nobody uses the exported helper syscon_regmap_lookup_by_pdevname, to lookup a device by name. Let us remove it. Suggested-by: Arnd Bergman Cc: Arnd Bergman Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Signed-off-by: Suzuki K Poulose Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/syscon.c | 21 --------------------- include/linux/mfd/syscon.h | 6 ------ 2 files changed, 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c index 8ce1e41d632c..b65e585fc8c6 100644 --- a/drivers/mfd/syscon.c +++ b/drivers/mfd/syscon.c @@ -190,27 +190,6 @@ struct regmap *syscon_regmap_lookup_by_compatible(const char *s) } EXPORT_SYMBOL_GPL(syscon_regmap_lookup_by_compatible); -static int syscon_match_pdevname(struct device *dev, void *data) -{ - return !strcmp(dev_name(dev), (const char *)data); -} - -struct regmap *syscon_regmap_lookup_by_pdevname(const char *s) -{ - struct device *dev; - struct syscon *syscon; - - dev = driver_find_device(&syscon_driver.driver, NULL, (void *)s, - syscon_match_pdevname); - if (!dev) - return ERR_PTR(-EPROBE_DEFER); - - syscon = dev_get_drvdata(dev); - - return syscon->regmap; -} -EXPORT_SYMBOL_GPL(syscon_regmap_lookup_by_pdevname); - struct regmap *syscon_regmap_lookup_by_phandle(struct device_node *np, const char *property) { diff --git a/include/linux/mfd/syscon.h b/include/linux/mfd/syscon.h index f0273c9e972b..8cfda0554381 100644 --- a/include/linux/mfd/syscon.h +++ b/include/linux/mfd/syscon.h @@ -19,7 +19,6 @@ struct device_node; #ifdef CONFIG_MFD_SYSCON extern struct regmap *syscon_node_to_regmap(struct device_node *np); extern struct regmap *syscon_regmap_lookup_by_compatible(const char *s); -extern struct regmap *syscon_regmap_lookup_by_pdevname(const char *s); extern struct regmap *syscon_regmap_lookup_by_phandle( struct device_node *np, const char *property); @@ -34,11 +33,6 @@ static inline struct regmap *syscon_regmap_lookup_by_compatible(const char *s) return ERR_PTR(-ENOTSUPP); } -static inline struct regmap *syscon_regmap_lookup_by_pdevname(const char *s) -{ - return ERR_PTR(-ENOTSUPP); -} - static inline struct regmap *syscon_regmap_lookup_by_phandle( struct device_node *np, const char *property) -- cgit v1.2.3 From 418e3ea157efb0eb2c6dd412a8d5f052477c7f5a Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 14 Jun 2019 18:53:59 +0100 Subject: bus_find_device: Unify the match callback with class_find_device There is an arbitrary difference between the prototypes of bus_find_device() and class_find_device() preventing their callers from passing the same pair of data and match() arguments to both of them, which is the const qualifier used in the prototype of class_find_device(). If that qualifier is also used in the bus_find_device() prototype, it will be possible to pass the same match() callback function to both bus_find_device() and class_find_device(), which will allow some optimizations to be made in order to avoid code duplication going forward. Also with that, constify the "data" parameter as it is passed as a const to the match function. For this reason, change the prototype of bus_find_device() to match the prototype of class_find_device() and adjust its callers to use the const qualifier in accordance with the new prototype of it. Cc: Alexander Shishkin Cc: Andrew Lunn Cc: Andreas Noever Cc: Arnd Bergmann Cc: Bjorn Helgaas Cc: Corey Minyard Cc: Christian Borntraeger Cc: David Kershner Cc: "David S. Miller" Cc: David Airlie Cc: Felipe Balbi Cc: Frank Rowand Cc: Grygorii Strashko Cc: Harald Freudenberger Cc: Hartmut Knaack Cc: Heiko Stuebner Cc: Jason Gunthorpe Cc: Jonathan Cameron Cc: "James E.J. Bottomley" Cc: Len Brown Cc: Mark Brown Cc: Michael Ellerman Cc: Michael Jamet Cc: "Martin K. Petersen" Cc: Peter Oberparleiter Cc: Sebastian Ott Cc: Srinivas Kandagatla Cc: Yehezkel Bernat Cc: rafael@kernel.org Acked-by: Corey Minyard Acked-by: David Kershner Acked-by: Mark Brown Acked-by: Rafael J. Wysocki Acked-by: Srinivas Kandagatla Acked-by: Wolfram Sang # for the I2C parts Acked-by: Rob Herring Signed-off-by: Suzuki K Poulose Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/ibmebus.c | 4 ++-- drivers/acpi/acpi_lpss.c | 4 ++-- drivers/acpi/sleep.c | 2 +- drivers/acpi/utils.c | 4 ++-- drivers/base/bus.c | 6 +++--- drivers/base/devcon.c | 2 +- drivers/char/ipmi/ipmi_si_platform.c | 2 +- drivers/firmware/efi/dev-path-parser.c | 4 ++-- drivers/gpu/drm/drm_mipi_dsi.c | 2 +- drivers/hwtracing/coresight/coresight.c | 6 +++--- drivers/hwtracing/coresight/of_coresight.c | 2 +- drivers/hwtracing/intel_th/core.c | 5 ++--- drivers/i2c/i2c-core-acpi.c | 4 ++-- drivers/i2c/i2c-core-of.c | 4 ++-- drivers/iio/inkern.c | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 2 +- drivers/net/ethernet/ti/cpsw-phy-sel.c | 4 ++-- drivers/net/ethernet/ti/davinci_emac.c | 2 +- drivers/net/ethernet/toshiba/tc35815.c | 4 ++-- drivers/nvmem/core.c | 2 +- drivers/of/of_mdio.c | 2 +- drivers/of/platform.c | 2 +- drivers/pci/probe.c | 2 +- drivers/pci/search.c | 4 ++-- drivers/s390/cio/css.c | 4 ++-- drivers/s390/cio/device.c | 4 ++-- drivers/s390/cio/scm.c | 4 ++-- drivers/s390/crypto/ap_bus.c | 8 ++++---- drivers/scsi/scsi_proc.c | 2 +- drivers/spi/spi.c | 4 ++-- drivers/thunderbolt/switch.c | 4 ++-- drivers/usb/core/devio.c | 4 ++-- drivers/usb/core/usb.c | 4 ++-- drivers/usb/phy/phy-am335x-control.c | 4 ++-- drivers/usb/phy/phy-isp1301.c | 4 ++-- drivers/visorbus/visorbus_main.c | 4 ++-- include/linux/device.h | 4 ++-- sound/soc/rockchip/rk3399_gru_sound.c | 2 +- 39 files changed, 67 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index 84e8ec4011ba..b91eb0929ed1 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -147,13 +147,13 @@ static const struct dma_map_ops ibmebus_dma_ops = { .unmap_page = ibmebus_unmap_page, }; -static int ibmebus_match_path(struct device *dev, void *data) +static int ibmebus_match_path(struct device *dev, const void *data) { struct device_node *dn = to_platform_device(dev)->dev.of_node; return (of_find_node_by_path(data) == dn); } -static int ibmebus_match_node(struct device *dev, void *data) +static int ibmebus_match_node(struct device *dev, const void *data) { return to_platform_device(dev)->dev.of_node == data; } diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index cf768608437e..dc2ca78748a2 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -511,10 +511,10 @@ struct hid_uid { const char *uid; }; -static int match_hid_uid(struct device *dev, void *data) +static int match_hid_uid(struct device *dev, const void *data) { struct acpi_device *adev = ACPI_COMPANION(dev); - struct hid_uid *id = data; + const struct hid_uid *id = data; if (!adev) return 0; diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index a34deccd7317..fcf4386ecc78 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -454,7 +454,7 @@ static int acpi_pm_prepare(void) return error; } -static int find_powerf_dev(struct device *dev, void *data) +static int find_powerf_dev(struct device *dev, const void *data) { struct acpi_device *device = to_acpi_device(dev); const char *hid = acpi_device_hid(device); diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 1391b63cadfd..e3974a8f8fd4 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -730,10 +730,10 @@ struct acpi_dev_match_info { s64 hrv; }; -static int acpi_dev_match_cb(struct device *dev, void *data) +static int acpi_dev_match_cb(struct device *dev, const void *data) { struct acpi_device *adev = to_acpi_device(dev); - struct acpi_dev_match_info *match = data; + const struct acpi_dev_match_info *match = data; unsigned long long hrv; acpi_status status; diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 0a58e969f8b7..df3cac739813 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -323,8 +323,8 @@ EXPORT_SYMBOL_GPL(bus_for_each_dev); * return to the caller and not iterate over any more devices. */ struct device *bus_find_device(struct bus_type *bus, - struct device *start, void *data, - int (*match)(struct device *dev, void *data)) + struct device *start, const void *data, + int (*match)(struct device *dev, const void *data)) { struct klist_iter i; struct device *dev; @@ -342,7 +342,7 @@ struct device *bus_find_device(struct bus_type *bus, } EXPORT_SYMBOL_GPL(bus_find_device); -static int match_name(struct device *dev, void *data) +static int match_name(struct device *dev, const void *data) { const char *name = data; diff --git a/drivers/base/devcon.c b/drivers/base/devcon.c index 04db9ae235e4..ac026d5fc672 100644 --- a/drivers/base/devcon.c +++ b/drivers/base/devcon.c @@ -107,7 +107,7 @@ static struct bus_type *generic_match_buses[] = { NULL, }; -static int device_fwnode_match(struct device *dev, void *fwnode) +static int device_fwnode_match(struct device *dev, const void *fwnode) { return dev_fwnode(dev) == fwnode; } diff --git a/drivers/char/ipmi/ipmi_si_platform.c b/drivers/char/ipmi/ipmi_si_platform.c index f2a91c4d8cab..fd94c4238449 100644 --- a/drivers/char/ipmi/ipmi_si_platform.c +++ b/drivers/char/ipmi/ipmi_si_platform.c @@ -426,7 +426,7 @@ static int ipmi_remove(struct platform_device *pdev) return ipmi_si_remove_by_dev(&pdev->dev); } -static int pdev_match_name(struct device *dev, void *data) +static int pdev_match_name(struct device *dev, const void *data) { struct platform_device *pdev = to_platform_device(dev); const char *name = data; diff --git a/drivers/firmware/efi/dev-path-parser.c b/drivers/firmware/efi/dev-path-parser.c index 85ec99f97841..20123384271c 100644 --- a/drivers/firmware/efi/dev-path-parser.c +++ b/drivers/firmware/efi/dev-path-parser.c @@ -17,9 +17,9 @@ struct acpi_hid_uid { char uid[11]; /* UINT_MAX + null byte */ }; -static int __init match_acpi_dev(struct device *dev, void *data) +static int __init match_acpi_dev(struct device *dev, const void *data) { - struct acpi_hid_uid hid_uid = *(struct acpi_hid_uid *)data; + struct acpi_hid_uid hid_uid = *(const struct acpi_hid_uid *)data; struct acpi_device *adev = to_acpi_device(dev); if (acpi_match_device_ids(adev, hid_uid.hid)) diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index 80b75501f5c6..ad19df0686c9 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -93,7 +93,7 @@ static struct bus_type mipi_dsi_bus_type = { .pm = &mipi_dsi_device_pm_ops, }; -static int of_device_match(struct device *dev, void *data) +static int of_device_match(struct device *dev, const void *data) { return dev->of_node == data; } diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 4b130281236a..b67ab6a09587 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -498,9 +498,9 @@ struct coresight_device *coresight_get_sink(struct list_head *path) return csdev; } -static int coresight_enabled_sink(struct device *dev, void *data) +static int coresight_enabled_sink(struct device *dev, const void *data) { - bool *reset = data; + const bool *reset = data; struct coresight_device *csdev = to_coresight_device(dev); if ((csdev->type == CORESIGHT_DEV_TYPE_SINK || @@ -544,7 +544,7 @@ struct coresight_device *coresight_get_enabled_sink(bool deactivate) return dev ? to_coresight_device(dev) : NULL; } -static int coresight_sink_by_id(struct device *dev, void *data) +static int coresight_sink_by_id(struct device *dev, const void *data) { struct coresight_device *csdev = to_coresight_device(dev); unsigned long hash; diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c index 7045930fc958..3fc200ec1c03 100644 --- a/drivers/hwtracing/coresight/of_coresight.c +++ b/drivers/hwtracing/coresight/of_coresight.c @@ -18,7 +18,7 @@ #include -static int of_dev_node_match(struct device *dev, void *data) +static int of_dev_node_match(struct device *dev, const void *data) { return dev->of_node == data; } diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c index 033dce563c99..55922896d862 100644 --- a/drivers/hwtracing/intel_th/core.c +++ b/drivers/hwtracing/intel_th/core.c @@ -789,10 +789,9 @@ static int intel_th_populate(struct intel_th *th) return 0; } -static int match_devt(struct device *dev, void *data) +static int match_devt(struct device *dev, const void *data) { - dev_t devt = (dev_t)(unsigned long)data; - + dev_t devt = (dev_t)(unsigned long)(void *)data; return dev->devt == devt; } diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index d84095591e45..8af35f114821 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -318,7 +318,7 @@ u32 i2c_acpi_find_bus_speed(struct device *dev) } EXPORT_SYMBOL_GPL(i2c_acpi_find_bus_speed); -static int i2c_acpi_find_match_adapter(struct device *dev, void *data) +static int i2c_acpi_find_match_adapter(struct device *dev, const void *data) { struct i2c_adapter *adapter = i2c_verify_adapter(dev); @@ -328,7 +328,7 @@ static int i2c_acpi_find_match_adapter(struct device *dev, void *data) return ACPI_HANDLE(dev) == (acpi_handle)data; } -static int i2c_acpi_find_match_device(struct device *dev, void *data) +static int i2c_acpi_find_match_device(struct device *dev, const void *data) { return ACPI_COMPANION(dev) == data; } diff --git a/drivers/i2c/i2c-core-of.c b/drivers/i2c/i2c-core-of.c index 406e5f695a7e..2eb59a260ad4 100644 --- a/drivers/i2c/i2c-core-of.c +++ b/drivers/i2c/i2c-core-of.c @@ -112,12 +112,12 @@ void of_i2c_register_devices(struct i2c_adapter *adap) of_node_put(bus); } -static int of_dev_node_match(struct device *dev, void *data) +static int of_dev_node_match(struct device *dev, const void *data) { return dev->of_node == data; } -static int of_dev_or_parent_node_match(struct device *dev, void *data) +static int of_dev_or_parent_node_match(struct device *dev, const void *data) { if (dev->of_node == data) return 1; diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c index 4a5eff3f18bc..c46fb59d92cb 100644 --- a/drivers/iio/inkern.c +++ b/drivers/iio/inkern.c @@ -93,7 +93,7 @@ static const struct iio_chan_spec #ifdef CONFIG_OF -static int iio_dev_node_match(struct device *dev, void *data) +static int iio_dev_node_match(struct device *dev, const void *data) { return dev->of_node == data && dev->type == &iio_device_type; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 4c5d0f160c10..fd90b05849c8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -4497,7 +4497,7 @@ static const struct acpi_device_id hns_roce_acpi_match[] = { }; MODULE_DEVICE_TABLE(acpi, hns_roce_acpi_match); -static int hns_roce_node_match(struct device *dev, void *fwnode) +static int hns_roce_node_match(struct device *dev, const void *fwnode) { return dev->fwnode == fwnode; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index 09c16d88172e..bb6586d0e5af 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -754,7 +754,7 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev) return (void *)misc_op; } -static int hns_dsaf_dev_match(struct device *dev, void *fwnode) +static int hns_dsaf_dev_match(struct device *dev, const void *fwnode) { return dev->fwnode == fwnode; } diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c index 48e0924259f5..4e184eecc8e1 100644 --- a/drivers/net/ethernet/ti/cpsw-phy-sel.c +++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c @@ -151,9 +151,9 @@ static void cpsw_gmii_sel_dra7xx(struct cpsw_phy_sel_priv *priv, } static struct platform_driver cpsw_phy_sel_driver; -static int match(struct device *dev, void *data) +static int match(struct device *dev, const void *data) { - struct device_node *node = (struct device_node *)data; + const struct device_node *node = (const struct device_node *)data; return dev->of_node == node && dev->driver == &cpsw_phy_sel_driver.driver; } diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 4bf65cab79e6..57d131a04db3 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1371,7 +1371,7 @@ static int emac_devioctl(struct net_device *ndev, struct ifreq *ifrq, int cmd) return -EOPNOTSUPP; } -static int match_first_device(struct device *dev, void *data) +static int match_first_device(struct device *dev, const void *data) { if (dev->parent && dev->parent->of_node) return of_device_is_compatible(dev->parent->of_node, diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index c50a9772f4af..8479a440527b 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -694,10 +694,10 @@ err_out: * should provide a "tc35815-mac" device with a MAC address in its * platform_data. */ -static int tc35815_mac_match(struct device *dev, void *data) +static int tc35815_mac_match(struct device *dev, const void *data) { struct platform_device *plat_dev = to_platform_device(dev); - struct pci_dev *pci_dev = data; + const struct pci_dev *pci_dev = data; unsigned int id = pci_dev->irq; return !strcmp(plat_dev->name, "tc35815-mac") && plat_dev->id == id; } diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index c7892c3da91f..ac5d945be88a 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -76,7 +76,7 @@ static struct bus_type nvmem_bus_type = { .name = "nvmem", }; -static int of_nvmem_match(struct device *dev, void *nvmem_np) +static int of_nvmem_match(struct device *dev, const void *nvmem_np) { return dev->of_node == nvmem_np; } diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index de6157357e26..dfe12948c834 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -282,7 +282,7 @@ unregister: EXPORT_SYMBOL(of_mdiobus_register); /* Helper function for of_phy_find_device */ -static int of_phy_match(struct device *dev, void *phy_np) +static int of_phy_match(struct device *dev, const void *phy_np) { return dev->of_node == phy_np; } diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 04ad312fd85b..008d79e33c2d 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -37,7 +37,7 @@ static const struct of_device_id of_skipped_node_table[] = { {} /* Empty terminated list */ }; -static int of_dev_node_match(struct device *dev, void *data) +static int of_dev_node_match(struct device *dev, const void *data) { return dev->of_node == data; } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 0e8e2c186f50..f9ef7ad3f75d 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -64,7 +64,7 @@ static struct resource *get_pci_domain_busn_res(int domain_nr) return &r->res; } -static int find_anything(struct device *dev, void *data) +static int find_anything(struct device *dev, const void *data) { return 1; } diff --git a/drivers/pci/search.c b/drivers/pci/search.c index 5c7922612733..7f4e65872b8d 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -236,10 +236,10 @@ struct pci_dev *pci_get_domain_bus_and_slot(int domain, unsigned int bus, } EXPORT_SYMBOL(pci_get_domain_bus_and_slot); -static int match_pci_dev_by_id(struct device *dev, void *data) +static int match_pci_dev_by_id(struct device *dev, const void *data) { struct pci_dev *pdev = to_pci_dev(dev); - struct pci_device_id *id = data; + const struct pci_device_id *id = data; if (pci_match_one_device(id, pdev)) return 1; diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index aea502922646..a2c97830efe0 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -434,10 +434,10 @@ static int css_probe_device(struct subchannel_id schid, struct schib *schib) } static int -check_subchannel(struct device * dev, void * data) +check_subchannel(struct device *dev, const void *data) { struct subchannel *sch; - struct subchannel_id *schid = data; + struct subchannel_id *schid = (void *)data; sch = to_subchannel(dev); return schid_equal(&sch->schid, schid); diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 1540229a37bb..d32f373e5bc7 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -642,10 +642,10 @@ static int ccw_device_add(struct ccw_device *cdev) return device_add(dev); } -static int match_dev_id(struct device *dev, void *data) +static int match_dev_id(struct device *dev, const void *data) { struct ccw_device *cdev = to_ccwdev(dev); - struct ccw_dev_id *dev_id = data; + struct ccw_dev_id *dev_id = (void *)data; return ccw_dev_id_is_equal(&cdev->private->dev_id, dev_id); } diff --git a/drivers/s390/cio/scm.c b/drivers/s390/cio/scm.c index 6bca1d5455d4..9f26d4310bb3 100644 --- a/drivers/s390/cio/scm.c +++ b/drivers/s390/cio/scm.c @@ -174,10 +174,10 @@ out: kobject_uevent(&scmdev->dev.kobj, KOBJ_CHANGE); } -static int check_address(struct device *dev, void *data) +static int check_address(struct device *dev, const void *data) { struct scm_device *scmdev = to_scm_dev(dev); - struct sale *sale = data; + const struct sale *sale = data; return scmdev->address == sale->sa; } diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index b9fc502c58c2..b7902b643ec8 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1356,16 +1356,16 @@ static int ap_get_compatible_type(ap_qid_t qid, int rawtype, unsigned int func) * Helper function to be used with bus_find_dev * matches for the card device with the given id */ -static int __match_card_device_with_id(struct device *dev, void *data) +static int __match_card_device_with_id(struct device *dev, const void *data) { - return is_card_dev(dev) && to_ap_card(dev)->id == (int)(long) data; + return is_card_dev(dev) && to_ap_card(dev)->id == (int)(long)(void *) data; } /* * Helper function to be used with bus_find_dev * matches for the queue device with a given qid */ -static int __match_queue_device_with_qid(struct device *dev, void *data) +static int __match_queue_device_with_qid(struct device *dev, const void *data) { return is_queue_dev(dev) && to_ap_queue(dev)->qid == (int)(long) data; } @@ -1374,7 +1374,7 @@ static int __match_queue_device_with_qid(struct device *dev, void *data) * Helper function to be used with bus_find_dev * matches any queue device with given queue id */ -static int __match_queue_device_with_queue_id(struct device *dev, void *data) +static int __match_queue_device_with_queue_id(struct device *dev, const void *data) { return is_queue_dev(dev) && AP_QID_QUEUE(to_ap_queue(dev)->qid) == (int)(long) data; diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c index 7f0ceb65c3f3..c074631086a4 100644 --- a/drivers/scsi/scsi_proc.c +++ b/drivers/scsi/scsi_proc.c @@ -372,7 +372,7 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf, return err; } -static int always_match(struct device *dev, void *data) +static int always_match(struct device *dev, const void *data) { return 1; } diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 5e75944ad5d1..3da1121f7572 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -3538,7 +3538,7 @@ EXPORT_SYMBOL_GPL(spi_write_then_read); /*-------------------------------------------------------------------------*/ #if IS_ENABLED(CONFIG_OF) -static int __spi_of_device_match(struct device *dev, void *data) +static int __spi_of_device_match(struct device *dev, const void *data) { return dev->of_node == data; } @@ -3639,7 +3639,7 @@ static int spi_acpi_controller_match(struct device *dev, const void *data) return ACPI_COMPANION(dev->parent) == data; } -static int spi_acpi_device_match(struct device *dev, void *data) +static int spi_acpi_device_match(struct device *dev, const void *data) { return ACPI_COMPANION(dev) == data; } diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index c1b016574fb4..c9a7e4a779cd 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -1946,10 +1946,10 @@ struct tb_sw_lookup { u64 route; }; -static int tb_switch_match(struct device *dev, void *data) +static int tb_switch_match(struct device *dev, const void *data) { struct tb_switch *sw = tb_to_switch(dev); - struct tb_sw_lookup *lookup = data; + const struct tb_sw_lookup *lookup = data; if (!sw) return 0; diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index fa783531ee88..7bd7de7273a3 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -947,9 +947,9 @@ error: return ret; } -static int match_devt(struct device *dev, void *data) +static int match_devt(struct device *dev, const void *data) { - return dev->devt == (dev_t) (unsigned long) data; + return dev->devt == (dev_t)(unsigned long)(void *)data; } static struct usb_device *usbdev_lookup_by_devt(dev_t devt) diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 7fcb9f782931..1678e305e037 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -325,9 +325,9 @@ struct find_interface_arg { struct device_driver *drv; }; -static int __find_interface(struct device *dev, void *data) +static int __find_interface(struct device *dev, const void *data) { - struct find_interface_arg *arg = data; + const struct find_interface_arg *arg = data; struct usb_interface *intf; if (!is_usb_interface(dev)) diff --git a/drivers/usb/phy/phy-am335x-control.c b/drivers/usb/phy/phy-am335x-control.c index a3cb25cb74f8..d16dfc320faa 100644 --- a/drivers/usb/phy/phy-am335x-control.c +++ b/drivers/usb/phy/phy-am335x-control.c @@ -118,9 +118,9 @@ static const struct of_device_id omap_control_usb_id_table[] = { MODULE_DEVICE_TABLE(of, omap_control_usb_id_table); static struct platform_driver am335x_control_driver; -static int match(struct device *dev, void *data) +static int match(struct device *dev, const void *data) { - struct device_node *node = (struct device_node *)data; + const struct device_node *node = (const struct device_node *)data; return dev->of_node == node && dev->driver == &am335x_control_driver.driver; } diff --git a/drivers/usb/phy/phy-isp1301.c b/drivers/usb/phy/phy-isp1301.c index 93b7d6a30aad..6cf6fbd39237 100644 --- a/drivers/usb/phy/phy-isp1301.c +++ b/drivers/usb/phy/phy-isp1301.c @@ -142,9 +142,9 @@ static struct i2c_driver isp1301_driver = { module_i2c_driver(isp1301_driver); -static int match(struct device *dev, void *data) +static int match(struct device *dev, const void *data) { - struct device_node *node = (struct device_node *)data; + const struct device_node *node = (const struct device_node *)data; return (dev->of_node == node) && (dev->driver == &isp1301_driver.driver); } diff --git a/drivers/visorbus/visorbus_main.c b/drivers/visorbus/visorbus_main.c index 0b2434cc4ecd..152fd29f04f2 100644 --- a/drivers/visorbus/visorbus_main.c +++ b/drivers/visorbus/visorbus_main.c @@ -171,10 +171,10 @@ struct visor_busdev { u32 dev_no; }; -static int match_visorbus_dev_by_id(struct device *dev, void *data) +static int match_visorbus_dev_by_id(struct device *dev, const void *data) { struct visor_device *vdev = to_visor_device(dev); - struct visor_busdev *id = data; + const struct visor_busdev *id = data; if (vdev->chipset_bus_no == id->bus_no && vdev->chipset_dev_no == id->dev_no) diff --git a/include/linux/device.h b/include/linux/device.h index e85264fb6616..cbbdcadc660e 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -166,8 +166,8 @@ void subsys_dev_iter_exit(struct subsys_dev_iter *iter); int bus_for_each_dev(struct bus_type *bus, struct device *start, void *data, int (*fn)(struct device *dev, void *data)); struct device *bus_find_device(struct bus_type *bus, struct device *start, - void *data, - int (*match)(struct device *dev, void *data)); + const void *data, + int (*match)(struct device *dev, const void *data)); struct device *bus_find_device_by_name(struct bus_type *bus, struct device *start, const char *name); diff --git a/sound/soc/rockchip/rk3399_gru_sound.c b/sound/soc/rockchip/rk3399_gru_sound.c index 3d0cc6e90d7b..c04c9ed185b7 100644 --- a/sound/soc/rockchip/rk3399_gru_sound.c +++ b/sound/soc/rockchip/rk3399_gru_sound.c @@ -405,7 +405,7 @@ static const struct dailink_match_data dailink_match[] = { }, }; -static int of_dev_node_match(struct device *dev, void *data) +static int of_dev_node_match(struct device *dev, const void *data) { return dev->of_node == data; } -- cgit v1.2.3 From 92ce7e83b4e5c86687d748ba53cb755acdce1256 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 14 Jun 2019 18:54:00 +0100 Subject: driver_find_device: Unify the match function with class_find_device() The driver_find_device() accepts a match function pointer to filter the devices for lookup, similar to bus/class_find_device(). However, there is a minor difference in the prototype for the match parameter for driver_find_device() with the now unified version accepted by {bus/class}_find_device(), where it doesn't accept a "const" qualifier for the data argument. This prevents us from reusing the generic match functions for driver_find_device(). For this reason, change the prototype of the driver_find_device() to make the "match" parameter in line with {bus/class}_find_device() and adjust its callers to use the const qualifier. Also, we could now promote the "data" parameter to const as we pass it down as a const parameter to the match functions. Cc: Corey Minyard Cc: Russell King Cc: Thierry Reding Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Will Deacon Cc: Joerg Roedel Cc: Peter Oberparleiter Cc: Sebastian Ott Cc: David Airlie Cc: Daniel Vetter Cc: Nehal Shah Cc: Shyam Sundar S K Cc: Lee Jones Cc: Christian Borntraeger Signed-off-by: Suzuki K Poulose Signed-off-by: Greg Kroah-Hartman --- drivers/amba/tegra-ahb.c | 4 ++-- drivers/base/driver.c | 4 ++-- drivers/char/ipmi/ipmi_msghandler.c | 8 ++++---- drivers/gpu/drm/tegra/dc.c | 4 ++-- drivers/i2c/busses/i2c-amd-mp2-pci.c | 2 +- drivers/iommu/arm-smmu-v3.c | 2 +- drivers/iommu/arm-smmu.c | 2 +- drivers/mfd/altera-sysmgr.c | 4 ++-- drivers/s390/cio/ccwgroup.c | 4 ++-- drivers/s390/cio/chsc_sch.c | 2 +- drivers/s390/cio/device.c | 2 +- include/linux/device.h | 4 ++-- 12 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c index 3751d811be39..42175a67ba0e 100644 --- a/drivers/amba/tegra-ahb.c +++ b/drivers/amba/tegra-ahb.c @@ -143,10 +143,10 @@ static inline void gizmo_writel(struct tegra_ahb *ahb, u32 value, u32 offset) } #ifdef CONFIG_TEGRA_IOMMU_SMMU -static int tegra_ahb_match_by_smmu(struct device *dev, void *data) +static int tegra_ahb_match_by_smmu(struct device *dev, const void *data) { struct tegra_ahb *ahb = dev_get_drvdata(dev); - struct device_node *dn = data; + const struct device_node *dn = data; return (ahb->dev->of_node == dn) ? 1 : 0; } diff --git a/drivers/base/driver.c b/drivers/base/driver.c index 857c8f1b876e..4e5ca632f35e 100644 --- a/drivers/base/driver.c +++ b/drivers/base/driver.c @@ -73,8 +73,8 @@ EXPORT_SYMBOL_GPL(driver_for_each_device); * return to the caller and not iterate over any more devices. */ struct device *driver_find_device(struct device_driver *drv, - struct device *start, void *data, - int (*match)(struct device *dev, void *data)) + struct device *start, const void *data, + int (*match)(struct device *dev, const void *data)) { struct klist_iter i; struct device *dev; diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 1dc10740fc0f..6707659cffd6 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -2819,9 +2819,9 @@ static const struct device_type bmc_device_type = { .groups = bmc_dev_attr_groups, }; -static int __find_bmc_guid(struct device *dev, void *data) +static int __find_bmc_guid(struct device *dev, const void *data) { - guid_t *guid = data; + const guid_t *guid = data; struct bmc_device *bmc; int rv; @@ -2857,9 +2857,9 @@ struct prod_dev_id { unsigned char device_id; }; -static int __find_bmc_prod_dev_id(struct device *dev, void *data) +static int __find_bmc_prod_dev_id(struct device *dev, const void *data) { - struct prod_dev_id *cid = data; + const struct prod_dev_id *cid = data; struct bmc_device *bmc; int rv; diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 607a6ea17ecc..52109a63e797 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -2375,10 +2375,10 @@ static int tegra_dc_parse_dt(struct tegra_dc *dc) return 0; } -static int tegra_dc_match_by_pipe(struct device *dev, void *data) +static int tegra_dc_match_by_pipe(struct device *dev, const void *data) { struct tegra_dc *dc = dev_get_drvdata(dev); - unsigned int pipe = (unsigned long)data; + unsigned int pipe = (unsigned long)(void *)data; return dc->pipe == pipe; } diff --git a/drivers/i2c/busses/i2c-amd-mp2-pci.c b/drivers/i2c/busses/i2c-amd-mp2-pci.c index 455e1f36a2a3..c7fe3b44a860 100644 --- a/drivers/i2c/busses/i2c-amd-mp2-pci.c +++ b/drivers/i2c/busses/i2c-amd-mp2-pci.c @@ -457,7 +457,7 @@ static struct pci_driver amd_mp2_pci_driver = { }; module_pci_driver(amd_mp2_pci_driver); -static int amd_mp2_device_match(struct device *dev, void *data) +static int amd_mp2_device_match(struct device *dev, const void *data) { return 1; } diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 4d5a694f02c2..d787856f9dcf 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2023,7 +2023,7 @@ arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) static struct platform_driver arm_smmu_driver; -static int arm_smmu_match_node(struct device *dev, void *data) +static int arm_smmu_match_node(struct device *dev, const void *data) { return dev->fwnode == data; } diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 5e54cc0a28b3..4ce429b74655 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1431,7 +1431,7 @@ static bool arm_smmu_capable(enum iommu_cap cap) } } -static int arm_smmu_match_node(struct device *dev, void *data) +static int arm_smmu_match_node(struct device *dev, const void *data) { return dev->fwnode == data; } diff --git a/drivers/mfd/altera-sysmgr.c b/drivers/mfd/altera-sysmgr.c index 8976f82785bb..2ee14d8a6d31 100644 --- a/drivers/mfd/altera-sysmgr.c +++ b/drivers/mfd/altera-sysmgr.c @@ -92,9 +92,9 @@ static struct regmap_config altr_sysmgr_regmap_cfg = { * Matching function used by driver_find_device(). * Return: True if match is found, otherwise false. */ -static int sysmgr_match_phandle(struct device *dev, void *data) +static int sysmgr_match_phandle(struct device *dev, const void *data) { - return dev->of_node == (struct device_node *)data; + return dev->of_node == (const struct device_node *)data; } /** diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index 4ebf6d4fc66c..ea17615789c9 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -581,7 +581,7 @@ int ccwgroup_driver_register(struct ccwgroup_driver *cdriver) } EXPORT_SYMBOL(ccwgroup_driver_register); -static int __ccwgroup_match_all(struct device *dev, void *data) +static int __ccwgroup_match_all(struct device *dev, const void *data) { return 1; } @@ -608,7 +608,7 @@ void ccwgroup_driver_unregister(struct ccwgroup_driver *cdriver) } EXPORT_SYMBOL(ccwgroup_driver_unregister); -static int __ccwgroupdev_check_busid(struct device *dev, void *id) +static int __ccwgroupdev_check_busid(struct device *dev, const void *id) { char *bus_id = id; diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c index 8d9f36625ba5..8f080d3fd380 100644 --- a/drivers/s390/cio/chsc_sch.c +++ b/drivers/s390/cio/chsc_sch.c @@ -203,7 +203,7 @@ static void chsc_cleanup_sch_driver(void) static DEFINE_SPINLOCK(chsc_lock); -static int chsc_subchannel_match_next_free(struct device *dev, void *data) +static int chsc_subchannel_match_next_free(struct device *dev, const void *data) { struct subchannel *sch = to_subchannel(dev); diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index d32f373e5bc7..f27536ba58eb 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1653,7 +1653,7 @@ EXPORT_SYMBOL_GPL(ccw_device_force_console); * get ccw_device matching the busid, but only if owned by cdrv */ static int -__ccwdev_check_busid(struct device *dev, void *id) +__ccwdev_check_busid(struct device *dev, const void *id) { char *bus_id; diff --git a/include/linux/device.h b/include/linux/device.h index cbbdcadc660e..4d7c88131a4d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -336,8 +336,8 @@ extern int __must_check driver_for_each_device(struct device_driver *drv, int (*fn)(struct device *dev, void *)); struct device *driver_find_device(struct device_driver *drv, - struct device *start, void *data, - int (*match)(struct device *dev, void *data)); + struct device *start, const void *data, + int (*match)(struct device *dev, const void *data)); void driver_deferred_probe_add(struct device *dev); int driver_deferred_probe_check_state(struct device *dev); -- cgit v1.2.3 From 65b66682344a15ba2069d4dd8d0cc39cc3aed7e9 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 14 Jun 2019 18:54:01 +0100 Subject: drivers: Add generic helper to match by of_node Add a helper to match device by the of_node. This will be later used to provide wrappers to the device iterators for {bus/class/driver}_find_device(). Convert other users to reuse this new helper. Cc: Alan Tull Cc: Andrew Lunn Cc: Daniel Vetter Cc: David Airlie Cc: "David S. Miller" Cc: devicetree@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: Florian Fainelli Cc: Frank Rowand Cc: Greg Kroah-Hartman Cc: Heiner Kallweit Cc: Jiri Slaby Cc: Jonathan Hunter Cc: Lee Jones Cc: Liam Girdwood Cc: linux-fpga@vger.kernel.org Cc: linux-i2c@vger.kernel.org Cc: linux-spi@vger.kernel.org Cc: Maarten Lankhorst Cc: Mark Brown Cc: Mathieu Poirier Cc: Maxime Ripard Cc: Moritz Fischer Cc: Peter Rosin Cc: Rob Herring Cc: Srinivas Kandagatla Cc: Thierry Reding Cc: Thor Thayer Cc: Wolfram Sang Cc: "Rafael J. Wysocki" Cc: Greg Kroah-Hartman Cc: Ulf Hansson Cc: Joe Perches Signed-off-by: Suzuki K Poulose Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 6 ++++++ drivers/fpga/of-fpga-region.c | 7 +------ include/linux/device.h | 2 ++ 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index fd7511e04e62..92119080474c 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3328,3 +3328,9 @@ void device_set_of_node_from_dev(struct device *dev, const struct device *dev2) dev->of_node_reused = true; } EXPORT_SYMBOL_GPL(device_set_of_node_from_dev); + +int device_match_of_node(struct device *dev, const void *np) +{ + return dev->of_node == np; +} +EXPORT_SYMBOL_GPL(device_match_of_node); diff --git a/drivers/fpga/of-fpga-region.c b/drivers/fpga/of-fpga-region.c index 75f64abf9c81..e405309baadc 100644 --- a/drivers/fpga/of-fpga-region.c +++ b/drivers/fpga/of-fpga-region.c @@ -22,11 +22,6 @@ static const struct of_device_id fpga_region_of_match[] = { }; MODULE_DEVICE_TABLE(of, fpga_region_of_match); -static int fpga_region_of_node_match(struct device *dev, const void *data) -{ - return dev->of_node == data; -} - /** * of_fpga_region_find - find FPGA region * @np: device node of FPGA Region @@ -37,7 +32,7 @@ static int fpga_region_of_node_match(struct device *dev, const void *data) */ static struct fpga_region *of_fpga_region_find(struct device_node *np) { - return fpga_region_class_find(NULL, np, fpga_region_of_node_match); + return fpga_region_class_find(NULL, np, device_match_of_node); } /** diff --git a/include/linux/device.h b/include/linux/device.h index 4d7c88131a4d..709308560d32 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -163,6 +163,8 @@ void subsys_dev_iter_init(struct subsys_dev_iter *iter, struct device *subsys_dev_iter_next(struct subsys_dev_iter *iter); void subsys_dev_iter_exit(struct subsys_dev_iter *iter); +int device_match_of_node(struct device *dev, const void *np); + int bus_for_each_dev(struct bus_type *bus, struct device *start, void *data, int (*fn)(struct device *dev, void *data)); struct device *bus_find_device(struct bus_type *bus, struct device *start, -- cgit v1.2.3 From 38b37d631aec80da0c65ac03a7ef680b468c7857 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Fri, 7 Jun 2019 12:49:11 +0200 Subject: module: allow arch overrides for .exit section names Some archs like ARM store unwind information for .exit.text in sections with unusual names. As this unwind information refers to .exit.text, it must not be loaded when .exit.text is not loaded (when CONFIG_MODULE_UNLOAD is unset); otherwise, loading a module can fail due to relocation failures. Signed-off-by: Matthias Schiffer Signed-off-by: Jessica Yu --- include/linux/moduleloader.h | 5 +++++ kernel/module.c | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 31013c2effd3..5229c18025e9 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -29,6 +29,11 @@ void *module_alloc(unsigned long size); /* Free memory returned from module_alloc. */ void module_memfree(void *module_region); +/* Determines if the section name is an exit section (that is only used during + * module unloading) + */ +bool module_exit_section(const char *name); + /* * Apply the given relocation to the (simplified) ELF. Return -error * or 0. diff --git a/kernel/module.c b/kernel/module.c index 41258bab24f1..537c456ce3ee 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2748,6 +2748,11 @@ void * __weak module_alloc(unsigned long size) return vmalloc_exec(size); } +bool __weak module_exit_section(const char *name) +{ + return strstarts(name, ".exit"); +} + #ifdef CONFIG_DEBUG_KMEMLEAK static void kmemleak_load_module(const struct module *mod, const struct load_info *info) @@ -2937,7 +2942,7 @@ static int rewrite_section_headers(struct load_info *info, int flags) #ifndef CONFIG_MODULE_UNLOAD /* Don't load .exit sections */ - if (strstarts(info->secstrings+shdr->sh_name, ".exit")) + if (module_exit_section(info->secstrings+shdr->sh_name)) shdr->sh_flags &= ~(unsigned long)SHF_ALLOC; #endif } -- cgit v1.2.3 From b0935123a18360d19f1dcc779ea33841cdc304cc Mon Sep 17 00:00:00 2001 From: Prakhar Srivastava Date: Sun, 23 Jun 2019 23:23:29 -0700 Subject: IMA: Define a new hook to measure the kexec boot command line arguments Currently during soft reboot(kexec_file_load) boot command line arguments are not measured. Define hooks needed to measure kexec command line arguments during soft reboot(kexec_file_load). - A new ima hook ima_kexec_cmdline is defined to be called by the kexec code. - A new function process_buffer_measurement is defined to measure the buffer hash into the IMA measurement list. - A new func policy KEXEC_CMDLINE is defined to control the measurement. Signed-off-by: Prakhar Srivastava Signed-off-by: Mimi Zohar --- Documentation/ABI/testing/ima_policy | 1 + include/linux/ima.h | 2 + security/integrity/ima/ima.h | 1 + security/integrity/ima/ima_api.c | 1 + security/integrity/ima/ima_main.c | 72 ++++++++++++++++++++++++++++++++++++ security/integrity/ima/ima_policy.c | 7 ++++ 6 files changed, 84 insertions(+) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy index b383c1763610..fc376a323908 100644 --- a/Documentation/ABI/testing/ima_policy +++ b/Documentation/ABI/testing/ima_policy @@ -28,6 +28,7 @@ Description: base: func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK][MODULE_CHECK] [FIRMWARE_CHECK] [KEXEC_KERNEL_CHECK] [KEXEC_INITRAMFS_CHECK] + [KEXEC_CMDLINE] mask:= [[^]MAY_READ] [[^]MAY_WRITE] [[^]MAY_APPEND] [[^]MAY_EXEC] fsmagic:= hex value diff --git a/include/linux/ima.h b/include/linux/ima.h index fd9f7cf4cdf5..b42f5a006042 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -26,6 +26,7 @@ extern int ima_read_file(struct file *file, enum kernel_read_file_id id); extern int ima_post_read_file(struct file *file, void *buf, loff_t size, enum kernel_read_file_id id); extern void ima_post_path_mknod(struct dentry *dentry); +extern void ima_kexec_cmdline(const void *buf, int size); #ifdef CONFIG_IMA_KEXEC extern void ima_add_kexec_buffer(struct kimage *image); @@ -92,6 +93,7 @@ static inline void ima_post_path_mknod(struct dentry *dentry) return; } +static inline void ima_kexec_cmdline(const void *buf, int size) {} #endif /* CONFIG_IMA */ #ifndef CONFIG_IMA_KEXEC diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index e7b9ea7732d9..bdca641f9e51 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -190,6 +190,7 @@ static inline unsigned long ima_hash_key(u8 *digest) hook(KEXEC_KERNEL_CHECK) \ hook(KEXEC_INITRAMFS_CHECK) \ hook(POLICY_CHECK) \ + hook(KEXEC_CMDLINE) \ hook(MAX_CHECK) #define __ima_hook_enumify(ENUM) ENUM, diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index c0cf4bcfc82f..d426d4d1fe04 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -178,6 +178,7 @@ err_out: * subj=, obj=, type=, func=, mask=, fsmagic= * subj,obj, and type: are LSM specific. * func: FILE_CHECK | BPRM_CHECK | CREDS_CHECK | MMAP_CHECK | MODULE_CHECK + * | KEXEC_CMDLINE * mask: contains the permission mask * fsmagic: hex value * diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index a7e7e2d7224c..2507bee1b762 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -609,6 +609,78 @@ int ima_load_data(enum kernel_load_data_id id) return 0; } +/* + * process_buffer_measurement - Measure the buffer to ima log. + * @buf: pointer to the buffer that needs to be added to the log. + * @size: size of buffer(in bytes). + * @eventname: event name to be used for the buffer entry. + * @cred: a pointer to a credentials structure for user validation. + * @secid: the secid of the task to be validated. + * + * Based on policy, the buffer is measured into the ima log. + */ +static void process_buffer_measurement(const void *buf, int size, + const char *eventname, + const struct cred *cred, u32 secid) +{ + int ret = 0; + struct ima_template_entry *entry = NULL; + struct integrity_iint_cache iint = {}; + struct ima_event_data event_data = {.iint = &iint, + .filename = eventname}; + struct ima_template_desc *template_desc = NULL; + struct { + struct ima_digest_data hdr; + char digest[IMA_MAX_DIGEST_SIZE]; + } hash = {}; + int violation = 0; + int pcr = CONFIG_IMA_MEASURE_PCR_IDX; + int action = 0; + + action = ima_get_action(NULL, cred, secid, 0, KEXEC_CMDLINE, &pcr, + &template_desc); + if (!(action & IMA_MEASURE)) + return; + + iint.ima_hash = &hash.hdr; + iint.ima_hash->algo = ima_hash_algo; + iint.ima_hash->length = hash_digest_size[ima_hash_algo]; + + ret = ima_calc_buffer_hash(buf, size, iint.ima_hash); + if (ret < 0) + goto out; + + ret = ima_alloc_init_template(&event_data, &entry, template_desc); + if (ret < 0) + goto out; + + ret = ima_store_template(entry, violation, NULL, buf, pcr); + + if (ret < 0) + ima_free_template_entry(entry); + +out: + return; +} + +/** + * ima_kexec_cmdline - measure kexec cmdline boot args + * @buf: pointer to buffer + * @size: size of buffer + * + * Buffers can only be measured, not appraised. + */ +void ima_kexec_cmdline(const void *buf, int size) +{ + u32 secid; + + if (buf && size != 0) { + security_task_getsecid(current, &secid); + process_buffer_measurement(buf, size, "kexec-cmdline", + current_cred(), secid); + } +} + static int __init init_ima(void) { int error; diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 98c289559079..a3058b03a955 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -374,6 +374,11 @@ static bool ima_match_rules(struct ima_rule_entry *rule, struct inode *inode, { int i; + if (func == KEXEC_CMDLINE) { + if ((rule->flags & IMA_FUNC) && (rule->func == func)) + return true; + return false; + } if ((rule->flags & IMA_FUNC) && (rule->func != func && func != POST_SETATTR)) return false; @@ -956,6 +961,8 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) entry->func = KEXEC_INITRAMFS_CHECK; else if (strcmp(args[0].from, "POLICY_CHECK") == 0) entry->func = POLICY_CHECK; + else if (strcmp(args[0].from, "KEXEC_CMDLINE") == 0) + entry->func = KEXEC_CMDLINE; else result = -EINVAL; if (!result) -- cgit v1.2.3 From 38ca87c6f1e514686d4a385246d1afe1e1f2e482 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 11 Jun 2019 18:52:46 +0300 Subject: RDMA/mlx5: Introduce and implement new IB_WR_REG_MR_INTEGRITY work request This new WR will be used to perform PI (protection information) handover using the new API. Using the new API, the user will post a single WR that will internally perform all the needed actions to complete PI operation. This new WR will use a memory region that was allocated as IB_MR_TYPE_INTEGRITY and was mapped using ib_map_mr_sg_pi to perform the registration. In the old API, in order to perform a signature handover operation, each ULP should perform the following: 1. Map and register the data buffers. 2. Map and register the protection buffers. 3. Post a special reg WR to configure the signature handover operation layout. 4. Invalidate the signature memory key. 5. Invalidate protection buffers memory key. 6. Invalidate data buffers memory key. In the new API, the mapping of both data and protection buffers is performed using a single call to ib_map_mr_sg_pi function. Also the registration of the buffers and the configuration of the signature operation layout is done by a single new work request called IB_WR_REG_MR_INTEGRITY. This patch implements this operation for mlx5 devices that are capable to offload data integrity generation/validation while performing the actual buffer transfer. This patch will not remove the old signature API that is used by the iSER initiator and target drivers. This will be done in the future. In the internal implementation, for each IB_WR_REG_MR_INTEGRITY work request, we are using a single UMR operation to register both data and protection buffers using KLM's. Afterwards, another UMR operation will describe the strided block format. These will be followed by 2 SET_PSV operations to set the memory/wire domains initial signature parameters passed by the user. In the end of the whole transaction, only the signature memory key (the one that exposed for the RDMA operation) will be invalidated. Signed-off-by: Max Gurtovoy Signed-off-by: Israel Rukshin Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 218 ++++++++++++++++++++++++++++++++++++---- include/linux/mlx5/qp.h | 3 +- include/rdma/ib_verbs.h | 1 + 3 files changed, 201 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index ce8fccb04c3c..f6651b93e469 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4169,7 +4169,7 @@ static __be64 sig_mkey_mask(void) static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, struct mlx5_ib_mr *mr, u8 flags) { - int size = mr->ndescs * mr->desc_size; + int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; memset(umr, 0, sizeof(*umr)); @@ -4300,7 +4300,7 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, struct mlx5_ib_mr *mr, u32 key, int access) { - int ndescs = ALIGN(mr->ndescs, 8) >> 1; + int ndescs = ALIGN(mr->ndescs + mr->meta_ndescs, 8) >> 1; memset(seg, 0, sizeof(*seg)); @@ -4351,7 +4351,7 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, struct mlx5_ib_mr *mr, struct mlx5_ib_pd *pd) { - int bcount = mr->desc_size * mr->ndescs; + int bcount = mr->desc_size * (mr->ndescs + mr->meta_ndescs); dseg->addr = cpu_to_be64(mr->desc_map); dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64)); @@ -4544,23 +4544,52 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, return 0; } -static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, - struct mlx5_ib_qp *qp, void **seg, - int *size, void **cur_edge) +static int set_sig_data_segment(const struct ib_send_wr *send_wr, + struct ib_mr *sig_mr, + struct ib_sig_attrs *sig_attrs, + struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) { - struct ib_sig_attrs *sig_attrs = wr->sig_attrs; - struct ib_mr *sig_mr = wr->sig_mr; struct mlx5_bsf *bsf; - u32 data_len = wr->wr.sg_list->length; - u32 data_key = wr->wr.sg_list->lkey; - u64 data_va = wr->wr.sg_list->addr; + u32 data_len; + u32 data_key; + u64 data_va; + u32 prot_len = 0; + u32 prot_key = 0; + u64 prot_va = 0; + bool prot = false; int ret; int wqe_size; - if (!wr->prot || - (data_key == wr->prot->lkey && - data_va == wr->prot->addr && - data_len == wr->prot->length)) { + if (send_wr->opcode == IB_WR_REG_SIG_MR) { + const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); + + data_len = wr->wr.sg_list->length; + data_key = wr->wr.sg_list->lkey; + data_va = wr->wr.sg_list->addr; + if (wr->prot) { + prot_len = wr->prot->length; + prot_key = wr->prot->lkey; + prot_va = wr->prot->addr; + prot = true; + } + } else { + struct mlx5_ib_mr *mr = to_mmr(sig_mr); + struct mlx5_ib_mr *pi_mr = mr->pi_mr; + + data_len = pi_mr->data_length; + data_key = pi_mr->ibmr.lkey; + data_va = pi_mr->ibmr.iova; + if (pi_mr->meta_ndescs) { + prot_len = pi_mr->meta_length; + prot_key = pi_mr->ibmr.lkey; + prot_va = pi_mr->ibmr.iova + data_len; + prot = true; + } + } + + if (!prot || (data_key == prot_key && data_va == prot_va && + data_len == prot_len)) { /** * Source domain doesn't contain signature information * or data and protection are interleaved in memory. @@ -4594,8 +4623,6 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, struct mlx5_stride_block_ctrl_seg *sblock_ctrl; struct mlx5_stride_block_entry *data_sentry; struct mlx5_stride_block_entry *prot_sentry; - u32 prot_key = wr->prot->lkey; - u64 prot_va = wr->prot->addr; u16 block_size = sig_attrs->mem.sig.dif.pi_interval; int prot_size; @@ -4673,6 +4700,56 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, umr->mkey_mask = sig_mkey_mask(); } +static int set_pi_umr_wr(const struct ib_send_wr *send_wr, + struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) +{ + const struct ib_reg_wr *wr = reg_wr(send_wr); + struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr); + struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr; + struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs; + u32 pdn = get_pd(qp)->pdn; + u32 xlt_size; + int region_len, ret; + + if (unlikely(send_wr->num_sge != 0) || + unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) || + unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) || + unlikely(!sig_mr->sig->sig_status_checked)) + return -EINVAL; + + /* length of the protected region, data + protection */ + region_len = pi_mr->ibmr.length; + + /** + * KLM octoword size - if protection was provided + * then we use strided block format (3 octowords), + * else we use single KLM (1 octoword) + **/ + if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE) + xlt_size = 0x30; + else + xlt_size = sizeof(struct mlx5_klm); + + set_sig_umr_segment(*seg, xlt_size); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + + set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len, + pdn); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + + ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size, + cur_edge); + if (ret) + return ret; + + sig_mr->sig->sig_status_checked = false; + return 0; +} static int set_sig_umr_wr(const struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp, void **seg, int *size, @@ -4716,7 +4793,8 @@ static int set_sig_umr_wr(const struct ib_send_wr *send_wr, *size += sizeof(struct mlx5_mkey_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - ret = set_sig_data_segment(wr, qp, seg, size, cur_edge); + ret = set_sig_data_segment(send_wr, wr->sig_mr, wr->sig_attrs, qp, seg, + size, cur_edge); if (ret) return ret; @@ -4758,7 +4836,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, { struct mlx5_ib_mr *mr = to_mmr(wr->mr); struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); - size_t mr_list_size = mr->ndescs * mr->desc_size; + int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; u8 flags = 0; @@ -4899,8 +4977,11 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_core_dev *mdev = dev->mdev; + struct ib_reg_wr reg_pi_wr; struct mlx5_ib_qp *qp; struct mlx5_ib_mr *mr; + struct mlx5_ib_mr *pi_mr; + struct ib_sig_attrs *sig_attrs; struct mlx5_wqe_xrc_seg *xrc; struct mlx5_bf *bf; void *cur_edge; @@ -4954,7 +5035,8 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; } - if (wr->opcode == IB_WR_REG_MR) { + if (wr->opcode == IB_WR_REG_MR || + wr->opcode == IB_WR_REG_MR_INTEGRITY) { fence = dev->umr_fence; next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; } else { @@ -5012,6 +5094,102 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, num_sge = 0; break; + case IB_WR_REG_MR_INTEGRITY: + memset(®_pi_wr, 0, sizeof(struct ib_reg_wr)); + + mr = to_mmr(reg_wr(wr)->mr); + pi_mr = mr->pi_mr; + + reg_pi_wr.mr = &pi_mr->ibmr; + reg_pi_wr.access = reg_wr(wr)->access; + reg_pi_wr.key = pi_mr->ibmr.rkey; + + qp->sq.wr_data[idx] = IB_WR_REG_MR_INTEGRITY; + ctrl->imm = cpu_to_be32(reg_pi_wr.key); + /* UMR for data + protection registration */ + err = set_reg_wr(qp, ®_pi_wr, &seg, &size, + &cur_edge, false); + if (err) { + *bad_wr = wr; + goto out; + } + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, fence, + MLX5_OPCODE_UMR); + + err = begin_wqe(qp, &seg, &ctrl, wr, &idx, + &size, &cur_edge, nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + ctrl->imm = cpu_to_be32(mr->ibmr.rkey); + /* UMR for sig MR */ + err = set_pi_umr_wr(wr, qp, &seg, &size, + &cur_edge); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, fence, + MLX5_OPCODE_UMR); + + /* + * SET_PSV WQEs are not signaled and solicited + * on error + */ + sig_attrs = mr->ibmr.sig_attrs; + err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, + &size, &cur_edge, nreq, false, + true); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + err = set_psv_wr(&sig_attrs->mem, + mr->sig->psv_memory.psv_idx, + &seg, &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, next_fence, + MLX5_OPCODE_SET_PSV); + + err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, + &size, &cur_edge, nreq, false, + true); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + err = set_psv_wr(&sig_attrs->wire, + mr->sig->psv_wire.psv_idx, + &seg, &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, next_fence, + MLX5_OPCODE_SET_PSV); + + qp->next_fence = + MLX5_FENCE_MODE_INITIATOR_SMALL; + num_sge = 0; + goto skip_psv; + case IB_WR_REG_SIG_MR: qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR; mr = to_mmr(sig_handover_wr(wr)->sig_mr); diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 3ba4edbd17a6..08e43cd9e742 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -37,7 +37,8 @@ #include #define MLX5_INVALID_LKEY 0x100 -#define MLX5_SIG_WQE_SIZE (MLX5_SEND_WQE_BB * 5) +/* UMR (3 WQE_BB's) + SIG (3 WQE_BB's) + PSV (mem) + PSV (wire) */ +#define MLX5_SIG_WQE_SIZE (MLX5_SEND_WQE_BB * 8) #define MLX5_DIF_SIZE 8 #define MLX5_STRIDE_BLOCK_OP 0x400 #define MLX5_CPY_GRD_MASK 0xc0 diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9169e798334f..28db256cbdb9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1236,6 +1236,7 @@ enum ib_wr_opcode { /* These are kernel only and can not be issued by userspace */ IB_WR_REG_MR = 0x20, IB_WR_REG_SIG_MR, + IB_WR_REG_MR_INTEGRITY, /* reserve values for low level drivers' internal use. * These values will not be used at all in the ib core layer. -- cgit v1.2.3 From fa49e1d37bbd6d25a11379891ece1e4d5d313036 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Tue, 28 May 2019 05:07:26 -0400 Subject: media: marvell-ccic: drop unused stuff Remove structure members and headers that are not actually used. Saves us from some noise in subsequent cleanup commits. Signed-off-by: Lubomir Rintel Acked-by: Pavel Machek Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/marvell-ccic/mcam-core.c | 1 - drivers/media/platform/marvell-ccic/mcam-core.h | 2 -- drivers/media/platform/marvell-ccic/mmp-driver.c | 2 -- include/linux/platform_data/media/mmp-camera.h | 1 - 4 files changed, 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/platform/marvell-ccic/mcam-core.c b/drivers/media/platform/marvell-ccic/mcam-core.c index 2494a31de01b..76641d5211ab 100644 --- a/drivers/media/platform/marvell-ccic/mcam-core.c +++ b/drivers/media/platform/marvell-ccic/mcam-core.c @@ -1776,7 +1776,6 @@ int mccic_register(struct mcam_camera *cam) */ sensor_cfg.clock_speed = cam->clock_speed; sensor_cfg.use_smbus = cam->use_smbus; - cam->sensor_addr = ov7670_info.addr; cam->sensor = v4l2_i2c_new_subdev_board(&cam->v4l2_dev, cam->i2c_adapter, &ov7670_info, NULL); if (cam->sensor == NULL) { diff --git a/drivers/media/platform/marvell-ccic/mcam-core.h b/drivers/media/platform/marvell-ccic/mcam-core.h index a3a097a45e78..b828b1bb59d3 100644 --- a/drivers/media/platform/marvell-ccic/mcam-core.h +++ b/drivers/media/platform/marvell-ccic/mcam-core.h @@ -112,7 +112,6 @@ struct mcam_camera { short int use_smbus; /* SMBUS or straight I2c? */ enum mcam_buffer_mode buffer_mode; - int mclk_min; /* The minimal value of mclk */ int mclk_src; /* which clock source the mclk derives from */ int mclk_div; /* Clock Divider Value for MCLK */ @@ -152,7 +151,6 @@ struct mcam_camera { */ struct video_device vdev; struct v4l2_subdev *sensor; - unsigned short sensor_addr; /* Videobuf2 stuff */ struct vb2_queue vb_queue; diff --git a/drivers/media/platform/marvell-ccic/mmp-driver.c b/drivers/media/platform/marvell-ccic/mmp-driver.c index 9c4c7d37d0df..25a4e2b580f4 100644 --- a/drivers/media/platform/marvell-ccic/mmp-driver.c +++ b/drivers/media/platform/marvell-ccic/mmp-driver.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -330,7 +329,6 @@ static int mmpcam_probe(struct platform_device *pdev) mcam->calc_dphy = mmpcam_calc_dphy; mcam->dev = &pdev->dev; mcam->use_smbus = 0; - mcam->mclk_min = pdata->mclk_min; mcam->mclk_src = pdata->mclk_src; mcam->mclk_div = pdata->mclk_div; mcam->bus_type = pdata->bus_type; diff --git a/include/linux/platform_data/media/mmp-camera.h b/include/linux/platform_data/media/mmp-camera.h index d2d3a443eedf..4c3a80a45883 100644 --- a/include/linux/platform_data/media/mmp-camera.h +++ b/include/linux/platform_data/media/mmp-camera.h @@ -16,7 +16,6 @@ struct mmp_camera_platform_data { int sensor_power_gpio; int sensor_reset_gpio; enum v4l2_mbus_type bus_type; - int mclk_min; /* The minimal value of MCLK */ int mclk_src; /* which clock source the MCLK derives from */ int mclk_div; /* Clock Divider Value for MCLK */ /* -- cgit v1.2.3 From 3eefe36cc00c5391b1ca2a68c5f01e9aa127c2a6 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Tue, 28 May 2019 05:07:30 -0400 Subject: media: marvell-ccic: use async notifier to get the sensor An instance of a sensor on DT-based MMP2 platform is always going to be created asynchronously. Let's move the manual device creation away from the core to the Cafe driver (used on OLPC XO-1, not present in DT) and set up appropriate async matches: I2C on Cafe, FWNODE on MMP (OLPC XO-1.75). Signed-off-by: Lubomir Rintel Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/marvell-ccic/cafe-driver.c | 49 +++++-- drivers/media/platform/marvell-ccic/mcam-core.c | 157 +++++++++++++++------- drivers/media/platform/marvell-ccic/mcam-core.h | 5 +- drivers/media/platform/marvell-ccic/mmp-driver.c | 27 ++-- include/linux/platform_data/media/mmp-camera.h | 1 - 5 files changed, 162 insertions(+), 77 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/platform/marvell-ccic/cafe-driver.c b/drivers/media/platform/marvell-ccic/cafe-driver.c index cd108b14b715..fe85368675cb 100644 --- a/drivers/media/platform/marvell-ccic/cafe-driver.c +++ b/drivers/media/platform/marvell-ccic/cafe-driver.c @@ -9,6 +9,7 @@ * * Copyright 2006-11 One Laptop Per Child Association, Inc. * Copyright 2006-11 Jonathan Corbet + * Copyright 2018 Lubomir Rintel * * Written by Jonathan Corbet, corbet@lwn.net. * @@ -25,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -50,6 +52,7 @@ struct cafe_camera { int registered; /* Fully initialized? */ struct mcam_camera mcam; struct pci_dev *pdev; + struct i2c_adapter *i2c_adapter; wait_queue_head_t smbus_wait; /* Waiting on i2c events */ }; @@ -349,15 +352,15 @@ static int cafe_smbus_setup(struct cafe_camera *cam) return ret; } - cam->mcam.i2c_adapter = adap; + cam->i2c_adapter = adap; cafe_smbus_enable_irq(cam); return 0; } static void cafe_smbus_shutdown(struct cafe_camera *cam) { - i2c_del_adapter(cam->mcam.i2c_adapter); - kfree(cam->mcam.i2c_adapter); + i2c_del_adapter(cam->i2c_adapter); + kfree(cam->i2c_adapter); } @@ -450,6 +453,29 @@ static irqreturn_t cafe_irq(int irq, void *data) return IRQ_RETVAL(handled); } +/* -------------------------------------------------------------------------- */ + +static struct ov7670_config sensor_cfg = { + /* + * Exclude QCIF mode, because it only captures a tiny portion + * of the sensor FOV + */ + .min_width = 320, + .min_height = 240, + + /* + * Set the clock speed for the XO 1; I don't believe this + * driver has ever run anywhere else. + */ + .clock_speed = 45, + .use_smbus = 1, +}; + +struct i2c_board_info ov7670_info = { + .type = "ov7670", + .addr = 0x42 >> 1, + .platform_data = &sensor_cfg, +}; /* -------------------------------------------------------------------------- */ /* @@ -479,12 +505,6 @@ static int cafe_pci_probe(struct pci_dev *pdev, mcam->plat_power_down = cafe_ctlr_power_down; mcam->dev = &pdev->dev; snprintf(mcam->bus_info, sizeof(mcam->bus_info), "PCI:%s", pci_name(pdev)); - /* - * Set the clock speed for the XO 1; I don't believe this - * driver has ever run anywhere else. - */ - mcam->clock_speed = 45; - mcam->use_smbus = 1; /* * Vmalloc mode for buffers is traditional with this driver. * We *might* be able to run DMA_contig, especially on a system @@ -525,12 +545,21 @@ static int cafe_pci_probe(struct pci_dev *pdev, if (ret) goto out_pdown; + mcam->asd.match_type = V4L2_ASYNC_MATCH_I2C; + mcam->asd.match.i2c.adapter_id = i2c_adapter_id(cam->i2c_adapter); + mcam->asd.match.i2c.address = ov7670_info.addr; + ret = mccic_register(mcam); - if (ret == 0) { + if (ret) + goto out_smbus_shutdown; + + if (i2c_new_device(cam->i2c_adapter, &ov7670_info)) { cam->registered = 1; return 0; } + mccic_shutdown(mcam); +out_smbus_shutdown: cafe_smbus_shutdown(cam); out_pdown: cafe_ctlr_power_down(mcam); diff --git a/drivers/media/platform/marvell-ccic/mcam-core.c b/drivers/media/platform/marvell-ccic/mcam-core.c index 76641d5211ab..7dc7d9d91782 100644 --- a/drivers/media/platform/marvell-ccic/mcam-core.c +++ b/drivers/media/platform/marvell-ccic/mcam-core.c @@ -4,6 +4,7 @@ * so it needs platform-specific support outside of the core. * * Copyright 2011 Jonathan Corbet corbet@lwn.net + * Copyright 2018 Lubomir Rintel */ #include #include @@ -26,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -93,6 +93,9 @@ MODULE_PARM_DESC(buffer_mode, #define sensor_call(cam, o, f, args...) \ v4l2_subdev_call(cam->sensor, o, f, ##args) +#define notifier_to_mcam(notifier) \ + container_of(notifier, struct mcam_camera, notifier) + static struct mcam_format_struct { __u8 *desc; __u32 pixelformat; @@ -1715,23 +1718,94 @@ EXPORT_SYMBOL_GPL(mccic_irq); /* * Registration and such. */ -static struct ov7670_config sensor_cfg = { + +static int mccic_notify_bound(struct v4l2_async_notifier *notifier, + struct v4l2_subdev *subdev, struct v4l2_async_subdev *asd) +{ + struct mcam_camera *cam = notifier_to_mcam(notifier); + int ret; + + mutex_lock(&cam->s_mutex); + if (cam->sensor) { + cam_err(cam, "sensor already bound\n"); + ret = -EBUSY; + goto out; + } + + v4l2_set_subdev_hostdata(subdev, cam); + cam->sensor = subdev; + + ret = mcam_cam_init(cam); + if (ret) { + cam->sensor = NULL; + goto out; + } + + ret = mcam_setup_vb2(cam); + if (ret) { + cam->sensor = NULL; + goto out; + } + + cam->vdev = mcam_v4l_template; + cam->vdev.v4l2_dev = &cam->v4l2_dev; + cam->vdev.lock = &cam->s_mutex; + cam->vdev.queue = &cam->vb_queue; + video_set_drvdata(&cam->vdev, cam); + ret = video_register_device(&cam->vdev, VFL_TYPE_GRABBER, -1); + if (ret) { + cam->sensor = NULL; + goto out; + } + + cam_dbg(cam, "sensor %s bound\n", subdev->name); +out: + mutex_unlock(&cam->s_mutex); + return ret; +} + +static void mccic_notify_unbind(struct v4l2_async_notifier *notifier, + struct v4l2_subdev *subdev, struct v4l2_async_subdev *asd) +{ + struct mcam_camera *cam = notifier_to_mcam(notifier); + + mutex_lock(&cam->s_mutex); + if (cam->sensor != subdev) { + cam_err(cam, "sensor %s not bound\n", subdev->name); + goto out; + } + + video_unregister_device(&cam->vdev); + cam->sensor = NULL; + cam_dbg(cam, "sensor %s unbound\n", subdev->name); + +out: + mutex_unlock(&cam->s_mutex); +} + +static int mccic_notify_complete(struct v4l2_async_notifier *notifier) +{ + struct mcam_camera *cam = notifier_to_mcam(notifier); + int ret; + /* - * Exclude QCIF mode, because it only captures a tiny portion - * of the sensor FOV + * Get the v4l2 setup done. */ - .min_width = 320, - .min_height = 240, -}; + ret = v4l2_ctrl_handler_init(&cam->ctrl_handler, 10); + if (!ret) + cam->v4l2_dev.ctrl_handler = &cam->ctrl_handler; + + return ret; +} +static const struct v4l2_async_notifier_operations mccic_notify_ops = { + .bound = mccic_notify_bound, + .unbind = mccic_notify_unbind, + .complete = mccic_notify_complete, +}; int mccic_register(struct mcam_camera *cam) { - struct i2c_board_info ov7670_info = { - .type = "ov7670", - .addr = 0x42 >> 1, - .platform_data = &sensor_cfg, - }; int ret; /* @@ -1744,17 +1818,20 @@ int mccic_register(struct mcam_camera *cam) printk(KERN_ERR "marvell-cam: Cafe can't do S/G I/O, attempting vmalloc mode instead\n"); cam->buffer_mode = B_vmalloc; } + if (!mcam_buffer_mode_supported(cam->buffer_mode)) { printk(KERN_ERR "marvell-cam: buffer mode %d unsupported\n", cam->buffer_mode); - return -EINVAL; + ret = -EINVAL; + goto out; } + /* * Register with V4L */ ret = v4l2_device_register(cam->dev, &cam->v4l2_dev); if (ret) - return ret; + goto out; mutex_init(&cam->s_mutex); cam->state = S_NOTREADY; @@ -1764,43 +1841,20 @@ int mccic_register(struct mcam_camera *cam) mcam_ctlr_init(cam); /* - * Get the v4l2 setup done. + * Register sensor notifier. */ - ret = v4l2_ctrl_handler_init(&cam->ctrl_handler, 10); - if (ret) - goto out_unregister; - cam->v4l2_dev.ctrl_handler = &cam->ctrl_handler; - - /* - * Try to find the sensor. - */ - sensor_cfg.clock_speed = cam->clock_speed; - sensor_cfg.use_smbus = cam->use_smbus; - cam->sensor = v4l2_i2c_new_subdev_board(&cam->v4l2_dev, - cam->i2c_adapter, &ov7670_info, NULL); - if (cam->sensor == NULL) { - ret = -ENODEV; - goto out_unregister; + v4l2_async_notifier_init(&cam->notifier); + ret = v4l2_async_notifier_add_subdev(&cam->notifier, &cam->asd); + if (ret) { + cam_warn(cam, "failed to add subdev to a notifier"); + goto out; } - ret = mcam_cam_init(cam); - if (ret) - goto out_unregister; - - ret = mcam_setup_vb2(cam); - if (ret) - goto out_unregister; - - mutex_lock(&cam->s_mutex); - cam->vdev = mcam_v4l_template; - cam->vdev.v4l2_dev = &cam->v4l2_dev; - cam->vdev.lock = &cam->s_mutex; - cam->vdev.queue = &cam->vb_queue; - video_set_drvdata(&cam->vdev, cam); - ret = video_register_device(&cam->vdev, VFL_TYPE_GRABBER, -1); - if (ret) { - mutex_unlock(&cam->s_mutex); - goto out_unregister; + cam->notifier.ops = &mccic_notify_ops; + ret = v4l2_async_notifier_register(&cam->v4l2_dev, &cam->notifier); + if (ret < 0) { + cam_warn(cam, "failed to register a sensor notifier"); + goto out; } /* @@ -1811,11 +1865,10 @@ int mccic_register(struct mcam_camera *cam) cam_warn(cam, "Unable to alloc DMA buffers at load will try again later."); } - mutex_unlock(&cam->s_mutex); return 0; -out_unregister: - v4l2_ctrl_handler_free(&cam->ctrl_handler); +out: + v4l2_async_notifier_unregister(&cam->notifier); v4l2_device_unregister(&cam->v4l2_dev); return ret; } @@ -1835,8 +1888,8 @@ void mccic_shutdown(struct mcam_camera *cam) } if (cam->buffer_mode == B_vmalloc) mcam_free_dma_bufs(cam); - video_unregister_device(&cam->vdev); v4l2_ctrl_handler_free(&cam->ctrl_handler); + v4l2_async_notifier_unregister(&cam->notifier); v4l2_device_unregister(&cam->v4l2_dev); } EXPORT_SYMBOL_GPL(mccic_shutdown); diff --git a/drivers/media/platform/marvell-ccic/mcam-core.h b/drivers/media/platform/marvell-ccic/mcam-core.h index b828b1bb59d3..4a72213aca1a 100644 --- a/drivers/media/platform/marvell-ccic/mcam-core.h +++ b/drivers/media/platform/marvell-ccic/mcam-core.h @@ -102,14 +102,11 @@ struct mcam_camera { * These fields should be set by the platform code prior to * calling mcam_register(). */ - struct i2c_adapter *i2c_adapter; unsigned char __iomem *regs; unsigned regs_size; /* size in bytes of the register space */ spinlock_t dev_lock; struct device *dev; /* For messages, dma alloc */ enum mcam_chip_id chip_id; - short int clock_speed; /* Sensor clock speed, default 30 */ - short int use_smbus; /* SMBUS or straight I2c? */ enum mcam_buffer_mode buffer_mode; int mclk_src; /* which clock source the mclk derives from */ @@ -150,6 +147,8 @@ struct mcam_camera { * Subsystem structures. */ struct video_device vdev; + struct v4l2_async_notifier notifier; + struct v4l2_async_subdev asd; struct v4l2_subdev *sensor; /* Videobuf2 stuff */ diff --git a/drivers/media/platform/marvell-ccic/mmp-driver.c b/drivers/media/platform/marvell-ccic/mmp-driver.c index 492663a8a29d..92061e4adbfd 100644 --- a/drivers/media/platform/marvell-ccic/mmp-driver.c +++ b/drivers/media/platform/marvell-ccic/mmp-driver.c @@ -4,12 +4,12 @@ * to work with the Armada 610 as used in the OLPC 1.75 system. * * Copyright 2011 Jonathan Corbet + * Copyright 2018 Lubomir Rintel */ #include #include #include -#include #include #include #include @@ -314,6 +314,7 @@ static int mmpcam_probe(struct platform_device *pdev) struct mmp_camera *cam; struct mcam_camera *mcam; struct resource *res; + struct fwnode_handle *ep; struct mmp_camera_platform_data *pdata; int ret; @@ -328,7 +329,6 @@ static int mmpcam_probe(struct platform_device *pdev) mcam->plat_power_down = mmpcam_power_down; mcam->calc_dphy = mmpcam_calc_dphy; mcam->dev = &pdev->dev; - mcam->use_smbus = 0; pdata = pdev->dev.platform_data; if (pdata) { mcam->mclk_src = pdata->mclk_src; @@ -372,15 +372,6 @@ static int mmpcam_probe(struct platform_device *pdev) cam->power_regs = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(cam->power_regs)) return PTR_ERR(cam->power_regs); - /* - * Find the i2c adapter. This assumes, of course, that the - * i2c bus is already up and functioning. - */ - mcam->i2c_adapter = platform_get_drvdata(pdata->i2c_device); - if (mcam->i2c_adapter == NULL) { - dev_err(&pdev->dev, "No i2c adapter\n"); - return -ENODEV; - } /* * Sensor GPIO pins. */ @@ -403,6 +394,19 @@ static int mmpcam_probe(struct platform_device *pdev) mcam_init_clk(mcam); + /* + * Create a match of the sensor against its OF node. + */ + ep = fwnode_graph_get_next_endpoint(of_fwnode_handle(pdev->dev.of_node), + NULL); + if (!ep) + return -ENODEV; + + mcam->asd.match_type = V4L2_ASYNC_MATCH_FWNODE; + mcam->asd.match.fwnode = fwnode_graph_get_remote_port_parent(ep); + + fwnode_handle_put(ep); + /* * Power the device up and hand it off to the core. */ @@ -412,6 +416,7 @@ static int mmpcam_probe(struct platform_device *pdev) ret = mccic_register(mcam); if (ret) goto out_power_down; + /* * Finally, set up our IRQ now that the core is ready to * deal with it. diff --git a/include/linux/platform_data/media/mmp-camera.h b/include/linux/platform_data/media/mmp-camera.h index 4c3a80a45883..c573ebc40035 100644 --- a/include/linux/platform_data/media/mmp-camera.h +++ b/include/linux/platform_data/media/mmp-camera.h @@ -12,7 +12,6 @@ enum dphy3_algo { }; struct mmp_camera_platform_data { - struct platform_device *i2c_device; int sensor_power_gpio; int sensor_reset_gpio; enum v4l2_mbus_type bus_type; -- cgit v1.2.3 From 81a409bfd5517d537097d3cfdfed7f8bf8ac469c Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Tue, 28 May 2019 05:07:31 -0400 Subject: media: marvell-ccic: provide a clock for the sensor The sensor needs the MCLK clock running when it's being probed. On platforms where the sensor is instantiated from a DT (MMP2) it is going to happen asynchronously. Therefore, the current modus operandi, where the bridge driver fiddles with the sensor power and clock itself is not going to fly. As the comments wisely note, this doesn't even belong there. Luckily, the ov7670 driver is already able to control its power and reset lines, we can just drop the MMP platform glue altogether. It also requests the clock via the standard clock subsystem. Good -- let's set up a clock instance so that the sensor can ask us to enable the clock. Note that this is pretty dumb at the moment: the clock is hardwired to a particular frequency and parent. It was always the case. Signed-off-by: Lubomir Rintel Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/marvell-ccic/Kconfig | 2 + drivers/media/platform/marvell-ccic/cafe-driver.c | 9 +- drivers/media/platform/marvell-ccic/mcam-core.c | 172 ++++++++++++++++------ drivers/media/platform/marvell-ccic/mcam-core.h | 3 + drivers/media/platform/marvell-ccic/mmp-driver.c | 152 ++----------------- include/linux/platform_data/media/mmp-camera.h | 2 - 6 files changed, 157 insertions(+), 183 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/platform/marvell-ccic/Kconfig b/drivers/media/platform/marvell-ccic/Kconfig index 86b84474dd8c..3e3f86264762 100644 --- a/drivers/media/platform/marvell-ccic/Kconfig +++ b/drivers/media/platform/marvell-ccic/Kconfig @@ -2,6 +2,7 @@ config VIDEO_CAFE_CCIC tristate "Marvell 88ALP01 (Cafe) CMOS Camera Controller support" depends on PCI && I2C && VIDEO_V4L2 + depends on COMMON_CLK select VIDEO_OV7670 select VIDEOBUF2_VMALLOC select VIDEOBUF2_DMA_CONTIG @@ -15,6 +16,7 @@ config VIDEO_MMP_CAMERA tristate "Marvell Armada 610 integrated camera controller support" depends on I2C && VIDEO_V4L2 depends on ARCH_MMP || COMPILE_TEST + depends on COMMON_CLK select VIDEO_OV7670 select I2C_GPIO select VIDEOBUF2_VMALLOC diff --git a/drivers/media/platform/marvell-ccic/cafe-driver.c b/drivers/media/platform/marvell-ccic/cafe-driver.c index fe85368675cb..16602628f895 100644 --- a/drivers/media/platform/marvell-ccic/cafe-driver.c +++ b/drivers/media/platform/marvell-ccic/cafe-driver.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "mcam-core.h" @@ -531,11 +532,10 @@ static int cafe_pci_probe(struct pci_dev *pdev, goto out_iounmap; /* - * Initialize the controller and leave it powered up. It will - * stay that way until the sensor driver shows up. + * Initialize the controller. */ cafe_ctlr_init(mcam); - cafe_ctlr_power_up(mcam); + /* * Set up I2C/SMBUS communications. We have to drop the mutex here * because the sensor could attach in this call chain, leading to @@ -553,6 +553,9 @@ static int cafe_pci_probe(struct pci_dev *pdev, if (ret) goto out_smbus_shutdown; + clkdev_create(mcam->mclk, "xclk", "%d-%04x", + i2c_adapter_id(cam->i2c_adapter), ov7670_info.addr); + if (i2c_new_device(cam->i2c_adapter, &ov7670_info)) { cam->registered = 1; return 0; diff --git a/drivers/media/platform/marvell-ccic/mcam-core.c b/drivers/media/platform/marvell-ccic/mcam-core.c index 7dc7d9d91782..f9ac1547d093 100644 --- a/drivers/media/platform/marvell-ccic/mcam-core.c +++ b/drivers/media/platform/marvell-ccic/mcam-core.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -303,9 +304,6 @@ static void mcam_enable_mipi(struct mcam_camera *mcam) */ mcam_reg_write(mcam, REG_CSI2_CTRL0, CSI2_C0_MIPI_EN | CSI2_C0_ACT_LANE(mcam->lane)); - mcam_reg_write(mcam, REG_CLKCTRL, - (mcam->mclk_src << 29) | mcam->mclk_div); - mcam->mipi_enabled = true; } } @@ -830,31 +828,6 @@ static void mcam_ctlr_irq_disable(struct mcam_camera *cam) mcam_reg_clear_bit(cam, REG_IRQMASK, FRAMEIRQS); } - - -static void mcam_ctlr_init(struct mcam_camera *cam) -{ - unsigned long flags; - - spin_lock_irqsave(&cam->dev_lock, flags); - /* - * Make sure it's not powered down. - */ - mcam_reg_clear_bit(cam, REG_CTRL1, C1_PWRDWN); - /* - * Turn off the enable bit. It sure should be off anyway, - * but it's good to be sure. - */ - mcam_reg_clear_bit(cam, REG_CTRL0, C0_ENABLE); - /* - * Clock the sensor appropriately. Controller clock should - * be 48MHz, sensor "typical" value is half that. - */ - mcam_reg_write_mask(cam, REG_CLKCTRL, 2, CLK_DIV_MASK); - spin_unlock_irqrestore(&cam->dev_lock, flags); -} - - /* * Stop the controller, and don't return until we're really sure that no * further DMA is going on. @@ -898,14 +871,15 @@ static int mcam_ctlr_power_up(struct mcam_camera *cam) int ret; spin_lock_irqsave(&cam->dev_lock, flags); - ret = cam->plat_power_up(cam); - if (ret) { - spin_unlock_irqrestore(&cam->dev_lock, flags); - return ret; + if (cam->plat_power_up) { + ret = cam->plat_power_up(cam); + if (ret) { + spin_unlock_irqrestore(&cam->dev_lock, flags); + return ret; + } } mcam_reg_clear_bit(cam, REG_CTRL1, C1_PWRDWN); spin_unlock_irqrestore(&cam->dev_lock, flags); - msleep(5); /* Just to be sure */ return 0; } @@ -920,10 +894,101 @@ static void mcam_ctlr_power_down(struct mcam_camera *cam) * power down routine. */ mcam_reg_set_bit(cam, REG_CTRL1, C1_PWRDWN); - cam->plat_power_down(cam); + if (cam->plat_power_down) + cam->plat_power_down(cam); spin_unlock_irqrestore(&cam->dev_lock, flags); } +/* ---------------------------------------------------------------------- */ +/* + * Controller clocks. + */ +static void mcam_clk_enable(struct mcam_camera *mcam) +{ + unsigned int i; + + for (i = 0; i < NR_MCAM_CLK; i++) { + if (!IS_ERR(mcam->clk[i])) + clk_prepare_enable(mcam->clk[i]); + } +} + +static void mcam_clk_disable(struct mcam_camera *mcam) +{ + int i; + + for (i = NR_MCAM_CLK - 1; i >= 0; i--) { + if (!IS_ERR(mcam->clk[i])) + clk_disable_unprepare(mcam->clk[i]); + } +} + +/* ---------------------------------------------------------------------- */ +/* + * Master sensor clock. + */ +static int mclk_prepare(struct clk_hw *hw) +{ + struct mcam_camera *cam = container_of(hw, struct mcam_camera, mclk_hw); + + clk_prepare(cam->clk[0]); + return 0; +} + +static void mclk_unprepare(struct clk_hw *hw) +{ + struct mcam_camera *cam = container_of(hw, struct mcam_camera, mclk_hw); + + clk_unprepare(cam->clk[0]); +} + +static int mclk_enable(struct clk_hw *hw) +{ + struct mcam_camera *cam = container_of(hw, struct mcam_camera, mclk_hw); + int mclk_src; + int mclk_div; + + /* + * Clock the sensor appropriately. Controller clock should + * be 48MHz, sensor "typical" value is half that. + */ + if (cam->bus_type == V4L2_MBUS_CSI2_DPHY) { + mclk_src = cam->mclk_src; + mclk_div = cam->mclk_div; + } else { + mclk_src = 3; + mclk_div = 2; + } + + clk_enable(cam->clk[0]); + mcam_reg_write(cam, REG_CLKCTRL, (mclk_src << 29) | mclk_div); + mcam_ctlr_power_up(cam); + + return 0; +} + +static void mclk_disable(struct clk_hw *hw) +{ + struct mcam_camera *cam = container_of(hw, struct mcam_camera, mclk_hw); + + mcam_ctlr_power_down(cam); + clk_disable(cam->clk[0]); +} + +static unsigned long mclk_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + return 48000000; +} + +static const struct clk_ops mclk_ops = { + .prepare = mclk_prepare, + .unprepare = mclk_unprepare, + .enable = mclk_enable, + .disable = mclk_disable, + .recalc_rate = mclk_recalc_rate, +}; + /* -------------------------------------------------------------------- */ /* * Communications with the sensor. @@ -948,7 +1013,6 @@ static int mcam_cam_init(struct mcam_camera *cam) ret = __mcam_cam_reset(cam); /* Get/set parameters? */ cam->state = S_IDLE; - mcam_ctlr_power_down(cam); return ret; } @@ -1584,9 +1648,10 @@ static int mcam_v4l_open(struct file *filp) if (ret) goto out; if (v4l2_fh_is_singular_file(filp)) { - ret = mcam_ctlr_power_up(cam); + ret = sensor_call(cam, core, s_power, 1); if (ret) goto out; + mcam_clk_enable(cam); __mcam_cam_reset(cam); mcam_set_config_needed(cam, 1); } @@ -1608,7 +1673,8 @@ static int mcam_v4l_release(struct file *filp) _vb2_fop_release(filp, NULL); if (last_open) { mcam_disable_mipi(cam); - mcam_ctlr_power_down(cam); + sensor_call(cam, core, s_power, 0); + mcam_clk_disable(cam); if (cam->buffer_mode == B_vmalloc && alloc_bufs_at_read) mcam_free_dma_bufs(cam); } @@ -1806,6 +1872,7 @@ static const struct v4l2_async_notifier_operations mccic_notify_ops = { int mccic_register(struct mcam_camera *cam) { + struct clk_init_data mclk_init = { }; int ret; /* @@ -1838,7 +1905,6 @@ int mccic_register(struct mcam_camera *cam) mcam_set_config_needed(cam, 1); cam->pix_format = mcam_def_pix_format; cam->mbus_code = mcam_def_mbus_code; - mcam_ctlr_init(cam); /* * Register sensor notifier. @@ -1857,6 +1923,26 @@ int mccic_register(struct mcam_camera *cam) goto out; } + /* + * Register sensor master clock. + */ + mclk_init.parent_names = NULL; + mclk_init.num_parents = 0; + mclk_init.ops = &mclk_ops; + mclk_init.name = "mclk"; + + of_property_read_string(cam->dev->of_node, "clock-output-names", + &mclk_init.name); + + cam->mclk_hw.init = &mclk_init; + + cam->mclk = devm_clk_register(cam->dev, &cam->mclk_hw); + if (IS_ERR(cam->mclk)) { + ret = PTR_ERR(cam->mclk); + dev_err(cam->dev, "can't register clock\n"); + goto out; + } + /* * If so requested, try to get our DMA buffers now. */ @@ -1884,7 +1970,7 @@ void mccic_shutdown(struct mcam_camera *cam) */ if (!list_empty(&cam->vdev.fh_list)) { cam_warn(cam, "Removing a device with users!\n"); - mcam_ctlr_power_down(cam); + sensor_call(cam, core, s_power, 0); } if (cam->buffer_mode == B_vmalloc) mcam_free_dma_bufs(cam); @@ -1906,7 +1992,8 @@ void mccic_suspend(struct mcam_camera *cam) enum mcam_state cstate = cam->state; mcam_ctlr_stop_dma(cam); - mcam_ctlr_power_down(cam); + sensor_call(cam, core, s_power, 0); + mcam_clk_disable(cam); cam->state = cstate; } mutex_unlock(&cam->s_mutex); @@ -1919,14 +2006,15 @@ int mccic_resume(struct mcam_camera *cam) mutex_lock(&cam->s_mutex); if (!list_empty(&cam->vdev.fh_list)) { - ret = mcam_ctlr_power_up(cam); + mcam_clk_enable(cam); + ret = sensor_call(cam, core, s_power, 1); if (ret) { mutex_unlock(&cam->s_mutex); return ret; } __mcam_cam_reset(cam); } else { - mcam_ctlr_power_down(cam); + sensor_call(cam, core, s_power, 0); } mutex_unlock(&cam->s_mutex); diff --git a/drivers/media/platform/marvell-ccic/mcam-core.h b/drivers/media/platform/marvell-ccic/mcam-core.h index 4a72213aca1a..2e3a7567a76a 100644 --- a/drivers/media/platform/marvell-ccic/mcam-core.h +++ b/drivers/media/platform/marvell-ccic/mcam-core.h @@ -8,6 +8,7 @@ #define _MCAM_CORE_H #include +#include #include #include #include @@ -125,6 +126,8 @@ struct mcam_camera { /* clock tree support */ struct clk *clk[NR_MCAM_CLK]; + struct clk_hw mclk_hw; + struct clk *mclk; /* * Callbacks from the core to the platform code. diff --git a/drivers/media/platform/marvell-ccic/mmp-driver.c b/drivers/media/platform/marvell-ccic/mmp-driver.c index 92061e4adbfd..450693e6657d 100644 --- a/drivers/media/platform/marvell-ccic/mmp-driver.c +++ b/drivers/media/platform/marvell-ccic/mmp-driver.c @@ -20,9 +20,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -36,7 +34,6 @@ MODULE_LICENSE("GPL"); static char *mcam_clks[] = {"axi", "func", "phy"}; struct mmp_camera { - void __iomem *power_regs; struct platform_device *pdev; struct mcam_camera mcam; struct list_head devlist; @@ -92,94 +89,6 @@ static struct mmp_camera *mmpcam_find_device(struct platform_device *pdev) return NULL; } - - - -/* - * Power-related registers; this almost certainly belongs - * somewhere else. - * - * ARMADA 610 register manual, sec 7.2.1, p1842. - */ -#define CPU_SUBSYS_PMU_BASE 0xd4282800 -#define REG_CCIC_DCGCR 0x28 /* CCIC dyn clock gate ctrl reg */ -#define REG_CCIC_CRCR 0x50 /* CCIC clk reset ctrl reg */ - -static void mcam_clk_enable(struct mcam_camera *mcam) -{ - unsigned int i; - - for (i = 0; i < NR_MCAM_CLK; i++) { - if (!IS_ERR(mcam->clk[i])) - clk_prepare_enable(mcam->clk[i]); - } -} - -static void mcam_clk_disable(struct mcam_camera *mcam) -{ - int i; - - for (i = NR_MCAM_CLK - 1; i >= 0; i--) { - if (!IS_ERR(mcam->clk[i])) - clk_disable_unprepare(mcam->clk[i]); - } -} - -/* - * Power control. - */ -static void mmpcam_power_up_ctlr(struct mmp_camera *cam) -{ - iowrite32(0x3f, cam->power_regs + REG_CCIC_DCGCR); - iowrite32(0x3805b, cam->power_regs + REG_CCIC_CRCR); - mdelay(1); -} - -static int mmpcam_power_up(struct mcam_camera *mcam) -{ - struct mmp_camera *cam = mcam_to_cam(mcam); - struct mmp_camera_platform_data *pdata; - -/* - * Turn on power and clocks to the controller. - */ - mmpcam_power_up_ctlr(cam); - mcam_clk_enable(mcam); -/* - * Provide power to the sensor. - */ - mcam_reg_write(mcam, REG_CLKCTRL, 0x60000002); - pdata = cam->pdev->dev.platform_data; - gpio_set_value(pdata->sensor_power_gpio, 1); - mdelay(5); - mcam_reg_clear_bit(mcam, REG_CTRL1, 0x10000000); - gpio_set_value(pdata->sensor_reset_gpio, 0); /* reset is active low */ - mdelay(5); - gpio_set_value(pdata->sensor_reset_gpio, 1); /* reset is active low */ - mdelay(5); - - return 0; -} - -static void mmpcam_power_down(struct mcam_camera *mcam) -{ - struct mmp_camera *cam = mcam_to_cam(mcam); - struct mmp_camera_platform_data *pdata; -/* - * Turn off clocks and set reset lines - */ - iowrite32(0, cam->power_regs + REG_CCIC_DCGCR); - iowrite32(0, cam->power_regs + REG_CCIC_CRCR); -/* - * Shut down the sensor. - */ - pdata = cam->pdev->dev.platform_data; - gpio_set_value(pdata->sensor_power_gpio, 0); - gpio_set_value(pdata->sensor_reset_gpio, 0); - - mcam_clk_disable(mcam); -} - /* * calc the dphy register values * There are three dphy registers being used. @@ -325,8 +234,6 @@ static int mmpcam_probe(struct platform_device *pdev) INIT_LIST_HEAD(&cam->devlist); mcam = &cam->mcam; - mcam->plat_power_up = mmpcam_power_up; - mcam->plat_power_down = mmpcam_power_down; mcam->calc_dphy = mmpcam_calc_dphy; mcam->dev = &pdev->dev; pdata = pdev->dev.platform_data; @@ -364,33 +271,6 @@ static int mmpcam_probe(struct platform_device *pdev) if (IS_ERR(mcam->regs)) return PTR_ERR(mcam->regs); mcam->regs_size = resource_size(res); - /* - * Power/clock memory is elsewhere; get it too. Perhaps this - * should really be managed outside of this driver? - */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - cam->power_regs = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(cam->power_regs)) - return PTR_ERR(cam->power_regs); - /* - * Sensor GPIO pins. - */ - ret = devm_gpio_request(&pdev->dev, pdata->sensor_power_gpio, - "cam-power"); - if (ret) { - dev_err(&pdev->dev, "Can't get sensor power gpio %d", - pdata->sensor_power_gpio); - return ret; - } - gpio_direction_output(pdata->sensor_power_gpio, 0); - ret = devm_gpio_request(&pdev->dev, pdata->sensor_reset_gpio, - "cam-reset"); - if (ret) { - dev_err(&pdev->dev, "Can't get sensor reset gpio %d", - pdata->sensor_reset_gpio); - return ret; - } - gpio_direction_output(pdata->sensor_reset_gpio, 0); mcam_init_clk(mcam); @@ -408,14 +288,21 @@ static int mmpcam_probe(struct platform_device *pdev) fwnode_handle_put(ep); /* - * Power the device up and hand it off to the core. + * Register the device with the core. */ - ret = mmpcam_power_up(mcam); - if (ret) - return ret; ret = mccic_register(mcam); if (ret) - goto out_power_down; + return ret; + + /* + * Add OF clock provider. + */ + ret = of_clk_add_provider(pdev->dev.of_node, of_clk_src_simple_get, + mcam->mclk); + if (ret) { + dev_err(&pdev->dev, "can't add DT clock provider\n"); + goto out; + } /* * Finally, set up our IRQ now that the core is ready to @@ -424,7 +311,7 @@ static int mmpcam_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); if (res == NULL) { ret = -ENODEV; - goto out_unregister; + goto out; } cam->irq = res->start; ret = devm_request_irq(&pdev->dev, cam->irq, mmpcam_irq, IRQF_SHARED, @@ -434,10 +321,10 @@ static int mmpcam_probe(struct platform_device *pdev) return 0; } -out_unregister: +out: + fwnode_handle_put(mcam->asd.match.fwnode); mccic_shutdown(mcam); -out_power_down: - mmpcam_power_down(mcam); + return ret; } @@ -448,7 +335,6 @@ static int mmpcam_remove(struct mmp_camera *cam) mmpcam_remove_device(cam); mccic_shutdown(mcam); - mmpcam_power_down(mcam); return 0; } @@ -480,12 +366,6 @@ static int mmpcam_resume(struct platform_device *pdev) { struct mmp_camera *cam = mmpcam_find_device(pdev); - /* - * Power up unconditionally just in case the core tries to - * touch a register even if nothing was active before; trust - * me, it's better this way. - */ - mmpcam_power_up_ctlr(cam); return mccic_resume(&cam->mcam); } diff --git a/include/linux/platform_data/media/mmp-camera.h b/include/linux/platform_data/media/mmp-camera.h index c573ebc40035..53adaab64f28 100644 --- a/include/linux/platform_data/media/mmp-camera.h +++ b/include/linux/platform_data/media/mmp-camera.h @@ -12,8 +12,6 @@ enum dphy3_algo { }; struct mmp_camera_platform_data { - int sensor_power_gpio; - int sensor_reset_gpio; enum v4l2_mbus_type bus_type; int mclk_src; /* which clock source the MCLK derives from */ int mclk_div; /* Clock Divider Value for MCLK */ -- cgit v1.2.3 From 65d80db2ee92330269e90313c6af782036f4d23d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 20 Jun 2019 20:35:27 +0200 Subject: regulator: s2mps11: Add support for disabling S2MPS11 regulators in suspend The driver supported turning off regulators in suspend only for S2MPS14 device. However this makes also sense for S2MPS11 and can reduce the power consumption during suspend to RAM. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Mark Brown --- drivers/regulator/s2mps11.c | 210 ++++++++++++++++++++---------------- include/linux/mfd/samsung/s2mps11.h | 5 + 2 files changed, 120 insertions(+), 95 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c index 93570712eb56..9c06ecd80a90 100644 --- a/drivers/regulator/s2mps11.c +++ b/drivers/regulator/s2mps11.c @@ -34,7 +34,7 @@ struct s2mps11_info { enum sec_device_type dev_type; /* - * One bit for each S2MPS13/S2MPS14/S2MPU02 regulator whether + * One bit for each S2MPS11/S2MPS13/S2MPS14/S2MPU02 regulator whether * the suspend mode was enabled. */ DECLARE_BITMAP(suspend_state, S2MPS_REGULATOR_MAX); @@ -225,27 +225,133 @@ ramp_disable: 1 << enable_shift, 0); } +static int s2mps11_regulator_enable(struct regulator_dev *rdev) +{ + struct s2mps11_info *s2mps11 = rdev_get_drvdata(rdev); + int rdev_id = rdev_get_id(rdev); + unsigned int val; + + switch (s2mps11->dev_type) { + case S2MPS11X: + if (test_bit(rdev_id, s2mps11->suspend_state)) + val = S2MPS14_ENABLE_SUSPEND; + else + val = rdev->desc->enable_mask; + break; + case S2MPS13X: + case S2MPS14X: + if (test_bit(rdev_id, s2mps11->suspend_state)) + val = S2MPS14_ENABLE_SUSPEND; + else if (s2mps11->ext_control_gpiod[rdev_id]) + val = S2MPS14_ENABLE_EXT_CONTROL; + else + val = rdev->desc->enable_mask; + break; + case S2MPU02: + if (test_bit(rdev_id, s2mps11->suspend_state)) + val = S2MPU02_ENABLE_SUSPEND; + else + val = rdev->desc->enable_mask; + break; + default: + return -EINVAL; + } + + return regmap_update_bits(rdev->regmap, rdev->desc->enable_reg, + rdev->desc->enable_mask, val); +} + +static int s2mps11_regulator_set_suspend_disable(struct regulator_dev *rdev) +{ + int ret; + unsigned int val, state; + struct s2mps11_info *s2mps11 = rdev_get_drvdata(rdev); + int rdev_id = rdev_get_id(rdev); + + /* Below LDO should be always on or does not support suspend mode. */ + switch (s2mps11->dev_type) { + case S2MPS11X: + switch (rdev_id) { + case S2MPS11_LDO2: + case S2MPS11_LDO36: + case S2MPS11_LDO37: + case S2MPS11_LDO38: + return 0; + default: + state = S2MPS14_ENABLE_SUSPEND; + break; + } + break; + case S2MPS13X: + case S2MPS14X: + switch (rdev_id) { + case S2MPS14_LDO3: + return 0; + default: + state = S2MPS14_ENABLE_SUSPEND; + break; + } + break; + case S2MPU02: + switch (rdev_id) { + case S2MPU02_LDO13: + case S2MPU02_LDO14: + case S2MPU02_LDO15: + case S2MPU02_LDO17: + case S2MPU02_BUCK7: + state = S2MPU02_DISABLE_SUSPEND; + break; + default: + state = S2MPU02_ENABLE_SUSPEND; + break; + } + break; + default: + return -EINVAL; + } + + ret = regmap_read(rdev->regmap, rdev->desc->enable_reg, &val); + if (ret < 0) + return ret; + + set_bit(rdev_id, s2mps11->suspend_state); + /* + * Don't enable suspend mode if regulator is already disabled because + * this would effectively for a short time turn on the regulator after + * resuming. + * However we still want to toggle the suspend_state bit for regulator + * in case if it got enabled before suspending the system. + */ + if (!(val & rdev->desc->enable_mask)) + return 0; + + return regmap_update_bits(rdev->regmap, rdev->desc->enable_reg, + rdev->desc->enable_mask, state); +} + static const struct regulator_ops s2mps11_ldo_ops = { .list_voltage = regulator_list_voltage_linear, .map_voltage = regulator_map_voltage_linear, .is_enabled = regulator_is_enabled_regmap, - .enable = regulator_enable_regmap, + .enable = s2mps11_regulator_enable, .disable = regulator_disable_regmap, .get_voltage_sel = regulator_get_voltage_sel_regmap, .set_voltage_sel = regulator_set_voltage_sel_regmap, .set_voltage_time_sel = regulator_set_voltage_time_sel, + .set_suspend_disable = s2mps11_regulator_set_suspend_disable, }; static const struct regulator_ops s2mps11_buck_ops = { .list_voltage = regulator_list_voltage_linear, .map_voltage = regulator_map_voltage_linear, .is_enabled = regulator_is_enabled_regmap, - .enable = regulator_enable_regmap, + .enable = s2mps11_regulator_enable, .disable = regulator_disable_regmap, .get_voltage_sel = regulator_get_voltage_sel_regmap, .set_voltage_sel = regulator_set_voltage_sel_regmap, .set_voltage_time_sel = s2mps11_regulator_set_voltage_time_sel, .set_ramp_delay = s2mps11_set_ramp_delay, + .set_suspend_disable = s2mps11_regulator_set_suspend_disable, }; #define regulator_desc_s2mps11_ldo(num, step) { \ @@ -501,102 +607,16 @@ static const struct regulator_desc s2mps13_regulators[] = { regulator_desc_s2mps13_buck8_10(10, MIN_500_MV, STEP_6_25_MV, 0x10), }; -static int s2mps14_regulator_enable(struct regulator_dev *rdev) -{ - struct s2mps11_info *s2mps11 = rdev_get_drvdata(rdev); - int rdev_id = rdev_get_id(rdev); - unsigned int val; - - switch (s2mps11->dev_type) { - case S2MPS13X: - case S2MPS14X: - if (test_bit(rdev_id, s2mps11->suspend_state)) - val = S2MPS14_ENABLE_SUSPEND; - else if (s2mps11->ext_control_gpiod[rdev_id]) - val = S2MPS14_ENABLE_EXT_CONTROL; - else - val = rdev->desc->enable_mask; - break; - case S2MPU02: - if (test_bit(rdev_id, s2mps11->suspend_state)) - val = S2MPU02_ENABLE_SUSPEND; - else - val = rdev->desc->enable_mask; - break; - default: - return -EINVAL; - } - - return regmap_update_bits(rdev->regmap, rdev->desc->enable_reg, - rdev->desc->enable_mask, val); -} - -static int s2mps14_regulator_set_suspend_disable(struct regulator_dev *rdev) -{ - int ret; - unsigned int val, state; - struct s2mps11_info *s2mps11 = rdev_get_drvdata(rdev); - int rdev_id = rdev_get_id(rdev); - - /* Below LDO should be always on or does not support suspend mode. */ - switch (s2mps11->dev_type) { - case S2MPS13X: - case S2MPS14X: - switch (rdev_id) { - case S2MPS14_LDO3: - return 0; - default: - state = S2MPS14_ENABLE_SUSPEND; - break; - } - break; - case S2MPU02: - switch (rdev_id) { - case S2MPU02_LDO13: - case S2MPU02_LDO14: - case S2MPU02_LDO15: - case S2MPU02_LDO17: - case S2MPU02_BUCK7: - state = S2MPU02_DISABLE_SUSPEND; - break; - default: - state = S2MPU02_ENABLE_SUSPEND; - break; - } - break; - default: - return -EINVAL; - } - - ret = regmap_read(rdev->regmap, rdev->desc->enable_reg, &val); - if (ret < 0) - return ret; - - set_bit(rdev_id, s2mps11->suspend_state); - /* - * Don't enable suspend mode if regulator is already disabled because - * this would effectively for a short time turn on the regulator after - * resuming. - * However we still want to toggle the suspend_state bit for regulator - * in case if it got enabled before suspending the system. - */ - if (!(val & rdev->desc->enable_mask)) - return 0; - - return regmap_update_bits(rdev->regmap, rdev->desc->enable_reg, - rdev->desc->enable_mask, state); -} - static const struct regulator_ops s2mps14_reg_ops = { .list_voltage = regulator_list_voltage_linear, .map_voltage = regulator_map_voltage_linear, .is_enabled = regulator_is_enabled_regmap, - .enable = s2mps14_regulator_enable, + .enable = s2mps11_regulator_enable, .disable = regulator_disable_regmap, .get_voltage_sel = regulator_get_voltage_sel_regmap, .set_voltage_sel = regulator_set_voltage_sel_regmap, .set_voltage_time_sel = regulator_set_voltage_time_sel, - .set_suspend_disable = s2mps14_regulator_set_suspend_disable, + .set_suspend_disable = s2mps11_regulator_set_suspend_disable, }; #define regulator_desc_s2mps14_ldo(num, min, step) { \ @@ -888,24 +908,24 @@ static const struct regulator_ops s2mpu02_ldo_ops = { .list_voltage = regulator_list_voltage_linear, .map_voltage = regulator_map_voltage_linear, .is_enabled = regulator_is_enabled_regmap, - .enable = s2mps14_regulator_enable, + .enable = s2mps11_regulator_enable, .disable = regulator_disable_regmap, .get_voltage_sel = regulator_get_voltage_sel_regmap, .set_voltage_sel = regulator_set_voltage_sel_regmap, .set_voltage_time_sel = regulator_set_voltage_time_sel, - .set_suspend_disable = s2mps14_regulator_set_suspend_disable, + .set_suspend_disable = s2mps11_regulator_set_suspend_disable, }; static const struct regulator_ops s2mpu02_buck_ops = { .list_voltage = regulator_list_voltage_linear, .map_voltage = regulator_map_voltage_linear, .is_enabled = regulator_is_enabled_regmap, - .enable = s2mps14_regulator_enable, + .enable = s2mps11_regulator_enable, .disable = regulator_disable_regmap, .get_voltage_sel = regulator_get_voltage_sel_regmap, .set_voltage_sel = regulator_set_voltage_sel_regmap, .set_voltage_time_sel = regulator_set_voltage_time_sel, - .set_suspend_disable = s2mps14_regulator_set_suspend_disable, + .set_suspend_disable = s2mps11_regulator_set_suspend_disable, .set_ramp_delay = s2mpu02_set_ramp_delay, }; diff --git a/include/linux/mfd/samsung/s2mps11.h b/include/linux/mfd/samsung/s2mps11.h index 6e7668a389a1..f6c035eb87be 100644 --- a/include/linux/mfd/samsung/s2mps11.h +++ b/include/linux/mfd/samsung/s2mps11.h @@ -188,4 +188,9 @@ enum s2mps11_regulators { #define S2MPS11_BUCK6_RAMP_EN_SHIFT 0 #define S2MPS11_PMIC_EN_SHIFT 6 +/* + * Bits for "enable suspend" (On/Off controlled by PWREN) + * are the same as in S2MPS14: S2MPS14_ENABLE_SUSPEND + */ + #endif /* __LINUX_MFD_S2MPS11_H */ -- cgit v1.2.3 From 8ec59c0f5f4966f89f4e3e3cab81710c7fa959d0 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Mon, 17 Jun 2019 17:00:17 +0200 Subject: sched/topology: Remove unused 'sd' parameter from arch_scale_cpu_capacity() The 'struct sched_domain *sd' parameter to arch_scale_cpu_capacity() is unused since commit: 765d0af19f5f ("sched/topology: Remove the ::smt_gain field from 'struct sched_domain'") Remove it. Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Viresh Kumar Reviewed-by: Valentin Schneider Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: gregkh@linuxfoundation.org Cc: linux@armlinux.org.uk Cc: quentin.perret@arm.com Cc: rafael@kernel.org Link: https://lkml.kernel.org/r/1560783617-5827-1-git-send-email-vincent.guittot@linaro.org Signed-off-by: Ingo Molnar --- arch/arm/kernel/topology.c | 2 +- drivers/base/arch_topology.c | 6 +++--- include/linux/arch_topology.h | 2 +- include/linux/energy_model.h | 2 +- include/linux/sched/topology.h | 14 +++----------- kernel/power/energy_model.c | 2 +- kernel/sched/cpufreq_schedutil.c | 2 +- kernel/sched/deadline.c | 2 +- kernel/sched/fair.c | 6 +++--- kernel/sched/pelt.c | 2 +- kernel/sched/pelt.h | 2 +- kernel/sched/sched.h | 2 +- kernel/sched/topology.c | 8 ++++---- 13 files changed, 22 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 60e375ce1ab2..d17cb1e6d679 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -169,7 +169,7 @@ static void update_cpu_capacity(unsigned int cpu) topology_set_cpu_scale(cpu, cpu_capacity(cpu) / middle_capacity); pr_info("CPU%u: update cpu_capacity %lu\n", - cpu, topology_get_cpu_scale(NULL, cpu)); + cpu, topology_get_cpu_scale(cpu)); } #else diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 1739d7e1952a..9b09e31ae82f 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -43,7 +43,7 @@ static ssize_t cpu_capacity_show(struct device *dev, { struct cpu *cpu = container_of(dev, struct cpu, dev); - return sprintf(buf, "%lu\n", topology_get_cpu_scale(NULL, cpu->dev.id)); + return sprintf(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id)); } static void update_topology_flags_workfn(struct work_struct *work); @@ -116,7 +116,7 @@ void topology_normalize_cpu_scale(void) / capacity_scale; topology_set_cpu_scale(cpu, capacity); pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n", - cpu, topology_get_cpu_scale(NULL, cpu)); + cpu, topology_get_cpu_scale(cpu)); } } @@ -185,7 +185,7 @@ init_cpu_capacity_callback(struct notifier_block *nb, cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus); for_each_cpu(cpu, policy->related_cpus) { - raw_capacity[cpu] = topology_get_cpu_scale(NULL, cpu) * + raw_capacity[cpu] = topology_get_cpu_scale(cpu) * policy->cpuinfo.max_freq / 1000UL; capacity_scale = max(raw_capacity[cpu], capacity_scale); } diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index d9bdc1a7f4e7..1cfe05ea1d89 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -18,7 +18,7 @@ DECLARE_PER_CPU(unsigned long, cpu_scale); struct sched_domain; static inline -unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu) +unsigned long topology_get_cpu_scale(int cpu) { return per_cpu(cpu_scale, cpu); } diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index aa027f7bcb3e..73f8c3cb9588 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -89,7 +89,7 @@ static inline unsigned long em_pd_energy(struct em_perf_domain *pd, * like schedutil. */ cpu = cpumask_first(to_cpumask(pd->cpus)); - scale_cpu = arch_scale_cpu_capacity(NULL, cpu); + scale_cpu = arch_scale_cpu_capacity(cpu); cs = &pd->table[pd->nr_cap_states - 1]; freq = map_util_freq(max_util, cs->frequency, scale_cpu); diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 53afbe07354a..e445d3767cdd 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -196,14 +196,6 @@ extern void set_sched_topology(struct sched_domain_topology_level *tl); # define SD_INIT_NAME(type) #endif -#ifndef arch_scale_cpu_capacity -static __always_inline -unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) -{ - return SCHED_CAPACITY_SCALE; -} -#endif - #else /* CONFIG_SMP */ struct sched_domain_attr; @@ -219,16 +211,16 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu) return true; } +#endif /* !CONFIG_SMP */ + #ifndef arch_scale_cpu_capacity static __always_inline -unsigned long arch_scale_cpu_capacity(void __always_unused *sd, int cpu) +unsigned long arch_scale_cpu_capacity(int cpu) { return SCHED_CAPACITY_SCALE; } #endif -#endif /* !CONFIG_SMP */ - static inline int task_node(const struct task_struct *p) { return cpu_to_node(task_cpu(p)); diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 7d66ee68aaaf..0a9326f5f421 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -223,7 +223,7 @@ int em_register_perf_domain(cpumask_t *span, unsigned int nr_states, * All CPUs of a domain must have the same micro-architecture * since they all share the same table. */ - cap = arch_scale_cpu_capacity(NULL, cpu); + cap = arch_scale_cpu_capacity(cpu); if (prev_cap && prev_cap != cap) { pr_err("CPUs of %*pbl must have the same capacity\n", cpumask_pr_args(span)); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 962cf343f798..7c4ce69067c4 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -276,7 +276,7 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) { struct rq *rq = cpu_rq(sg_cpu->cpu); unsigned long util = cpu_util_cfs(rq); - unsigned long max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu); + unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu); sg_cpu->max = max; sg_cpu->bw_dl = cpu_bw_dl(rq); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index c1ef30861068..8b5bb2ac16e2 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1195,7 +1195,7 @@ static void update_curr_dl(struct rq *rq) &curr->dl); } else { unsigned long scale_freq = arch_scale_freq_capacity(cpu); - unsigned long scale_cpu = arch_scale_cpu_capacity(NULL, cpu); + unsigned long scale_cpu = arch_scale_cpu_capacity(cpu); scaled_delta_exec = cap_scale(delta_exec, scale_freq); scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3c11dcdedcbc..4f8754157763 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -764,7 +764,7 @@ void post_init_entity_util_avg(struct task_struct *p) struct sched_entity *se = &p->se; struct cfs_rq *cfs_rq = cfs_rq_of(se); struct sched_avg *sa = &se->avg; - long cpu_scale = arch_scale_cpu_capacity(NULL, cpu_of(rq_of(cfs_rq))); + long cpu_scale = arch_scale_cpu_capacity(cpu_of(rq_of(cfs_rq))); long cap = (long)(cpu_scale - cfs_rq->avg.util_avg) / 2; if (cap > 0) { @@ -7646,7 +7646,7 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds) static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu) { struct rq *rq = cpu_rq(cpu); - unsigned long max = arch_scale_cpu_capacity(sd, cpu); + unsigned long max = arch_scale_cpu_capacity(cpu); unsigned long used, free; unsigned long irq; @@ -7671,7 +7671,7 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu) unsigned long capacity = scale_rt_capacity(sd, cpu); struct sched_group *sdg = sd->groups; - cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(sd, cpu); + cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(cpu); if (!capacity) capacity = 1; diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c index befce29bd882..42ea66b07b1d 100644 --- a/kernel/sched/pelt.c +++ b/kernel/sched/pelt.c @@ -366,7 +366,7 @@ int update_irq_load_avg(struct rq *rq, u64 running) * reflect the real amount of computation */ running = cap_scale(running, arch_scale_freq_capacity(cpu_of(rq))); - running = cap_scale(running, arch_scale_cpu_capacity(NULL, cpu_of(rq))); + running = cap_scale(running, arch_scale_cpu_capacity(cpu_of(rq))); /* * We know the time that has been used by interrupt since last update diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h index 7489d5f56960..afff644da065 100644 --- a/kernel/sched/pelt.h +++ b/kernel/sched/pelt.h @@ -79,7 +79,7 @@ static inline void update_rq_clock_pelt(struct rq *rq, s64 delta) * Scale the elapsed time to reflect the real amount of * computation */ - delta = cap_scale(delta, arch_scale_cpu_capacity(NULL, cpu_of(rq))); + delta = cap_scale(delta, arch_scale_cpu_capacity(cpu_of(rq))); delta = cap_scale(delta, arch_scale_freq_capacity(cpu_of(rq))); rq->clock_pelt += delta; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b08dee29ef5e..e58ab597ec88 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2248,7 +2248,7 @@ unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs, static inline unsigned long schedutil_energy_util(int cpu, unsigned long cfs) { - unsigned long max = arch_scale_cpu_capacity(NULL, cpu); + unsigned long max = arch_scale_cpu_capacity(cpu); return schedutil_freq_util(cpu, cfs, max, ENERGY_UTIL); } diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 63184cf0d0d7..f751ce0b783e 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1874,10 +1874,10 @@ static struct sched_domain_topology_level unsigned long cap; /* Is there any asymmetry? */ - cap = arch_scale_cpu_capacity(NULL, cpumask_first(cpu_map)); + cap = arch_scale_cpu_capacity(cpumask_first(cpu_map)); for_each_cpu(i, cpu_map) { - if (arch_scale_cpu_capacity(NULL, i) != cap) { + if (arch_scale_cpu_capacity(i) != cap) { asym = true; break; } @@ -1892,7 +1892,7 @@ static struct sched_domain_topology_level * to everyone. */ for_each_cpu(i, cpu_map) { - unsigned long max_capacity = arch_scale_cpu_capacity(NULL, i); + unsigned long max_capacity = arch_scale_cpu_capacity(i); int tl_id = 0; for_each_sd_topology(tl) { @@ -1902,7 +1902,7 @@ static struct sched_domain_topology_level for_each_cpu_and(j, tl->mask(i), cpu_map) { unsigned long capacity; - capacity = arch_scale_cpu_capacity(NULL, j); + capacity = arch_scale_cpu_capacity(j); if (capacity <= max_capacity) continue; -- cgit v1.2.3 From 3c93a0c04dfdcba199982b53b97488b1b1d90eff Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 4 Jun 2019 12:14:55 +0100 Subject: sched/debug: Add a new sched_trace_*() helper functions The new functions allow modules to access internal data structures of unexported struct cfs_rq and struct rq to extract important information from the tracepoints to be introduced in later patches. While at it fix alphabetical order of struct declarations in sched.h Signed-off-by: Qais Yousef Signed-off-by: Peter Zijlstra (Intel) Cc: Dietmar Eggemann Cc: Linus Torvalds Cc: Pavankumar Kondeti Cc: Peter Zijlstra Cc: Quentin Perret Cc: Sebastian Andrzej Siewior Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Uwe Kleine-Konig Link: https://lkml.kernel.org/r/20190604111459.2862-3-qais.yousef@arm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 16 ++++++++- kernel/sched/fair.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1b2590a8d038..044c023875e8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -35,6 +35,7 @@ struct audit_context; struct backing_dev_info; struct bio_list; struct blk_plug; +struct capture_control; struct cfs_rq; struct fs_struct; struct futex_pi_state; @@ -47,8 +48,9 @@ struct pid_namespace; struct pipe_inode_info; struct rcu_node; struct reclaim_state; -struct capture_control; struct robust_list_head; +struct root_domain; +struct rq; struct sched_attr; struct sched_param; struct seq_file; @@ -1920,4 +1922,16 @@ static inline void rseq_syscall(struct pt_regs *regs) #endif +const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq); +char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len); +int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq); + +const struct sched_avg *sched_trace_rq_avg_rt(struct rq *rq); +const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq); +const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq); + +int sched_trace_rq_cpu(struct rq *rq); + +const struct cpumask *sched_trace_rd_span(struct root_domain *rd); + #endif diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4f8754157763..461c3e9a67b2 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -275,6 +275,19 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp) return grp->my_q; } +static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len) +{ + if (!path) + return; + + if (cfs_rq && task_group_is_autogroup(cfs_rq->tg)) + autogroup_path(cfs_rq->tg, path, len); + else if (cfs_rq && cfs_rq->tg->css.cgroup) + cgroup_path(cfs_rq->tg->css.cgroup, path, len); + else + strlcpy(path, "(null)", len); +} + static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) { struct rq *rq = rq_of(cfs_rq); @@ -449,6 +462,12 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp) return NULL; } +static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len) +{ + if (path) + strlcpy(path, "(null)", len); +} + static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) { return true; @@ -10408,3 +10427,83 @@ __init void init_sched_fair_class(void) #endif /* SMP */ } + +/* + * Helper functions to facilitate extracting info from tracepoints. + */ + +const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq) +{ +#ifdef CONFIG_SMP + return cfs_rq ? &cfs_rq->avg : NULL; +#else + return NULL; +#endif +} +EXPORT_SYMBOL_GPL(sched_trace_cfs_rq_avg); + +char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len) +{ + if (!cfs_rq) { + if (str) + strlcpy(str, "(null)", len); + else + return NULL; + } + + cfs_rq_tg_path(cfs_rq, str, len); + return str; +} +EXPORT_SYMBOL_GPL(sched_trace_cfs_rq_path); + +int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq) +{ + return cfs_rq ? cpu_of(rq_of(cfs_rq)) : -1; +} +EXPORT_SYMBOL_GPL(sched_trace_cfs_rq_cpu); + +const struct sched_avg *sched_trace_rq_avg_rt(struct rq *rq) +{ +#ifdef CONFIG_SMP + return rq ? &rq->avg_rt : NULL; +#else + return NULL; +#endif +} +EXPORT_SYMBOL_GPL(sched_trace_rq_avg_rt); + +const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq) +{ +#ifdef CONFIG_SMP + return rq ? &rq->avg_dl : NULL; +#else + return NULL; +#endif +} +EXPORT_SYMBOL_GPL(sched_trace_rq_avg_dl); + +const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq) +{ +#if defined(CONFIG_SMP) && defined(CONFIG_HAVE_SCHED_AVG_IRQ) + return rq ? &rq->avg_irq : NULL; +#else + return NULL; +#endif +} +EXPORT_SYMBOL_GPL(sched_trace_rq_avg_irq); + +int sched_trace_rq_cpu(struct rq *rq) +{ + return rq ? cpu_of(rq) : -1; +} +EXPORT_SYMBOL_GPL(sched_trace_rq_cpu); + +const struct cpumask *sched_trace_rd_span(struct root_domain *rd) +{ +#ifdef CONFIG_SMP + return rd ? rd->span : NULL; +#else + return NULL; +#endif +} +EXPORT_SYMBOL_GPL(sched_trace_rd_span); -- cgit v1.2.3 From 69842cba9ace84849bb9b8edcdf2cefccd97901c Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Fri, 21 Jun 2019 09:42:02 +0100 Subject: sched/uclamp: Add CPU's clamp buckets refcounting Utilization clamping allows to clamp the CPU's utilization within a [util_min, util_max] range, depending on the set of RUNNABLE tasks on that CPU. Each task references two "clamp buckets" defining its minimum and maximum (util_{min,max}) utilization "clamp values". A CPU's clamp bucket is active if there is at least one RUNNABLE tasks enqueued on that CPU and refcounting that bucket. When a task is {en,de}queued {on,from} a rq, the set of active clamp buckets on that CPU can change. If the set of active clamp buckets changes for a CPU a new "aggregated" clamp value is computed for that CPU. This is because each clamp bucket enforces a different utilization clamp value. Clamp values are always MAX aggregated for both util_min and util_max. This ensures that no task can affect the performance of other co-scheduled tasks which are more boosted (i.e. with higher util_min clamp) or less capped (i.e. with higher util_max clamp). A task has: task_struct::uclamp[clamp_id]::bucket_id to track the "bucket index" of the CPU's clamp bucket it refcounts while enqueued, for each clamp index (clamp_id). A runqueue has: rq::uclamp[clamp_id]::bucket[bucket_id].tasks to track how many RUNNABLE tasks on that CPU refcount each clamp bucket (bucket_id) of a clamp index (clamp_id). It also has a: rq::uclamp[clamp_id]::bucket[bucket_id].value to track the clamp value of each clamp bucket (bucket_id) of a clamp index (clamp_id). The rq::uclamp::bucket[clamp_id][] array is scanned every time it's needed to find a new MAX aggregated clamp value for a clamp_id. This operation is required only when it's dequeued the last task of a clamp bucket tracking the current MAX aggregated clamp value. In this case, the CPU is either entering IDLE or going to schedule a less boosted or more clamped task. The expected number of different clamp values configured at build time is small enough to fit the full unordered array into a single cache line, for configurations of up to 7 buckets. Add to struct rq the basic data structures required to refcount the number of RUNNABLE tasks for each clamp bucket. Add also the max aggregation required to update the rq's clamp value at each enqueue/dequeue event. Use a simple linear mapping of clamp values into clamp buckets. Pre-compute and cache bucket_id to avoid integer divisions at enqueue/dequeue time. Signed-off-by: Patrick Bellasi Signed-off-by: Peter Zijlstra (Intel) Cc: Alessio Balsini Cc: Dietmar Eggemann Cc: Joel Fernandes Cc: Juri Lelli Cc: Linus Torvalds Cc: Morten Rasmussen Cc: Paul Turner Cc: Peter Zijlstra Cc: Quentin Perret Cc: Rafael J . Wysocki Cc: Steve Muckle Cc: Suren Baghdasaryan Cc: Tejun Heo Cc: Thomas Gleixner Cc: Todd Kjos Cc: Vincent Guittot Cc: Viresh Kumar Link: https://lkml.kernel.org/r/20190621084217.8167-2-patrick.bellasi@arm.com Signed-off-by: Ingo Molnar --- include/linux/log2.h | 34 +++++++++ include/linux/sched.h | 39 ++++++++++ include/linux/sched/topology.h | 6 -- init/Kconfig | 53 +++++++++++++ kernel/sched/core.c | 166 +++++++++++++++++++++++++++++++++++++++++ kernel/sched/sched.h | 51 +++++++++++++ 6 files changed, 343 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/log2.h b/include/linux/log2.h index 1aec01365ed4..83a4a3ca3e8a 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -220,4 +220,38 @@ int __order_base_2(unsigned long n) ilog2((n) - 1) + 1) : \ __order_base_2(n) \ ) + +static inline __attribute__((const)) +int __bits_per(unsigned long n) +{ + if (n < 2) + return 1; + if (is_power_of_2(n)) + return order_base_2(n) + 1; + return order_base_2(n); +} + +/** + * bits_per - calculate the number of bits required for the argument + * @n: parameter + * + * This is constant-capable and can be used for compile time + * initializations, e.g bitfields. + * + * The first few values calculated by this routine: + * bf(0) = 1 + * bf(1) = 1 + * bf(2) = 2 + * bf(3) = 2 + * bf(4) = 3 + * ... and so on. + */ +#define bits_per(n) \ +( \ + __builtin_constant_p(n) ? ( \ + ((n) == 0 || (n) == 1) \ + ? 1 : ilog2(n) + 1 \ + ) : \ + __bits_per(n) \ +) #endif /* _LINUX_LOG2_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 044c023875e8..80235bcd05f2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -283,6 +283,18 @@ struct vtime { u64 gtime; }; +/* + * Utilization clamp constraints. + * @UCLAMP_MIN: Minimum utilization + * @UCLAMP_MAX: Maximum utilization + * @UCLAMP_CNT: Utilization clamp constraints count + */ +enum uclamp_id { + UCLAMP_MIN = 0, + UCLAMP_MAX, + UCLAMP_CNT +}; + struct sched_info { #ifdef CONFIG_SCHED_INFO /* Cumulative counters: */ @@ -314,6 +326,10 @@ struct sched_info { # define SCHED_FIXEDPOINT_SHIFT 10 # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) +/* Increase resolution of cpu_capacity calculations */ +# define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT +# define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) + struct load_weight { unsigned long weight; u32 inv_weight; @@ -562,6 +578,25 @@ struct sched_dl_entity { struct hrtimer inactive_timer; }; +#ifdef CONFIG_UCLAMP_TASK +/* Number of utilization clamp buckets (shorter alias) */ +#define UCLAMP_BUCKETS CONFIG_UCLAMP_BUCKETS_COUNT + +/* + * Utilization clamp for a scheduling entity + * @value: clamp value "assigned" to a se + * @bucket_id: bucket index corresponding to the "assigned" value + * + * The bucket_id is the index of the clamp bucket matching the clamp value + * which is pre-computed and stored to avoid expensive integer divisions from + * the fast path. + */ +struct uclamp_se { + unsigned int value : bits_per(SCHED_CAPACITY_SCALE); + unsigned int bucket_id : bits_per(UCLAMP_BUCKETS); +}; +#endif /* CONFIG_UCLAMP_TASK */ + union rcu_special { struct { u8 blocked; @@ -642,6 +677,10 @@ struct task_struct { #endif struct sched_dl_entity dl; +#ifdef CONFIG_UCLAMP_TASK + struct uclamp_se uclamp[UCLAMP_CNT]; +#endif + #ifdef CONFIG_PREEMPT_NOTIFIERS /* List of struct preempt_notifier: */ struct hlist_head preempt_notifiers; diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index e445d3767cdd..7863bb62d2ab 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -6,12 +6,6 @@ #include -/* - * Increase resolution of cpu_capacity calculations - */ -#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT -#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) - /* * sched-domains (multiprocessor balancing) declarations: */ diff --git a/init/Kconfig b/init/Kconfig index 0e2344389501..c88289c18d59 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -677,6 +677,59 @@ config HAVE_UNSTABLE_SCHED_CLOCK config GENERIC_SCHED_CLOCK bool +menu "Scheduler features" + +config UCLAMP_TASK + bool "Enable utilization clamping for RT/FAIR tasks" + depends on CPU_FREQ_GOV_SCHEDUTIL + help + This feature enables the scheduler to track the clamped utilization + of each CPU based on RUNNABLE tasks scheduled on that CPU. + + With this option, the user can specify the min and max CPU + utilization allowed for RUNNABLE tasks. The max utilization defines + the maximum frequency a task should use while the min utilization + defines the minimum frequency it should use. + + Both min and max utilization clamp values are hints to the scheduler, + aiming at improving its frequency selection policy, but they do not + enforce or grant any specific bandwidth for tasks. + + If in doubt, say N. + +config UCLAMP_BUCKETS_COUNT + int "Number of supported utilization clamp buckets" + range 5 20 + default 5 + depends on UCLAMP_TASK + help + Defines the number of clamp buckets to use. The range of each bucket + will be SCHED_CAPACITY_SCALE/UCLAMP_BUCKETS_COUNT. The higher the + number of clamp buckets the finer their granularity and the higher + the precision of clamping aggregation and tracking at run-time. + + For example, with the minimum configuration value we will have 5 + clamp buckets tracking 20% utilization each. A 25% boosted tasks will + be refcounted in the [20..39]% bucket and will set the bucket clamp + effective value to 25%. + If a second 30% boosted task should be co-scheduled on the same CPU, + that task will be refcounted in the same bucket of the first task and + it will boost the bucket clamp effective value to 30%. + The clamp effective value of a bucket is reset to its nominal value + (20% in the example above) when there are no more tasks refcounted in + that bucket. + + An additional boost/capping margin can be added to some tasks. In the + example above the 25% task will be boosted to 30% until it exits the + CPU. If that should be considered not acceptable on certain systems, + it's always possible to reduce the margin by increasing the number of + clamp buckets to trade off used memory for run-time tracking + precision. + + If in doubt, use the default value. + +endmenu + # # For architectures that want to enable the support for NUMA-affine scheduler # balancing logic: diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e5e02d23e693..d8c1e67afd82 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -772,6 +772,168 @@ static void set_load_weight(struct task_struct *p, bool update_load) } } +#ifdef CONFIG_UCLAMP_TASK + +/* Integer rounded range for each bucket */ +#define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS) + +#define for_each_clamp_id(clamp_id) \ + for ((clamp_id) = 0; (clamp_id) < UCLAMP_CNT; (clamp_id)++) + +static inline unsigned int uclamp_bucket_id(unsigned int clamp_value) +{ + return clamp_value / UCLAMP_BUCKET_DELTA; +} + +static inline unsigned int uclamp_none(int clamp_id) +{ + if (clamp_id == UCLAMP_MIN) + return 0; + return SCHED_CAPACITY_SCALE; +} + +static inline void uclamp_se_set(struct uclamp_se *uc_se, unsigned int value) +{ + uc_se->value = value; + uc_se->bucket_id = uclamp_bucket_id(value); +} + +static inline +unsigned int uclamp_rq_max_value(struct rq *rq, unsigned int clamp_id) +{ + struct uclamp_bucket *bucket = rq->uclamp[clamp_id].bucket; + int bucket_id = UCLAMP_BUCKETS - 1; + + /* + * Since both min and max clamps are max aggregated, find the + * top most bucket with tasks in. + */ + for ( ; bucket_id >= 0; bucket_id--) { + if (!bucket[bucket_id].tasks) + continue; + return bucket[bucket_id].value; + } + + /* No tasks -- default clamp values */ + return uclamp_none(clamp_id); +} + +/* + * When a task is enqueued on a rq, the clamp bucket currently defined by the + * task's uclamp::bucket_id is refcounted on that rq. This also immediately + * updates the rq's clamp value if required. + */ +static inline void uclamp_rq_inc_id(struct rq *rq, struct task_struct *p, + unsigned int clamp_id) +{ + struct uclamp_rq *uc_rq = &rq->uclamp[clamp_id]; + struct uclamp_se *uc_se = &p->uclamp[clamp_id]; + struct uclamp_bucket *bucket; + + lockdep_assert_held(&rq->lock); + + bucket = &uc_rq->bucket[uc_se->bucket_id]; + bucket->tasks++; + + if (uc_se->value > READ_ONCE(uc_rq->value)) + WRITE_ONCE(uc_rq->value, bucket->value); +} + +/* + * When a task is dequeued from a rq, the clamp bucket refcounted by the task + * is released. If this is the last task reference counting the rq's max + * active clamp value, then the rq's clamp value is updated. + * + * Both refcounted tasks and rq's cached clamp values are expected to be + * always valid. If it's detected they are not, as defensive programming, + * enforce the expected state and warn. + */ +static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p, + unsigned int clamp_id) +{ + struct uclamp_rq *uc_rq = &rq->uclamp[clamp_id]; + struct uclamp_se *uc_se = &p->uclamp[clamp_id]; + struct uclamp_bucket *bucket; + unsigned int rq_clamp; + + lockdep_assert_held(&rq->lock); + + bucket = &uc_rq->bucket[uc_se->bucket_id]; + SCHED_WARN_ON(!bucket->tasks); + if (likely(bucket->tasks)) + bucket->tasks--; + + if (likely(bucket->tasks)) + return; + + rq_clamp = READ_ONCE(uc_rq->value); + /* + * Defensive programming: this should never happen. If it happens, + * e.g. due to future modification, warn and fixup the expected value. + */ + SCHED_WARN_ON(bucket->value > rq_clamp); + if (bucket->value >= rq_clamp) + WRITE_ONCE(uc_rq->value, uclamp_rq_max_value(rq, clamp_id)); +} + +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) +{ + unsigned int clamp_id; + + if (unlikely(!p->sched_class->uclamp_enabled)) + return; + + for_each_clamp_id(clamp_id) + uclamp_rq_inc_id(rq, p, clamp_id); +} + +static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) +{ + unsigned int clamp_id; + + if (unlikely(!p->sched_class->uclamp_enabled)) + return; + + for_each_clamp_id(clamp_id) + uclamp_rq_dec_id(rq, p, clamp_id); +} + +static void __init init_uclamp(void) +{ + unsigned int clamp_id; + int cpu; + + for_each_possible_cpu(cpu) { + struct uclamp_bucket *bucket; + struct uclamp_rq *uc_rq; + unsigned int bucket_id; + + memset(&cpu_rq(cpu)->uclamp, 0, sizeof(struct uclamp_rq)); + + for_each_clamp_id(clamp_id) { + uc_rq = &cpu_rq(cpu)->uclamp[clamp_id]; + + bucket_id = 1; + while (bucket_id < UCLAMP_BUCKETS) { + bucket = &uc_rq->bucket[bucket_id]; + bucket->value = bucket_id * UCLAMP_BUCKET_DELTA; + ++bucket_id; + } + } + } + + for_each_clamp_id(clamp_id) { + uclamp_se_set(&init_task.uclamp[clamp_id], + uclamp_none(clamp_id)); + } +} + +#else /* CONFIG_UCLAMP_TASK */ +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { } +static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { } +static inline void init_uclamp(void) { } +#endif /* CONFIG_UCLAMP_TASK */ + static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags) { if (!(flags & ENQUEUE_NOCLOCK)) @@ -782,6 +944,7 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags) psi_enqueue(p, flags & ENQUEUE_WAKEUP); } + uclamp_rq_inc(rq, p); p->sched_class->enqueue_task(rq, p, flags); } @@ -795,6 +958,7 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags) psi_dequeue(p, flags & DEQUEUE_SLEEP); } + uclamp_rq_dec(rq, p); p->sched_class->dequeue_task(rq, p, flags); } @@ -6093,6 +6257,8 @@ void __init sched_init(void) psi_init(); + init_uclamp(); + scheduler_running = 1; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e58ab597ec88..cecc6baaba93 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -791,6 +791,48 @@ extern void rto_push_irq_work_func(struct irq_work *work); #endif #endif /* CONFIG_SMP */ +#ifdef CONFIG_UCLAMP_TASK +/* + * struct uclamp_bucket - Utilization clamp bucket + * @value: utilization clamp value for tasks on this clamp bucket + * @tasks: number of RUNNABLE tasks on this clamp bucket + * + * Keep track of how many tasks are RUNNABLE for a given utilization + * clamp value. + */ +struct uclamp_bucket { + unsigned long value : bits_per(SCHED_CAPACITY_SCALE); + unsigned long tasks : BITS_PER_LONG - bits_per(SCHED_CAPACITY_SCALE); +}; + +/* + * struct uclamp_rq - rq's utilization clamp + * @value: currently active clamp values for a rq + * @bucket: utilization clamp buckets affecting a rq + * + * Keep track of RUNNABLE tasks on a rq to aggregate their clamp values. + * A clamp value is affecting a rq when there is at least one task RUNNABLE + * (or actually running) with that value. + * + * There are up to UCLAMP_CNT possible different clamp values, currently there + * are only two: minimum utilization and maximum utilization. + * + * All utilization clamping values are MAX aggregated, since: + * - for util_min: we want to run the CPU at least at the max of the minimum + * utilization required by its currently RUNNABLE tasks. + * - for util_max: we want to allow the CPU to run up to the max of the + * maximum utilization allowed by its currently RUNNABLE tasks. + * + * Since on each system we expect only a limited number of different + * utilization clamp values (UCLAMP_BUCKETS), use a simple array to track + * the metrics required to compute all the per-rq utilization clamp values. + */ +struct uclamp_rq { + unsigned int value; + struct uclamp_bucket bucket[UCLAMP_BUCKETS]; +}; +#endif /* CONFIG_UCLAMP_TASK */ + /* * This is the main, per-CPU runqueue data structure. * @@ -825,6 +867,11 @@ struct rq { unsigned long nr_load_updates; u64 nr_switches; +#ifdef CONFIG_UCLAMP_TASK + /* Utilization clamp values based on CPU's RUNNABLE tasks */ + struct uclamp_rq uclamp[UCLAMP_CNT] ____cacheline_aligned; +#endif + struct cfs_rq cfs; struct rt_rq rt; struct dl_rq dl; @@ -1639,6 +1686,10 @@ extern const u32 sched_prio_to_wmult[40]; struct sched_class { const struct sched_class *next; +#ifdef CONFIG_UCLAMP_TASK + int uclamp_enabled; +#endif + void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); void (*yield_task) (struct rq *rq); -- cgit v1.2.3 From e8f14172c6b11e9a86c65532497087f8eb0f91b1 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Fri, 21 Jun 2019 09:42:05 +0100 Subject: sched/uclamp: Add system default clamps Tasks without a user-defined clamp value are considered not clamped and by default their utilization can have any value in the [0..SCHED_CAPACITY_SCALE] range. Tasks with a user-defined clamp value are allowed to request any value in that range, and the required clamp is unconditionally enforced. However, a "System Management Software" could be interested in limiting the range of clamp values allowed for all tasks. Add a privileged interface to define a system default configuration via: /proc/sys/kernel/sched_uclamp_util_{min,max} which works as an unconditional clamp range restriction for all tasks. With the default configuration, the full SCHED_CAPACITY_SCALE range of values is allowed for each clamp index. Otherwise, the task-specific clamp is capped by the corresponding system default value. Do that by tracking, for each task, the "effective" clamp value and bucket the task has been refcounted in at enqueue time. This allows to lazy aggregate "requested" and "system default" values at enqueue time and simplifies refcounting updates at dequeue time. The cached bucket ids are used to avoid (relatively) more expensive integer divisions every time a task is enqueued. An active flag is used to report when the "effective" value is valid and thus the task is actually refcounted in the corresponding rq's bucket. Signed-off-by: Patrick Bellasi Signed-off-by: Peter Zijlstra (Intel) Cc: Alessio Balsini Cc: Dietmar Eggemann Cc: Joel Fernandes Cc: Juri Lelli Cc: Linus Torvalds Cc: Morten Rasmussen Cc: Paul Turner Cc: Peter Zijlstra Cc: Quentin Perret Cc: Rafael J . Wysocki Cc: Steve Muckle Cc: Suren Baghdasaryan Cc: Tejun Heo Cc: Thomas Gleixner Cc: Todd Kjos Cc: Vincent Guittot Cc: Viresh Kumar Link: https://lkml.kernel.org/r/20190621084217.8167-5-patrick.bellasi@arm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 10 +++++ include/linux/sched/sysctl.h | 11 +++++ kernel/sched/core.c | 99 +++++++++++++++++++++++++++++++++++++++++++- kernel/sysctl.c | 16 +++++++ 4 files changed, 135 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 80235bcd05f2..5485f411e8e1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -586,14 +586,21 @@ struct sched_dl_entity { * Utilization clamp for a scheduling entity * @value: clamp value "assigned" to a se * @bucket_id: bucket index corresponding to the "assigned" value + * @active: the se is currently refcounted in a rq's bucket * * The bucket_id is the index of the clamp bucket matching the clamp value * which is pre-computed and stored to avoid expensive integer divisions from * the fast path. + * + * The active bit is set whenever a task has got an "effective" value assigned, + * which can be different from the clamp value "requested" from user-space. + * This allows to know a task is refcounted in the rq's bucket corresponding + * to the "effective" bucket_id. */ struct uclamp_se { unsigned int value : bits_per(SCHED_CAPACITY_SCALE); unsigned int bucket_id : bits_per(UCLAMP_BUCKETS); + unsigned int active : 1; }; #endif /* CONFIG_UCLAMP_TASK */ @@ -678,6 +685,9 @@ struct task_struct { struct sched_dl_entity dl; #ifdef CONFIG_UCLAMP_TASK + /* Clamp values requested for a scheduling entity */ + struct uclamp_se uclamp_req[UCLAMP_CNT]; + /* Effective clamp values used for a scheduling entity */ struct uclamp_se uclamp[UCLAMP_CNT]; #endif diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 99ce6d728df7..d4f6215ee03f 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -56,6 +56,11 @@ int sched_proc_update_handler(struct ctl_table *table, int write, extern unsigned int sysctl_sched_rt_period; extern int sysctl_sched_rt_runtime; +#ifdef CONFIG_UCLAMP_TASK +extern unsigned int sysctl_sched_uclamp_util_min; +extern unsigned int sysctl_sched_uclamp_util_max; +#endif + #ifdef CONFIG_CFS_BANDWIDTH extern unsigned int sysctl_sched_cfs_bandwidth_slice; #endif @@ -75,6 +80,12 @@ extern int sched_rt_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +#ifdef CONFIG_UCLAMP_TASK +extern int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); +#endif + extern int sysctl_numa_balancing(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2dde735635ec..b74de86b68c7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -773,6 +773,14 @@ static void set_load_weight(struct task_struct *p, bool update_load) } #ifdef CONFIG_UCLAMP_TASK +/* Max allowed minimum utilization */ +unsigned int sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE; + +/* Max allowed maximum utilization */ +unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE; + +/* All clamps are required to be less or equal than these values */ +static struct uclamp_se uclamp_default[UCLAMP_CNT]; /* Integer rounded range for each bucket */ #define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS) @@ -851,6 +859,25 @@ unsigned int uclamp_rq_max_value(struct rq *rq, unsigned int clamp_id, return uclamp_idle_value(rq, clamp_id, clamp_value); } +/* + * The effective clamp bucket index of a task depends on, by increasing + * priority: + * - the task specific clamp value, when explicitly requested from userspace + * - the system default clamp value, defined by the sysadmin + */ +static inline struct uclamp_se +uclamp_eff_get(struct task_struct *p, unsigned int clamp_id) +{ + struct uclamp_se uc_req = p->uclamp_req[clamp_id]; + struct uclamp_se uc_max = uclamp_default[clamp_id]; + + /* System default restrictions always apply */ + if (unlikely(uc_req.value > uc_max.value)) + return uc_max; + + return uc_req; +} + /* * When a task is enqueued on a rq, the clamp bucket currently defined by the * task's uclamp::bucket_id is refcounted on that rq. This also immediately @@ -870,8 +897,12 @@ static inline void uclamp_rq_inc_id(struct rq *rq, struct task_struct *p, lockdep_assert_held(&rq->lock); + /* Update task effective clamp */ + p->uclamp[clamp_id] = uclamp_eff_get(p, clamp_id); + bucket = &uc_rq->bucket[uc_se->bucket_id]; bucket->tasks++; + uc_se->active = true; uclamp_idle_reset(rq, clamp_id, uc_se->value); @@ -910,6 +941,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p, SCHED_WARN_ON(!bucket->tasks); if (likely(bucket->tasks)) bucket->tasks--; + uc_se->active = false; /* * Keep "local max aggregation" simple and accept to (possibly) @@ -958,8 +990,65 @@ static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) uclamp_rq_dec_id(rq, p, clamp_id); } +int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int old_min, old_max; + static DEFINE_MUTEX(mutex); + int result; + + mutex_lock(&mutex); + old_min = sysctl_sched_uclamp_util_min; + old_max = sysctl_sched_uclamp_util_max; + + result = proc_dointvec(table, write, buffer, lenp, ppos); + if (result) + goto undo; + if (!write) + goto done; + + if (sysctl_sched_uclamp_util_min > sysctl_sched_uclamp_util_max || + sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE) { + result = -EINVAL; + goto undo; + } + + if (old_min != sysctl_sched_uclamp_util_min) { + uclamp_se_set(&uclamp_default[UCLAMP_MIN], + sysctl_sched_uclamp_util_min); + } + if (old_max != sysctl_sched_uclamp_util_max) { + uclamp_se_set(&uclamp_default[UCLAMP_MAX], + sysctl_sched_uclamp_util_max); + } + + /* + * Updating all the RUNNABLE task is expensive, keep it simple and do + * just a lazy update at each next enqueue time. + */ + goto done; + +undo: + sysctl_sched_uclamp_util_min = old_min; + sysctl_sched_uclamp_util_max = old_max; +done: + mutex_unlock(&mutex); + + return result; +} + +static void uclamp_fork(struct task_struct *p) +{ + unsigned int clamp_id; + + for_each_clamp_id(clamp_id) + p->uclamp[clamp_id].active = false; +} + static void __init init_uclamp(void) { + struct uclamp_se uc_max = {}; unsigned int clamp_id; int cpu; @@ -969,14 +1058,20 @@ static void __init init_uclamp(void) } for_each_clamp_id(clamp_id) { - uclamp_se_set(&init_task.uclamp[clamp_id], + uclamp_se_set(&init_task.uclamp_req[clamp_id], uclamp_none(clamp_id)); } + + /* System defaults allow max clamp values for both indexes */ + uclamp_se_set(&uc_max, uclamp_none(UCLAMP_MAX)); + for_each_clamp_id(clamp_id) + uclamp_default[clamp_id] = uc_max; } #else /* CONFIG_UCLAMP_TASK */ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { } static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { } +static inline void uclamp_fork(struct task_struct *p) { } static inline void init_uclamp(void) { } #endif /* CONFIG_UCLAMP_TASK */ @@ -2545,6 +2640,8 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) */ p->prio = current->normal_prio; + uclamp_fork(p); + /* * Revert to default priority/policy on fork if requested. */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1beca96fb625..1c1ad1e14f21 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -452,6 +452,22 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = sched_rr_handler, }, +#ifdef CONFIG_UCLAMP_TASK + { + .procname = "sched_util_clamp_min", + .data = &sysctl_sched_uclamp_util_min, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_sched_uclamp_handler, + }, + { + .procname = "sched_util_clamp_max", + .data = &sysctl_sched_uclamp_util_max, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_sched_uclamp_handler, + }, +#endif #ifdef CONFIG_SCHED_AUTOGROUP { .procname = "sched_autogroup_enabled", -- cgit v1.2.3 From a509a7cd79747074a2c018a45bbbc52d1f4aed44 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Fri, 21 Jun 2019 09:42:07 +0100 Subject: sched/uclamp: Extend sched_setattr() to support utilization clamping The SCHED_DEADLINE scheduling class provides an advanced and formal model to define tasks requirements that can translate into proper decisions for both task placements and frequencies selections. Other classes have a more simplified model based on the POSIX concept of priorities. Such a simple priority based model however does not allow to exploit most advanced features of the Linux scheduler like, for example, driving frequencies selection via the schedutil cpufreq governor. However, also for non SCHED_DEADLINE tasks, it's still interesting to define tasks properties to support scheduler decisions. Utilization clamping exposes to user-space a new set of per-task attributes the scheduler can use as hints about the expected/required utilization for a task. This allows to implement a "proactive" per-task frequency control policy, a more advanced policy than the current one based just on "passive" measured task utilization. For example, it's possible to boost interactive tasks (e.g. to get better performance) or cap background tasks (e.g. to be more energy/thermal efficient). Introduce a new API to set utilization clamping values for a specified task by extending sched_setattr(), a syscall which already allows to define task specific properties for different scheduling classes. A new pair of attributes allows to specify a minimum and maximum utilization the scheduler can consider for a task. Do that by validating the required clamp values before and then applying the required changes using _the_ same pattern already in use for __setscheduler(). This ensures that the task is re-enqueued with the new clamp values. Signed-off-by: Patrick Bellasi Signed-off-by: Peter Zijlstra (Intel) Cc: Alessio Balsini Cc: Dietmar Eggemann Cc: Joel Fernandes Cc: Juri Lelli Cc: Linus Torvalds Cc: Morten Rasmussen Cc: Paul Turner Cc: Peter Zijlstra Cc: Quentin Perret Cc: Rafael J . Wysocki Cc: Steve Muckle Cc: Suren Baghdasaryan Cc: Tejun Heo Cc: Thomas Gleixner Cc: Todd Kjos Cc: Vincent Guittot Cc: Viresh Kumar Link: https://lkml.kernel.org/r/20190621084217.8167-7-patrick.bellasi@arm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 9 ++++ include/uapi/linux/sched.h | 12 +++++- include/uapi/linux/sched/types.h | 66 +++++++++++++++++++++++++---- kernel/sched/core.c | 91 ++++++++++++++++++++++++++++++++++++---- 4 files changed, 161 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5485f411e8e1..1113dd4706ae 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -587,6 +587,7 @@ struct sched_dl_entity { * @value: clamp value "assigned" to a se * @bucket_id: bucket index corresponding to the "assigned" value * @active: the se is currently refcounted in a rq's bucket + * @user_defined: the requested clamp value comes from user-space * * The bucket_id is the index of the clamp bucket matching the clamp value * which is pre-computed and stored to avoid expensive integer divisions from @@ -596,11 +597,19 @@ struct sched_dl_entity { * which can be different from the clamp value "requested" from user-space. * This allows to know a task is refcounted in the rq's bucket corresponding * to the "effective" bucket_id. + * + * The user_defined bit is set whenever a task has got a task-specific clamp + * value requested from userspace, i.e. the system defaults apply to this task + * just as a restriction. This allows to relax default clamps when a less + * restrictive task-specific value has been requested, thus allowing to + * implement a "nice" semantic. For example, a task running with a 20% + * default boost can still drop its own boosting to 0%. */ struct uclamp_se { unsigned int value : bits_per(SCHED_CAPACITY_SCALE); unsigned int bucket_id : bits_per(UCLAMP_BUCKETS); unsigned int active : 1; + unsigned int user_defined : 1; }; #endif /* CONFIG_UCLAMP_TASK */ diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index 58b2368d3634..617bb59aa8ba 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -52,10 +52,20 @@ #define SCHED_FLAG_RECLAIM 0x02 #define SCHED_FLAG_DL_OVERRUN 0x04 #define SCHED_FLAG_KEEP_POLICY 0x08 +#define SCHED_FLAG_KEEP_PARAMS 0x10 +#define SCHED_FLAG_UTIL_CLAMP_MIN 0x20 +#define SCHED_FLAG_UTIL_CLAMP_MAX 0x40 + +#define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \ + SCHED_FLAG_KEEP_PARAMS) + +#define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \ + SCHED_FLAG_UTIL_CLAMP_MAX) #define SCHED_FLAG_ALL (SCHED_FLAG_RESET_ON_FORK | \ SCHED_FLAG_RECLAIM | \ SCHED_FLAG_DL_OVERRUN | \ - SCHED_FLAG_KEEP_POLICY) + SCHED_FLAG_KEEP_ALL | \ + SCHED_FLAG_UTIL_CLAMP) #endif /* _UAPI_LINUX_SCHED_H */ diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h index 10fbb8031930..c852153ddb0d 100644 --- a/include/uapi/linux/sched/types.h +++ b/include/uapi/linux/sched/types.h @@ -9,6 +9,7 @@ struct sched_param { }; #define SCHED_ATTR_SIZE_VER0 48 /* sizeof first published struct */ +#define SCHED_ATTR_SIZE_VER1 56 /* add: util_{min,max} */ /* * Extended scheduling parameters data structure. @@ -21,8 +22,33 @@ struct sched_param { * the tasks may be useful for a wide variety of application fields, e.g., * multimedia, streaming, automation and control, and many others. * - * This variant (sched_attr) is meant at describing a so-called - * sporadic time-constrained task. In such model a task is specified by: + * This variant (sched_attr) allows to define additional attributes to + * improve the scheduler knowledge about task requirements. + * + * Scheduling Class Attributes + * =========================== + * + * A subset of sched_attr attributes specifies the + * scheduling policy and relative POSIX attributes: + * + * @size size of the structure, for fwd/bwd compat. + * + * @sched_policy task's scheduling policy + * @sched_nice task's nice value (SCHED_NORMAL/BATCH) + * @sched_priority task's static priority (SCHED_FIFO/RR) + * + * Certain more advanced scheduling features can be controlled by a + * predefined set of flags via the attribute: + * + * @sched_flags for customizing the scheduler behaviour + * + * Sporadic Time-Constrained Task Attributes + * ========================================= + * + * A subset of sched_attr attributes allows to describe a so-called + * sporadic time-constrained task. + * + * In such a model a task is specified by: * - the activation period or minimum instance inter-arrival time; * - the maximum (or average, depending on the actual scheduling * discipline) computation time of all instances, a.k.a. runtime; @@ -34,14 +60,8 @@ struct sched_param { * than the runtime and must be completed by time instant t equal to * the instance activation time + the deadline. * - * This is reflected by the actual fields of the sched_attr structure: + * This is reflected by the following fields of the sched_attr structure: * - * @size size of the structure, for fwd/bwd compat. - * - * @sched_policy task's scheduling policy - * @sched_flags for customizing the scheduler behaviour - * @sched_nice task's nice value (SCHED_NORMAL/BATCH) - * @sched_priority task's static priority (SCHED_FIFO/RR) * @sched_deadline representative of the task's deadline * @sched_runtime representative of the task's runtime * @sched_period representative of the task's period @@ -53,6 +73,29 @@ struct sched_param { * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the * only user of this new interface. More information about the algorithm * available in the scheduling class file or in Documentation/. + * + * Task Utilization Attributes + * =========================== + * + * A subset of sched_attr attributes allows to specify the utilization + * expected for a task. These attributes allow to inform the scheduler about + * the utilization boundaries within which it should schedule the task. These + * boundaries are valuable hints to support scheduler decisions on both task + * placement and frequency selection. + * + * @sched_util_min represents the minimum utilization + * @sched_util_max represents the maximum utilization + * + * Utilization is a value in the range [0..SCHED_CAPACITY_SCALE]. It + * represents the percentage of CPU time used by a task when running at the + * maximum frequency on the highest capacity CPU of the system. For example, a + * 20% utilization task is a task running for 2ms every 10ms at maximum + * frequency. + * + * A task with a min utilization value bigger than 0 is more likely scheduled + * on a CPU with a capacity big enough to fit the specified value. + * A task with a max utilization value smaller than 1024 is more likely + * scheduled on a CPU with no more capacity than the specified value. */ struct sched_attr { __u32 size; @@ -70,6 +113,11 @@ struct sched_attr { __u64 sched_runtime; __u64 sched_deadline; __u64 sched_period; + + /* Utilization hints */ + __u32 sched_util_min; + __u32 sched_util_max; + }; #endif /* _UAPI_LINUX_SCHED_TYPES_H */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 6d519f3f9789..e9a669266fa9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -805,10 +805,12 @@ static inline unsigned int uclamp_none(int clamp_id) return SCHED_CAPACITY_SCALE; } -static inline void uclamp_se_set(struct uclamp_se *uc_se, unsigned int value) +static inline void uclamp_se_set(struct uclamp_se *uc_se, + unsigned int value, bool user_defined) { uc_se->value = value; uc_se->bucket_id = uclamp_bucket_id(value); + uc_se->user_defined = user_defined; } static inline unsigned int @@ -1016,11 +1018,11 @@ int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, if (old_min != sysctl_sched_uclamp_util_min) { uclamp_se_set(&uclamp_default[UCLAMP_MIN], - sysctl_sched_uclamp_util_min); + sysctl_sched_uclamp_util_min, false); } if (old_max != sysctl_sched_uclamp_util_max) { uclamp_se_set(&uclamp_default[UCLAMP_MAX], - sysctl_sched_uclamp_util_max); + sysctl_sched_uclamp_util_max, false); } /* @@ -1038,6 +1040,42 @@ done: return result; } +static int uclamp_validate(struct task_struct *p, + const struct sched_attr *attr) +{ + unsigned int lower_bound = p->uclamp_req[UCLAMP_MIN].value; + unsigned int upper_bound = p->uclamp_req[UCLAMP_MAX].value; + + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) + lower_bound = attr->sched_util_min; + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) + upper_bound = attr->sched_util_max; + + if (lower_bound > upper_bound) + return -EINVAL; + if (upper_bound > SCHED_CAPACITY_SCALE) + return -EINVAL; + + return 0; +} + +static void __setscheduler_uclamp(struct task_struct *p, + const struct sched_attr *attr) +{ + if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP))) + return; + + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) { + uclamp_se_set(&p->uclamp_req[UCLAMP_MIN], + attr->sched_util_min, true); + } + + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) { + uclamp_se_set(&p->uclamp_req[UCLAMP_MAX], + attr->sched_util_max, true); + } +} + static void uclamp_fork(struct task_struct *p) { unsigned int clamp_id; @@ -1059,11 +1097,11 @@ static void __init init_uclamp(void) for_each_clamp_id(clamp_id) { uclamp_se_set(&init_task.uclamp_req[clamp_id], - uclamp_none(clamp_id)); + uclamp_none(clamp_id), false); } /* System defaults allow max clamp values for both indexes */ - uclamp_se_set(&uc_max, uclamp_none(UCLAMP_MAX)); + uclamp_se_set(&uc_max, uclamp_none(UCLAMP_MAX), false); for_each_clamp_id(clamp_id) uclamp_default[clamp_id] = uc_max; } @@ -1071,6 +1109,13 @@ static void __init init_uclamp(void) #else /* CONFIG_UCLAMP_TASK */ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { } static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { } +static inline int uclamp_validate(struct task_struct *p, + const struct sched_attr *attr) +{ + return -EOPNOTSUPP; +} +static void __setscheduler_uclamp(struct task_struct *p, + const struct sched_attr *attr) { } static inline void uclamp_fork(struct task_struct *p) { } static inline void init_uclamp(void) { } #endif /* CONFIG_UCLAMP_TASK */ @@ -4412,6 +4457,13 @@ static void __setscheduler_params(struct task_struct *p, static void __setscheduler(struct rq *rq, struct task_struct *p, const struct sched_attr *attr, bool keep_boost) { + /* + * If params can't change scheduling class changes aren't allowed + * either. + */ + if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS) + return; + __setscheduler_params(p, attr); /* @@ -4549,6 +4601,13 @@ recheck: return retval; } + /* Update task specific "requested" clamps */ + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) { + retval = uclamp_validate(p, attr); + if (retval) + return retval; + } + /* * Make sure no PI-waiters arrive (or leave) while we are * changing the priority of the task: @@ -4578,6 +4637,8 @@ recheck: goto change; if (dl_policy(policy) && dl_param_changed(p, attr)) goto change; + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) + goto change; p->sched_reset_on_fork = reset_on_fork; task_rq_unlock(rq, p, &rf); @@ -4658,7 +4719,9 @@ change: put_prev_task(rq, p); prev_class = p->sched_class; + __setscheduler(rq, p, attr, pi); + __setscheduler_uclamp(p, attr); if (queued) { /* @@ -4834,6 +4897,10 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a if (ret) return -EFAULT; + if ((attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) && + size < SCHED_ATTR_SIZE_VER1) + return -EINVAL; + /* * XXX: Do we want to be lenient like existing syscalls; or do we want * to be strict and return an error on out-of-bounds values? @@ -4903,10 +4970,15 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, rcu_read_lock(); retval = -ESRCH; p = find_process_by_pid(pid); - if (p != NULL) - retval = sched_setattr(p, &attr); + if (likely(p)) + get_task_struct(p); rcu_read_unlock(); + if (likely(p)) { + retval = sched_setattr(p, &attr); + put_task_struct(p); + } + return retval; } @@ -5057,6 +5129,11 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, else attr.sched_nice = task_nice(p); +#ifdef CONFIG_UCLAMP_TASK + attr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; + attr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; +#endif + rcu_read_unlock(); retval = sched_read_attr(uattr, &attr, size); -- cgit v1.2.3 From fd7d55172d1e2e501e6da0a5c1de25f06612dc2e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sat, 1 Jun 2019 01:27:22 -0700 Subject: perf/cgroups: Don't rotate events for cgroups unnecessarily Currently perf_rotate_context assumes that if the context's nr_events != nr_active a rotation is necessary for perf event multiplexing. With cgroups, nr_events is the total count of events for all cgroups and nr_active will not include events in a cgroup other than the current task's. This makes rotation appear necessary for cgroups when it is not. Add a perf_event_context flag that is set when rotation is necessary. Clear the flag during sched_out and set it when a flexible sched_in fails due to resources. Signed-off-by: Ian Rogers Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lkml.kernel.org/r/20190601082722.44543-1-irogers@google.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 +++++ kernel/events/core.c | 42 ++++++++++++++++++++++-------------------- 2 files changed, 27 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3dc01cf98e16..2ddae518dce6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -749,6 +749,11 @@ struct perf_event_context { int nr_stat; int nr_freq; int rotate_disable; + /* + * Set when nr_events != nr_active, except tolerant to events not + * necessary to be active due to scheduling constraints, such as cgroups. + */ + int rotate_necessary; refcount_t refcount; struct task_struct *task; diff --git a/kernel/events/core.c b/kernel/events/core.c index 118ad1aef6af..23efe6792abc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2952,6 +2952,12 @@ static void ctx_sched_out(struct perf_event_context *ctx, if (!ctx->nr_active || !(is_active & EVENT_ALL)) return; + /* + * If we had been multiplexing, no rotations are necessary, now no events + * are active. + */ + ctx->rotate_necessary = 0; + perf_pmu_disable(ctx->pmu); if (is_active & EVENT_PINNED) { list_for_each_entry_safe(event, tmp, &ctx->pinned_active, active_list) @@ -3319,10 +3325,13 @@ static int flexible_sched_in(struct perf_event *event, void *data) return 0; if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw)) { - if (!group_sched_in(event, sid->cpuctx, sid->ctx)) - list_add_tail(&event->active_list, &sid->ctx->flexible_active); - else + int ret = group_sched_in(event, sid->cpuctx, sid->ctx); + if (ret) { sid->can_add_hw = 0; + sid->ctx->rotate_necessary = 1; + return 0; + } + list_add_tail(&event->active_list, &sid->ctx->flexible_active); } return 0; @@ -3690,24 +3699,17 @@ ctx_first_active(struct perf_event_context *ctx) static bool perf_rotate_context(struct perf_cpu_context *cpuctx) { struct perf_event *cpu_event = NULL, *task_event = NULL; - bool cpu_rotate = false, task_rotate = false; - struct perf_event_context *ctx = NULL; + struct perf_event_context *task_ctx = NULL; + int cpu_rotate, task_rotate; /* * Since we run this from IRQ context, nobody can install new * events, thus the event count values are stable. */ - if (cpuctx->ctx.nr_events) { - if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) - cpu_rotate = true; - } - - ctx = cpuctx->task_ctx; - if (ctx && ctx->nr_events) { - if (ctx->nr_events != ctx->nr_active) - task_rotate = true; - } + cpu_rotate = cpuctx->ctx.rotate_necessary; + task_ctx = cpuctx->task_ctx; + task_rotate = task_ctx ? task_ctx->rotate_necessary : 0; if (!(cpu_rotate || task_rotate)) return false; @@ -3716,7 +3718,7 @@ static bool perf_rotate_context(struct perf_cpu_context *cpuctx) perf_pmu_disable(cpuctx->ctx.pmu); if (task_rotate) - task_event = ctx_first_active(ctx); + task_event = ctx_first_active(task_ctx); if (cpu_rotate) cpu_event = ctx_first_active(&cpuctx->ctx); @@ -3724,17 +3726,17 @@ static bool perf_rotate_context(struct perf_cpu_context *cpuctx) * As per the order given at ctx_resched() first 'pop' task flexible * and then, if needed CPU flexible. */ - if (task_event || (ctx && cpu_event)) - ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE); + if (task_event || (task_ctx && cpu_event)) + ctx_sched_out(task_ctx, cpuctx, EVENT_FLEXIBLE); if (cpu_event) cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); if (task_event) - rotate_ctx(ctx, task_event); + rotate_ctx(task_ctx, task_event); if (cpu_event) rotate_ctx(&cpuctx->ctx, cpu_event); - perf_event_sched_in(cpuctx, ctx, current); + perf_event_sched_in(cpuctx, task_ctx, current); perf_pmu_enable(cpuctx->ctx.pmu); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); -- cgit v1.2.3 From 44038bc514a244fba9d0d6d698b15970185ac251 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Mon, 20 May 2019 13:54:58 -0700 Subject: tpm: Abstract crypto agile event size calculations We need to calculate the size of crypto agile events in multiple locations, including in the EFI boot stub. The easiest way to do this is to put it in a header file as an inline and leave a wrapper to ensure we don't end up with multiple copies of it embedded in the existing code. Signed-off-by: Matthew Garrett Reviewed-by: Bartosz Szczepanek Tested-by: Bartosz Szczepanek Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/eventlog/tpm2.c | 47 +-------------------------- include/linux/tpm_eventlog.h | 68 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/tpm/eventlog/tpm2.c b/drivers/char/tpm/eventlog/tpm2.c index d506362e046f..22b3346b9946 100644 --- a/drivers/char/tpm/eventlog/tpm2.c +++ b/drivers/char/tpm/eventlog/tpm2.c @@ -36,52 +36,7 @@ static size_t calc_tpm2_event_size(struct tcg_pcr_event2_head *event, struct tcg_pcr_event *event_header) { - struct tcg_efi_specid_event_head *efispecid; - struct tcg_event_field *event_field; - void *marker; - void *marker_start; - u32 halg_size; - size_t size; - u16 halg; - int i; - int j; - - marker = event; - marker_start = marker; - marker = marker + sizeof(event->pcr_idx) + sizeof(event->event_type) - + sizeof(event->count); - - efispecid = (struct tcg_efi_specid_event_head *)event_header->event; - - /* Check if event is malformed. */ - if (event->count > efispecid->num_algs) - return 0; - - for (i = 0; i < event->count; i++) { - halg_size = sizeof(event->digests[i].alg_id); - memcpy(&halg, marker, halg_size); - marker = marker + halg_size; - for (j = 0; j < efispecid->num_algs; j++) { - if (halg == efispecid->digest_sizes[j].alg_id) { - marker += - efispecid->digest_sizes[j].digest_size; - break; - } - } - /* Algorithm without known length. Such event is unparseable. */ - if (j == efispecid->num_algs) - return 0; - } - - event_field = (struct tcg_event_field *)marker; - marker = marker + sizeof(event_field->event_size) - + event_field->event_size; - size = marker - marker_start; - - if ((event->event_type == 0) && (event_field->event_size == 0)) - return 0; - - return size; + return __calc_tpm2_event_size(event, event_header); } static void *tpm2_bios_measurements_start(struct seq_file *m, loff_t *pos) diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h index 81519f163211..6a86144e13f1 100644 --- a/include/linux/tpm_eventlog.h +++ b/include/linux/tpm_eventlog.h @@ -112,4 +112,72 @@ struct tcg_pcr_event2_head { struct tpm_digest digests[]; } __packed; +/** + * __calc_tpm2_event_size - calculate the size of a TPM2 event log entry + * @event: Pointer to the event whose size should be calculated + * @event_header: Pointer to the initial event containing the digest lengths + * + * The TPM2 event log format can contain multiple digests corresponding to + * separate PCR banks, and also contains a variable length of the data that + * was measured. This requires knowledge of how long each digest type is, + * and this information is contained within the first event in the log. + * + * We calculate the length by examining the number of events, and then looking + * at each event in turn to determine how much space is used for events in + * total. Once we've done this we know the offset of the data length field, + * and can calculate the total size of the event. + * + * Return: size of the event on success, <0 on failure + */ + +static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, + struct tcg_pcr_event *event_header) +{ + struct tcg_efi_specid_event_head *efispecid; + struct tcg_event_field *event_field; + void *marker; + void *marker_start; + u32 halg_size; + size_t size; + u16 halg; + int i; + int j; + + marker = event; + marker_start = marker; + marker = marker + sizeof(event->pcr_idx) + sizeof(event->event_type) + + sizeof(event->count); + + efispecid = (struct tcg_efi_specid_event_head *)event_header->event; + + /* Check if event is malformed. */ + if (event->count > efispecid->num_algs) + return 0; + + for (i = 0; i < event->count; i++) { + halg_size = sizeof(event->digests[i].alg_id); + memcpy(&halg, marker, halg_size); + marker = marker + halg_size; + for (j = 0; j < efispecid->num_algs; j++) { + if (halg == efispecid->digest_sizes[j].alg_id) { + marker += + efispecid->digest_sizes[j].digest_size; + break; + } + } + /* Algorithm without known length. Such event is unparseable. */ + if (j == efispecid->num_algs) + return 0; + } + + event_field = (struct tcg_event_field *)marker; + marker = marker + sizeof(event_field->event_size) + + event_field->event_size; + size = marker - marker_start; + + if ((event->event_type == 0) && (event_field->event_size == 0)) + return 0; + + return size; +} #endif -- cgit v1.2.3 From c46f3405692de1ac82240d927b9c7a0f9d6a4a36 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Mon, 20 May 2019 13:54:59 -0700 Subject: tpm: Reserve the TPM final events table UEFI systems provide a boot services protocol for obtaining the TPM event log, but this is unusable after ExitBootServices() is called. Unfortunately ExitBootServices() itself triggers additional TPM events that then can't be obtained using this protocol. The platform provides a mechanism for the OS to obtain these events by recording them to a separate UEFI configuration table which the OS can then map. Unfortunately this table isn't self describing in terms of providing its length, so we need to parse the events inside it to figure out how long it is. Since the table isn't mapped at this point, we need to extend the length calculation function to be able to map the event as it goes along. (Fixes by Bartosz Szczepanek ) Signed-off-by: Matthew Garrett Acked-by: Ard Biesheuvel Reviewed-by: Bartosz Szczepanek Tested-by: Bartosz Szczepanek Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/eventlog/tpm2.c | 2 +- drivers/firmware/efi/efi.c | 2 + drivers/firmware/efi/tpm.c | 63 ++++++++++++++++++++++-- include/linux/efi.h | 9 ++++ include/linux/tpm_eventlog.h | 102 +++++++++++++++++++++++++++++++++++---- 5 files changed, 164 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/tpm/eventlog/tpm2.c b/drivers/char/tpm/eventlog/tpm2.c index 22b3346b9946..b9aeda1cbcd7 100644 --- a/drivers/char/tpm/eventlog/tpm2.c +++ b/drivers/char/tpm/eventlog/tpm2.c @@ -36,7 +36,7 @@ static size_t calc_tpm2_event_size(struct tcg_pcr_event2_head *event, struct tcg_pcr_event *event_header) { - return __calc_tpm2_event_size(event, event_header); + return __calc_tpm2_event_size(event, event_header, false); } static void *tpm2_bios_measurements_start(struct seq_file *m, loff_t *pos) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 16b2137d117c..a449e645c44f 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -52,6 +52,7 @@ struct efi __read_mostly efi = { .mem_attr_table = EFI_INVALID_TABLE_ADDR, .rng_seed = EFI_INVALID_TABLE_ADDR, .tpm_log = EFI_INVALID_TABLE_ADDR, + .tpm_final_log = EFI_INVALID_TABLE_ADDR, .mem_reserve = EFI_INVALID_TABLE_ADDR, }; EXPORT_SYMBOL(efi); @@ -484,6 +485,7 @@ static __initdata efi_config_table_type_t common_tables[] = { {EFI_MEMORY_ATTRIBUTES_TABLE_GUID, "MEMATTR", &efi.mem_attr_table}, {LINUX_EFI_RANDOM_SEED_TABLE_GUID, "RNG", &efi.rng_seed}, {LINUX_EFI_TPM_EVENT_LOG_GUID, "TPMEventLog", &efi.tpm_log}, + {LINUX_EFI_TPM_FINAL_LOG_GUID, "TPMFinalLog", &efi.tpm_final_log}, {LINUX_EFI_MEMRESERVE_TABLE_GUID, "MEMRESERVE", &efi.mem_reserve}, {NULL_GUID, NULL, NULL}, }; diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c index 3a689b40ccc0..1d3f5ca3eaaf 100644 --- a/drivers/firmware/efi/tpm.c +++ b/drivers/firmware/efi/tpm.c @@ -4,11 +4,34 @@ * Thiebaud Weksteen */ +#define TPM_MEMREMAP(start, size) early_memremap(start, size) +#define TPM_MEMUNMAP(start, size) early_memunmap(start, size) + +#include #include #include #include +#include -#include +int efi_tpm_final_log_size; +EXPORT_SYMBOL(efi_tpm_final_log_size); + +static int tpm2_calc_event_log_size(void *data, int count, void *size_info) +{ + struct tcg_pcr_event2_head *header; + int event_size, size = 0; + + while (count > 0) { + header = data + size; + event_size = __calc_tpm2_event_size(header, size_info, true); + if (event_size == 0) + return -1; + size += event_size; + count--; + } + + return size; +} /* * Reserve the memory associated with the TPM Event Log configuration table. @@ -16,22 +39,54 @@ int __init efi_tpm_eventlog_init(void) { struct linux_efi_tpm_eventlog *log_tbl; + struct efi_tcg2_final_events_table *final_tbl; unsigned int tbl_size; + int ret = 0; - if (efi.tpm_log == EFI_INVALID_TABLE_ADDR) + if (efi.tpm_log == EFI_INVALID_TABLE_ADDR) { + /* + * We can't calculate the size of the final events without the + * first entry in the TPM log, so bail here. + */ return 0; + } log_tbl = early_memremap(efi.tpm_log, sizeof(*log_tbl)); if (!log_tbl) { pr_err("Failed to map TPM Event Log table @ 0x%lx\n", - efi.tpm_log); + efi.tpm_log); efi.tpm_log = EFI_INVALID_TABLE_ADDR; return -ENOMEM; } tbl_size = sizeof(*log_tbl) + log_tbl->size; memblock_reserve(efi.tpm_log, tbl_size); + + if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) + goto out; + + final_tbl = early_memremap(efi.tpm_final_log, sizeof(*final_tbl)); + + if (!final_tbl) { + pr_err("Failed to map TPM Final Event Log table @ 0x%lx\n", + efi.tpm_final_log); + efi.tpm_final_log = EFI_INVALID_TABLE_ADDR; + ret = -ENOMEM; + goto out; + } + + tbl_size = tpm2_calc_event_log_size((void *)efi.tpm_final_log + + sizeof(final_tbl->version) + + sizeof(final_tbl->nr_events), + final_tbl->nr_events, + log_tbl->log); + memblock_reserve((unsigned long)final_tbl, + tbl_size + sizeof(*final_tbl)); + early_memunmap(final_tbl, sizeof(*final_tbl)); + efi_tpm_final_log_size = tbl_size; + +out: early_memunmap(log_tbl, sizeof(*log_tbl)); - return 0; + return ret; } diff --git a/include/linux/efi.h b/include/linux/efi.h index 6ebc2098cfe1..b391263d8ec6 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -689,6 +689,7 @@ void efi_native_runtime_setup(void); #define LINUX_EFI_LOADER_ENTRY_GUID EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf, 0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f) #define LINUX_EFI_RANDOM_SEED_TABLE_GUID EFI_GUID(0x1ce1e5bc, 0x7ceb, 0x42f2, 0x81, 0xe5, 0x8a, 0xad, 0xf1, 0x80, 0xf5, 0x7b) #define LINUX_EFI_TPM_EVENT_LOG_GUID EFI_GUID(0xb7799cb0, 0xeca2, 0x4943, 0x96, 0x67, 0x1f, 0xae, 0x07, 0xb7, 0x47, 0xfa) +#define LINUX_EFI_TPM_FINAL_LOG_GUID EFI_GUID(0x1e2ed096, 0x30e2, 0x4254, 0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25) #define LINUX_EFI_MEMRESERVE_TABLE_GUID EFI_GUID(0x888eb0c6, 0x8ede, 0x4ff5, 0xa8, 0xf0, 0x9a, 0xee, 0x5c, 0xb9, 0x77, 0xc2) typedef struct { @@ -996,6 +997,7 @@ extern struct efi { unsigned long mem_attr_table; /* memory attributes table */ unsigned long rng_seed; /* UEFI firmware random seed */ unsigned long tpm_log; /* TPM2 Event Log table */ + unsigned long tpm_final_log; /* TPM2 Final Events Log table */ unsigned long mem_reserve; /* Linux EFI memreserve table */ efi_get_time_t *get_time; efi_set_time_t *set_time; @@ -1712,6 +1714,13 @@ struct linux_efi_tpm_eventlog { extern int efi_tpm_eventlog_init(void); +struct efi_tcg2_final_events_table { + u64 version; + u64 nr_events; + u8 events[]; +}; +extern int efi_tpm_final_log_size; + /* * efi_runtime_service() function identifiers. * "NONE" is used by efi_recover_from_page_fault() to check if the page diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h index 6a86144e13f1..63238c84dc0b 100644 --- a/include/linux/tpm_eventlog.h +++ b/include/linux/tpm_eventlog.h @@ -112,10 +112,35 @@ struct tcg_pcr_event2_head { struct tpm_digest digests[]; } __packed; +struct tcg_algorithm_size { + u16 algorithm_id; + u16 algorithm_size; +}; + +struct tcg_algorithm_info { + u8 signature[16]; + u32 platform_class; + u8 spec_version_minor; + u8 spec_version_major; + u8 spec_errata; + u8 uintn_size; + u32 number_of_algorithms; + struct tcg_algorithm_size digest_sizes[]; +}; + +#ifndef TPM_MEMREMAP +#define TPM_MEMREMAP(start, size) NULL +#endif + +#ifndef TPM_MEMUNMAP +#define TPM_MEMUNMAP(start, size) do{} while(0) +#endif + /** * __calc_tpm2_event_size - calculate the size of a TPM2 event log entry * @event: Pointer to the event whose size should be calculated * @event_header: Pointer to the initial event containing the digest lengths + * @do_mapping: Whether or not the event needs to be mapped * * The TPM2 event log format can contain multiple digests corresponding to * separate PCR banks, and also contains a variable length of the data that @@ -131,10 +156,13 @@ struct tcg_pcr_event2_head { */ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, - struct tcg_pcr_event *event_header) + struct tcg_pcr_event *event_header, + bool do_mapping) { struct tcg_efi_specid_event_head *efispecid; struct tcg_event_field *event_field; + void *mapping = NULL; + int mapping_size; void *marker; void *marker_start; u32 halg_size; @@ -148,16 +176,49 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, marker = marker + sizeof(event->pcr_idx) + sizeof(event->event_type) + sizeof(event->count); + /* Map the event header */ + if (do_mapping) { + mapping_size = marker - marker_start; + mapping = TPM_MEMREMAP((unsigned long)marker_start, + mapping_size); + if (!mapping) { + size = 0; + goto out; + } + } else { + mapping = marker_start; + } + + event = (struct tcg_pcr_event2_head *)mapping; + efispecid = (struct tcg_efi_specid_event_head *)event_header->event; /* Check if event is malformed. */ - if (event->count > efispecid->num_algs) - return 0; + if (event->count > efispecid->num_algs) { + size = 0; + goto out; + } for (i = 0; i < event->count; i++) { halg_size = sizeof(event->digests[i].alg_id); - memcpy(&halg, marker, halg_size); + + /* Map the digest's algorithm identifier */ + if (do_mapping) { + TPM_MEMUNMAP(mapping, mapping_size); + mapping_size = halg_size; + mapping = TPM_MEMREMAP((unsigned long)marker, + mapping_size); + if (!mapping) { + size = 0; + goto out; + } + } else { + mapping = marker; + } + + memcpy(&halg, mapping, halg_size); marker = marker + halg_size; + for (j = 0; j < efispecid->num_algs; j++) { if (halg == efispecid->digest_sizes[j].alg_id) { marker += @@ -166,18 +227,41 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, } } /* Algorithm without known length. Such event is unparseable. */ - if (j == efispecid->num_algs) - return 0; + if (j == efispecid->num_algs) { + size = 0; + goto out; + } + } + + /* + * Map the event size - we don't read from the event itself, so + * we don't need to map it + */ + if (do_mapping) { + TPM_MEMUNMAP(mapping, mapping_size); + mapping_size += sizeof(event_field->event_size); + mapping = TPM_MEMREMAP((unsigned long)marker, + mapping_size); + if (!mapping) { + size = 0; + goto out; + } + } else { + mapping = marker; } - event_field = (struct tcg_event_field *)marker; + event_field = (struct tcg_event_field *)mapping; + marker = marker + sizeof(event_field->event_size) + event_field->event_size; size = marker - marker_start; if ((event->event_type == 0) && (event_field->event_size == 0)) - return 0; - + size = 0; +out: + if (do_mapping) + TPM_MEMUNMAP(mapping, mapping_size); return size; } + #endif -- cgit v1.2.3 From 166a2809d65b282272c474835ec22c882a39ca1b Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Fri, 7 Jun 2019 13:51:47 -0700 Subject: tpm: Don't duplicate events from the final event log in the TCG2 log After the first call to GetEventLog() on UEFI systems using the TCG2 crypto agile log format, any further log events (other than those triggered by ExitBootServices()) will be logged in both the main log and also in the Final Events Log. While the kernel only calls GetEventLog() immediately before ExitBootServices(), we can't control whether earlier parts of the boot process have done so. This will result in log entries that exist in both logs, and so the current approach of simply appending the Final Event Log to the main log will result in events being duplicated. We can avoid this problem by looking at the size of the Final Event Log just before we call ExitBootServices() and exporting this to the main kernel. The kernel can then skip over all events that occured before ExitBootServices() and only append events that were not also logged to the main log. Signed-off-by: Matthew Garrett Reported-by: Joe Richey Suggested-by: Joe Richey Acked-by: Ard Biesheuvel Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/eventlog/efi.c | 11 ++++++++++- drivers/firmware/efi/libstub/tpm.c | 30 ++++++++++++++++++++++++++++++ include/linux/efi.h | 1 + 3 files changed, 41 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/char/tpm/eventlog/efi.c b/drivers/char/tpm/eventlog/efi.c index 73b981026627..6bb023de17f1 100644 --- a/drivers/char/tpm/eventlog/efi.c +++ b/drivers/char/tpm/eventlog/efi.c @@ -75,6 +75,8 @@ int tpm_read_log_efi(struct tpm_chip *chip) goto out; } + efi_tpm_final_log_size -= log_tbl->final_events_preboot_size; + tmp = krealloc(log->bios_event_log, log_size + efi_tpm_final_log_size, GFP_KERNEL); @@ -85,8 +87,15 @@ int tpm_read_log_efi(struct tpm_chip *chip) } log->bios_event_log = tmp; + + /* + * Copy any of the final events log that didn't also end up in the + * main log. Events can be logged in both if events are generated + * between GetEventLog() and ExitBootServices(). + */ memcpy((void *)log->bios_event_log + log_size, - final_tbl->events, efi_tpm_final_log_size); + final_tbl->events + log_tbl->final_events_preboot_size, + efi_tpm_final_log_size); log->bios_event_log_end = log->bios_event_log + log_size + efi_tpm_final_log_size; diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c index 6b3b507a54eb..eb9af83e4d59 100644 --- a/drivers/firmware/efi/libstub/tpm.c +++ b/drivers/firmware/efi/libstub/tpm.c @@ -64,11 +64,13 @@ void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table_arg) efi_status_t status; efi_physical_addr_t log_location = 0, log_last_entry = 0; struct linux_efi_tpm_eventlog *log_tbl = NULL; + struct efi_tcg2_final_events_table *final_events_table; unsigned long first_entry_addr, last_entry_addr; size_t log_size, last_entry_size; efi_bool_t truncated; int version = EFI_TCG2_EVENT_LOG_FORMAT_TCG_2; void *tcg2_protocol = NULL; + int final_events_size = 0; status = efi_call_early(locate_protocol, &tcg2_guid, NULL, &tcg2_protocol); @@ -134,8 +136,36 @@ void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table_arg) return; } + /* + * Figure out whether any events have already been logged to the + * final events structure, and if so how much space they take up + */ + final_events_table = get_efi_config_table(sys_table_arg, + LINUX_EFI_TPM_FINAL_LOG_GUID); + if (final_events_table && final_events_table->nr_events) { + struct tcg_pcr_event2_head *header; + int offset; + void *data; + int event_size; + int i = final_events_table->nr_events; + + data = (void *)final_events_table; + offset = sizeof(final_events_table->version) + + sizeof(final_events_table->nr_events); + + while (i > 0) { + header = data + offset + final_events_size; + event_size = __calc_tpm2_event_size(header, + (void *)(long)log_location, + false); + final_events_size += event_size; + i--; + } + } + memset(log_tbl, 0, sizeof(*log_tbl) + log_size); log_tbl->size = log_size; + log_tbl->final_events_preboot_size = final_events_size; log_tbl->version = version; memcpy(log_tbl->log, (void *) first_entry_addr, log_size); diff --git a/include/linux/efi.h b/include/linux/efi.h index b391263d8ec6..f87fabea4a85 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1708,6 +1708,7 @@ struct linux_efi_random_seed { struct linux_efi_tpm_eventlog { u32 size; + u32 final_events_preboot_size; u8 version; u8 log[]; }; -- cgit v1.2.3 From 792c4e9d0bbb53b34bf1c07c2ef25609d746c57d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 20 Jun 2019 07:03:47 +0000 Subject: net/mlx5: Convert mkey_table to XArray The lock protecting the data structure does not need to be an rwlock. The only read access to the lock is in an error path, and if that's limiting your scalability, you have bigger performance problems. Eliminate mlx5_mkey_table in favour of using the xarray directly. reg_mr_callback must use GFP_ATOMIC for allocating XArray nodes as it may be called in interrupt context. This also fixes a minor bug where SRCU locking was being used on the radix tree read side, when RCU was needed too. Signed-off-by: Matthew Wilcox Signed-off-by: Jason Gunthorpe Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/cq.c | 8 ++++---- drivers/infiniband/hw/mlx5/devx.c | 18 ++++-------------- drivers/infiniband/hw/mlx5/mr.c | 10 +++++----- drivers/infiniband/hw/mlx5/odp.c | 10 +++++----- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 27 +++++++++++---------------- include/linux/mlx5/driver.h | 13 ++----------- include/linux/mlx5/qp.h | 5 ----- 7 files changed, 31 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 2e2e65f00257..0220736b073e 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -522,9 +522,9 @@ repoll: case MLX5_CQE_SIG_ERR: sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; - read_lock(&dev->mdev->priv.mkey_table.lock); - mmkey = __mlx5_mr_lookup(dev->mdev, - mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); + xa_lock(&dev->mdev->priv.mkey_table); + mmkey = xa_load(&dev->mdev->priv.mkey_table, + mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); mr = to_mibmr(mmkey); get_sig_err_item(sig_err_cqe, &mr->sig->err_item); mr->sig->sig_err_exists = true; @@ -537,7 +537,7 @@ repoll: mr->sig->err_item.expected, mr->sig->err_item.actual); - read_unlock(&dev->mdev->priv.mkey_table.lock); + xa_unlock(&dev->mdev->priv.mkey_table); goto repoll; } diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 80b42d069328..931f587dfb8f 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1043,13 +1043,10 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj, struct mlx5_ib_dev *dev, void *in, void *out) { - struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table; struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr; - unsigned long flags; struct mlx5_core_mkey *mkey; void *mkc; u8 key; - int err; mkey = &devx_mr->mmkey; mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); @@ -1062,11 +1059,8 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj, mkey->pd = MLX5_GET(mkc, mkc, pd); devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); - write_lock_irqsave(&table->lock, flags); - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), - mkey); - write_unlock_irqrestore(&table->lock, flags); - return err; + return xa_err(xa_store(&dev->mdev->priv.mkey_table, + mlx5_base_mkey(mkey->key), mkey, GFP_KERNEL)); } static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, @@ -1117,12 +1111,8 @@ static void devx_free_indirect_mkey(struct rcu_head *rcu) */ static void devx_cleanup_mkey(struct devx_obj *obj) { - struct mlx5_mkey_table *table = &obj->mdev->priv.mkey_table; - unsigned long flags; - - write_lock_irqsave(&table->lock, flags); - radix_tree_delete(&table->tree, mlx5_base_mkey(obj->devx_mr.mmkey.key)); - write_unlock_irqrestore(&table->lock, flags); + xa_erase(&obj->mdev->priv.mkey_table, + mlx5_base_mkey(obj->devx_mr.mmkey.key)); } static int devx_obj_cleanup(struct ib_uobject *uobject, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 5f09699fab98..83b452d977d4 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -130,7 +130,7 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context) struct mlx5_cache_ent *ent = &cache->ent[c]; u8 key; unsigned long flags; - struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table; + struct xarray *mkeys = &dev->mdev->priv.mkey_table; int err; spin_lock_irqsave(&ent->lock, flags); @@ -158,12 +158,12 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context) ent->size++; spin_unlock_irqrestore(&ent->lock, flags); - write_lock_irqsave(&table->lock, flags); - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey); + xa_lock_irqsave(mkeys, flags); + err = xa_err(__xa_store(mkeys, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey, GFP_ATOMIC)); + xa_unlock_irqrestore(mkeys, flags); if (err) pr_err("Error inserting to mkey tree. 0x%x\n", -err); - write_unlock_irqrestore(&table->lock, flags); if (!completion_done(&ent->compl)) complete(&ent->compl); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 12ccee1eb047..c594489eb2d7 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -768,7 +768,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, bcnt -= *bytes_committed; next_mr: - mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(key)); + mmkey = xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(key)); if (!mkey_is_eq(mmkey, key)) { mlx5_ib_dbg(dev, "failed to find mkey %x\n", key); ret = -EFAULT; @@ -1686,8 +1686,8 @@ static void num_pending_prefetch_dec(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr; - mmkey = __mlx5_mr_lookup(dev->mdev, - mlx5_base_mkey(sg_list[i].lkey)); + mmkey = xa_load(&dev->mdev->priv.mkey_table, + mlx5_base_mkey(sg_list[i].lkey)); mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); atomic_dec(&mr->num_pending_prefetch); } @@ -1706,8 +1706,8 @@ static bool num_pending_prefetch_inc(struct ib_pd *pd, struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr; - mmkey = __mlx5_mr_lookup(dev->mdev, - mlx5_base_mkey(sg_list[i].lkey)); + mmkey = xa_load(&dev->mdev->priv.mkey_table, + mlx5_base_mkey(sg_list[i].lkey)); if (!mmkey || mmkey->key != sg_list[i].lkey) { ret = false; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index ea744d8466ea..9231b39d18b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -38,15 +38,12 @@ void mlx5_init_mkey_table(struct mlx5_core_dev *dev) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; - - memset(table, 0, sizeof(*table)); - rwlock_init(&table->lock); - INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + xa_init_flags(&dev->priv.mkey_table, XA_FLAGS_LOCK_IRQ); } void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) { + WARN_ON(!xa_empty(&dev->priv.mkey_table)); } int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, @@ -56,8 +53,8 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mlx5_async_cbk_t callback, struct mlx5_async_work *context) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; + struct xarray *mkeys = &dev->priv.mkey_table; u32 mkey_index; void *mkc; int err; @@ -88,12 +85,10 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", mkey_index, key, mkey->key); - /* connect to mkey tree */ - write_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey); - write_unlock_irq(&table->lock); + err = xa_err(xa_store_irq(mkeys, mlx5_base_mkey(mkey->key), mkey, + GFP_KERNEL)); if (err) { - mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n", + mlx5_core_warn(dev, "failed xarray insert of mkey 0x%x, %d\n", mlx5_base_mkey(mkey->key), err); mlx5_core_destroy_mkey(dev, mkey); } @@ -114,17 +109,17 @@ EXPORT_SYMBOL(mlx5_core_create_mkey); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0}; + struct xarray *mkeys = &dev->priv.mkey_table; struct mlx5_core_mkey *deleted_mkey; unsigned long flags; - write_lock_irqsave(&table->lock, flags); - deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key)); - write_unlock_irqrestore(&table->lock, flags); + xa_lock_irqsave(mkeys, flags); + deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key)); + xa_unlock_irqrestore(mkeys, flags); if (!deleted_mkey) { - mlx5_core_dbg(dev, "failed radix tree delete of mkey 0x%x\n", + mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n", mlx5_base_mkey(mkey->key)); return -ENOENT; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d8ab633406c2..87f77ded78d4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include @@ -452,13 +452,6 @@ struct mlx5_qp_table { struct radix_tree_root tree; }; -struct mlx5_mkey_table { - /* protect radix tree - */ - rwlock_t lock; - struct radix_tree_root tree; -}; - struct mlx5_vf_context { int enabled; u64 port_guid; @@ -546,9 +539,7 @@ struct mlx5_priv { struct dentry *cmdif_debugfs; /* end: qp staff */ - /* start: mkey staff */ - struct mlx5_mkey_table mkey_table; - /* end: mkey staff */ + struct xarray mkey_table; /* start: alloc staff */ /* protect buffer alocation according to numa node */ diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 3ba4edbd17a6..d1f353c64797 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -551,11 +551,6 @@ static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); } -static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key) -{ - return radix_tree_lookup(&dev->priv.mkey_table.tree, key); -} - int mlx5_core_create_dct(struct mlx5_core_dev *dev, struct mlx5_core_dct *qp, u32 *in, int inlen, -- cgit v1.2.3 From 4b85faed211ccfbcc7f3adf1cd62f0b00d1a172b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Jun 2019 16:06:10 +0200 Subject: dma-mapping: add a dma_alloc_need_uncached helper Check if we need to allocate uncached memory for a device given the allocation flags. Switch over the uncached segment check to this helper to deal with architectures that do not support the dma_cache_sync operation and thus should not returned cacheable memory for DMA_ATTR_NON_CONSISTENT allocations. Signed-off-by: Christoph Hellwig --- include/linux/dma-noncoherent.h | 14 ++++++++++++++ kernel/dma/direct.c | 4 ++-- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index 7e0126a04e02..732919ac5c11 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -20,6 +20,20 @@ static inline bool dev_is_dma_coherent(struct device *dev) } #endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */ +/* + * Check if an allocation needs to be marked uncached to be coherent. + */ +static inline bool dma_alloc_need_uncached(struct device *dev, + unsigned long attrs) +{ + if (dev_is_dma_coherent(dev)) + return false; + if (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) && + (attrs & DMA_ATTR_NON_CONSISTENT)) + return false; + return true; +} + void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index b67f0aa08aa3..c2893713bf80 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -160,7 +160,7 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, memset(ret, 0, size); if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) && - !dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_NON_CONSISTENT)) { + dma_alloc_need_uncached(dev, attrs)) { arch_dma_prep_coherent(page, size); ret = uncached_kernel_address(ret); } @@ -182,7 +182,7 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) && - !dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_NON_CONSISTENT)) + dma_alloc_need_uncached(dev, attrs)) cpu_addr = cached_kernel_address(cpu_addr); __dma_direct_free_pages(dev, size, virt_to_page(cpu_addr)); } -- cgit v1.2.3 From d48e0cd8fcaf314175a15d3076d7a1e71bd4e628 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 24 Jun 2019 11:15:39 +0200 Subject: timekeeping: Boot should be boottime for coarse ns accessor Somewhere in all the patchsets before, this cleanup got lost. Signed-off-by: Jason A. Donenfeld Signed-off-by: Thomas Gleixner Cc: Arnd Bergmann Link: https://lkml.kernel.org/r/20190624091539.13512-1-Jason@zx2c4.com --- Documentation/core-api/timekeeping.rst | 2 +- include/linux/timekeeping.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/core-api/timekeeping.rst b/Documentation/core-api/timekeeping.rst index 15fc58e85ef9..20ee447a50f3 100644 --- a/Documentation/core-api/timekeeping.rst +++ b/Documentation/core-api/timekeeping.rst @@ -105,7 +105,7 @@ Some additional variants exist for more specialized cases: ktime_t ktime_get_coarse_clocktai( void ) .. c:function:: u64 ktime_get_coarse_ns( void ) - u64 ktime_get_coarse_boot_ns( void ) + u64 ktime_get_coarse_boottime_ns( void ) u64 ktime_get_coarse_real_ns( void ) u64 ktime_get_coarse_clocktai_ns( void ) diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index dcffc00755f2..b27e2ffa96c1 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -131,7 +131,7 @@ static inline u64 ktime_get_coarse_real_ns(void) return ktime_to_ns(ktime_get_coarse_real()); } -static inline u64 ktime_get_coarse_boot_ns(void) +static inline u64 ktime_get_coarse_boottime_ns(void) { return ktime_to_ns(ktime_get_coarse_boottime()); } -- cgit v1.2.3 From ec6516bfbaf72e7c81811162b6de96322e32a027 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 13 Jun 2019 10:55:31 +0900 Subject: pinctrl: remove unneeded #ifdef around declarations What is the point in surrounding the whole of declarations with ifdef like this? #ifdef CONFIG_FOO int foo(void); #endif If CONFIG_FOO is not defined, all callers of foo() will fail with implicit declaration errors since the top Makefile adds -Werror-implicit-function-declaration to KBUILD_CFLAGS. This breaks the build earlier when you are doing something wrong. That's it. Anyway, it will fail to link since the definition of foo() is not compiled. In summary, these ifdef are unneeded. Signed-off-by: Masahiro Yamada Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 20 ++++++-------------- include/linux/pinctrl/pinconf.h | 4 ---- include/linux/pinctrl/pinctrl.h | 4 ---- include/linux/pinctrl/pinmux.h | 4 ---- 4 files changed, 6 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 72d06d6a3099..673828a52294 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -12,6 +12,12 @@ #ifndef __LINUX_PINCTRL_PINCONF_GENERIC_H #define __LINUX_PINCTRL_PINCONF_GENERIC_H +#include +#include + +struct pinctrl_dev; +struct pinctrl_map; + /** * enum pin_config_param - possible pin configuration parameters * @PIN_CONFIG_BIAS_BUS_HOLD: the pin will be set to weakly latch so that it @@ -159,9 +165,6 @@ static inline unsigned long pinconf_to_config_packed(enum pin_config_param param return PIN_CONF_PACKED(param, argument); } -#ifdef CONFIG_GENERIC_PINCONF - -#ifdef CONFIG_DEBUG_FS #define PCONFDUMP(a, b, c, d) { \ .param = a, .display = b, .format = c, .has_arg = d \ } @@ -172,14 +175,6 @@ struct pin_config_item { const char * const format; bool has_arg; }; -#endif /* CONFIG_DEBUG_FS */ - -#ifdef CONFIG_OF - -#include -#include -struct pinctrl_dev; -struct pinctrl_map; struct pinconf_generic_params { const char * const property; @@ -224,8 +219,5 @@ static inline int pinconf_generic_dt_node_to_map_all( return pinconf_generic_dt_node_to_map(pctldev, np_config, map, num_maps, PIN_MAP_TYPE_INVALID); } -#endif - -#endif /* CONFIG_GENERIC_PINCONF */ #endif /* __LINUX_PINCTRL_PINCONF_GENERIC_H */ diff --git a/include/linux/pinctrl/pinconf.h b/include/linux/pinctrl/pinconf.h index 9bebc3554809..513883dcc5a9 100644 --- a/include/linux/pinctrl/pinconf.h +++ b/include/linux/pinctrl/pinconf.h @@ -12,8 +12,6 @@ #ifndef __LINUX_PINCTRL_PINCONF_H #define __LINUX_PINCTRL_PINCONF_H -#ifdef CONFIG_PINCONF - #include struct pinctrl_dev; @@ -67,6 +65,4 @@ struct pinconf_ops { unsigned long config); }; -#endif - #endif /* __LINUX_PINCTRL_PINCONF_H */ diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 36a79fe7b84f..27738164daa7 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -12,8 +12,6 @@ #ifndef __LINUX_PINCTRL_PINCTRL_H #define __LINUX_PINCTRL_PINCTRL_H -#ifdef CONFIG_PINCTRL - #include #include #include @@ -203,6 +201,4 @@ extern const char *pinctrl_dev_get_name(struct pinctrl_dev *pctldev); extern const char *pinctrl_dev_get_devname(struct pinctrl_dev *pctldev); extern void *pinctrl_dev_get_drvdata(struct pinctrl_dev *pctldev); -#endif /* !CONFIG_PINCTRL */ - #endif /* __LINUX_PINCTRL_PINCTRL_H */ diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h index ace60d775b20..566a5fe8eab5 100644 --- a/include/linux/pinctrl/pinmux.h +++ b/include/linux/pinctrl/pinmux.h @@ -16,8 +16,6 @@ #include #include -#ifdef CONFIG_PINMUX - struct pinctrl_dev; /** @@ -85,6 +83,4 @@ struct pinmux_ops { bool strict; }; -#endif /* CONFIG_PINMUX */ - #endif /* __LINUX_PINCTRL_PINMUX_H */ -- cgit v1.2.3 From d8ca7d184b33af7913c244900df77c6cad6a5590 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 24 Jun 2019 00:08:31 +0300 Subject: regulator: core: Introduce API for regulators coupling customization Right now regulator core supports only one type of regulators coupling, the "voltage max-spread" which keeps voltages of coupled regulators in a given range from each other. A more sophisticated coupling may be required in practice, one example is the NVIDIA Tegra SoCs which besides the max-spreading have other restrictions that must be adhered. Introduce API that allow platforms to provide their own customized coupling algorithms. Signed-off-by: Dmitry Osipenko Signed-off-by: Mark Brown --- drivers/regulator/core.c | 136 ++++++++++++++++++++++++++++++++------ drivers/regulator/of_regulator.c | 63 ++++++++++++------ include/linux/regulator/coupler.h | 62 +++++++++++++++++ include/linux/regulator/driver.h | 6 +- include/linux/regulator/machine.h | 2 +- 5 files changed, 225 insertions(+), 44 deletions(-) create mode 100644 include/linux/regulator/coupler.h (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 955a0a15b9cb..12c870f790f5 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -55,6 +56,7 @@ static DEFINE_MUTEX(regulator_list_mutex); static LIST_HEAD(regulator_map_list); static LIST_HEAD(regulator_ena_gpio_list); static LIST_HEAD(regulator_supply_alias_list); +static LIST_HEAD(regulator_coupler_list); static bool has_full_constraints; static struct dentry *debugfs_root; @@ -3439,11 +3441,10 @@ static int regulator_get_optimal_voltage(struct regulator_dev *rdev, struct coupling_desc *c_desc = &rdev->coupling_desc; struct regulator_dev **c_rdevs = c_desc->coupled_rdevs; struct regulation_constraints *constraints = rdev->constraints; - int max_spread = constraints->max_spread; int desired_min_uV = 0, desired_max_uV = INT_MAX; int max_current_uV = 0, min_current_uV = INT_MAX; int highest_min_uV = 0, target_uV, possible_uV; - int i, ret; + int i, ret, max_spread; bool done; *current_uV = -1; @@ -3497,6 +3498,8 @@ static int regulator_get_optimal_voltage(struct regulator_dev *rdev, } } + max_spread = constraints->max_spread[0]; + /* * Let target_uV be equal to the desired one if possible. * If not, set it to minimum voltage, allowed by other coupled @@ -3578,9 +3581,11 @@ static int regulator_balance_voltage(struct regulator_dev *rdev, struct regulator_dev **c_rdevs; struct regulator_dev *best_rdev; struct coupling_desc *c_desc = &rdev->coupling_desc; + struct regulator_coupler *coupler = c_desc->coupler; int i, ret, n_coupled, best_min_uV, best_max_uV, best_c_rdev; - bool best_c_rdev_done, c_rdev_done[MAX_COUPLED]; unsigned int delta, best_delta; + unsigned long c_rdev_done = 0; + bool best_c_rdev_done; c_rdevs = c_desc->coupled_rdevs; n_coupled = c_desc->n_coupled; @@ -3597,8 +3602,9 @@ static int regulator_balance_voltage(struct regulator_dev *rdev, return -EPERM; } - for (i = 0; i < n_coupled; i++) - c_rdev_done[i] = false; + /* Invoke custom balancer for customized couplers */ + if (coupler && coupler->balance_voltage) + return coupler->balance_voltage(coupler, rdev, state); /* * Find the best possible voltage change on each loop. Leave the loop @@ -3625,7 +3631,7 @@ static int regulator_balance_voltage(struct regulator_dev *rdev, */ int optimal_uV = 0, optimal_max_uV = 0, current_uV = 0; - if (c_rdev_done[i]) + if (test_bit(i, &c_rdev_done)) continue; ret = regulator_get_optimal_voltage(c_rdevs[i], @@ -3660,7 +3666,8 @@ static int regulator_balance_voltage(struct regulator_dev *rdev, if (ret < 0) goto out; - c_rdev_done[best_c_rdev] = best_c_rdev_done; + if (best_c_rdev_done) + set_bit(best_c_rdev, &c_rdev_done); } while (n_coupled > 1); @@ -4712,8 +4719,60 @@ static int regulator_register_resolve_supply(struct device *dev, void *data) return 0; } +int regulator_coupler_register(struct regulator_coupler *coupler) +{ + mutex_lock(®ulator_list_mutex); + list_add_tail(&coupler->list, ®ulator_coupler_list); + mutex_unlock(®ulator_list_mutex); + + return 0; +} + +static struct regulator_coupler * +regulator_find_coupler(struct regulator_dev *rdev) +{ + struct regulator_coupler *coupler; + int err; + + /* + * Note that regulators are appended to the list and the generic + * coupler is registered first, hence it will be attached at last + * if nobody cared. + */ + list_for_each_entry_reverse(coupler, ®ulator_coupler_list, list) { + err = coupler->attach_regulator(coupler, rdev); + if (!err) { + if (!coupler->balance_voltage && + rdev->coupling_desc.n_coupled > 2) + goto err_unsupported; + + return coupler; + } + + if (err < 0) + return ERR_PTR(err); + + if (err == 1) + continue; + + break; + } + + return ERR_PTR(-EINVAL); + +err_unsupported: + if (coupler->detach_regulator) + coupler->detach_regulator(coupler, rdev); + + rdev_err(rdev, + "Voltage balancing for multiple regulator couples is unimplemented\n"); + + return ERR_PTR(-EPERM); +} + static void regulator_resolve_coupling(struct regulator_dev *rdev) { + struct regulator_coupler *coupler = rdev->coupling_desc.coupler; struct coupling_desc *c_desc = &rdev->coupling_desc; int n_coupled = c_desc->n_coupled; struct regulator_dev *c_rdev; @@ -4729,6 +4788,12 @@ static void regulator_resolve_coupling(struct regulator_dev *rdev) if (!c_rdev) continue; + if (c_rdev->coupling_desc.coupler != coupler) { + rdev_err(rdev, "coupler mismatch with %s\n", + rdev_get_name(c_rdev)); + return; + } + regulator_lock(c_rdev); c_desc->coupled_rdevs[i] = c_rdev; @@ -4742,10 +4807,12 @@ static void regulator_resolve_coupling(struct regulator_dev *rdev) static void regulator_remove_coupling(struct regulator_dev *rdev) { + struct regulator_coupler *coupler = rdev->coupling_desc.coupler; struct coupling_desc *__c_desc, *c_desc = &rdev->coupling_desc; struct regulator_dev *__c_rdev, *c_rdev; unsigned int __n_coupled, n_coupled; int i, k; + int err; n_coupled = c_desc->n_coupled; @@ -4775,21 +4842,33 @@ static void regulator_remove_coupling(struct regulator_dev *rdev) c_desc->coupled_rdevs[i] = NULL; c_desc->n_resolved--; } + + if (coupler && coupler->detach_regulator) { + err = coupler->detach_regulator(coupler, rdev); + if (err) + rdev_err(rdev, "failed to detach from coupler: %d\n", + err); + } + + kfree(rdev->coupling_desc.coupled_rdevs); + rdev->coupling_desc.coupled_rdevs = NULL; } static int regulator_init_coupling(struct regulator_dev *rdev) { - int n_phandles; + int err, n_phandles; + size_t alloc_size; if (!IS_ENABLED(CONFIG_OF)) n_phandles = 0; else n_phandles = of_get_n_coupled(rdev); - if (n_phandles + 1 > MAX_COUPLED) { - rdev_err(rdev, "too many regulators coupled\n"); - return -EPERM; - } + alloc_size = sizeof(*rdev) * (n_phandles + 1); + + rdev->coupling_desc.coupled_rdevs = kzalloc(alloc_size, GFP_KERNEL); + if (!rdev->coupling_desc.coupled_rdevs) + return -ENOMEM; /* * Every regulator should always have coupling descriptor filled with @@ -4803,23 +4882,35 @@ static int regulator_init_coupling(struct regulator_dev *rdev) if (n_phandles == 0) return 0; - /* regulator, which can't change its voltage, can't be coupled */ - if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) { - rdev_err(rdev, "voltage operation not allowed\n"); + if (!of_check_coupling_data(rdev)) return -EPERM; - } - if (rdev->constraints->max_spread <= 0) { - rdev_err(rdev, "wrong max_spread value\n"); - return -EPERM; + rdev->coupling_desc.coupler = regulator_find_coupler(rdev); + if (IS_ERR(rdev->coupling_desc.coupler)) { + err = PTR_ERR(rdev->coupling_desc.coupler); + rdev_err(rdev, "failed to get coupler: %d\n", err); + return err; } - if (!of_check_coupling_data(rdev)) + return 0; +} + +static int generic_coupler_attach(struct regulator_coupler *coupler, + struct regulator_dev *rdev) +{ + if (rdev->coupling_desc.n_coupled > 2) { + rdev_err(rdev, + "Voltage balancing for multiple regulator couples is unimplemented\n"); return -EPERM; + } return 0; } +static struct regulator_coupler generic_regulator_coupler = { + .attach_regulator = generic_coupler_attach, +}; + /** * regulator_register - register regulator * @regulator_desc: regulator to register @@ -4981,7 +5072,9 @@ regulator_register(const struct regulator_desc *regulator_desc, if (ret < 0) goto wash; + mutex_lock(®ulator_list_mutex); ret = regulator_init_coupling(rdev); + mutex_unlock(®ulator_list_mutex); if (ret < 0) goto wash; @@ -5030,6 +5123,7 @@ regulator_register(const struct regulator_desc *regulator_desc, unset_supplies: mutex_lock(®ulator_list_mutex); unset_regulator_supplies(rdev); + regulator_remove_coupling(rdev); mutex_unlock(®ulator_list_mutex); wash: kfree(rdev->constraints); @@ -5485,6 +5579,8 @@ static int __init regulator_init(void) #endif regulator_dummy_init(); + regulator_coupler_register(&generic_regulator_coupler); + return ret; } diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index 6dca0ba044d8..db1cb2714b92 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -25,7 +25,8 @@ static const char *const regulator_states[PM_SUSPEND_MAX + 1] = { [PM_SUSPEND_MAX] = "regulator-state-disk", }; -static void of_get_regulation_constraints(struct device_node *np, +static int of_get_regulation_constraints(struct device *dev, + struct device_node *np, struct regulator_init_data **init_data, const struct regulator_desc *desc) { @@ -34,8 +35,13 @@ static void of_get_regulation_constraints(struct device_node *np, struct device_node *suspend_np; unsigned int mode; int ret, i, len; + int n_phandles; u32 pval; + n_phandles = of_count_phandle_with_args(np, "regulator-coupled-with", + NULL); + n_phandles = max(n_phandles, 0); + constraints->name = of_get_property(np, "regulator-name", NULL); if (!of_property_read_u32(np, "regulator-min-microvolt", &pval)) @@ -167,9 +173,17 @@ static void of_get_regulation_constraints(struct device_node *np, if (!of_property_read_u32(np, "regulator-system-load", &pval)) constraints->system_load = pval; - if (!of_property_read_u32(np, "regulator-coupled-max-spread", - &pval)) - constraints->max_spread = pval; + if (n_phandles) { + constraints->max_spread = devm_kzalloc(dev, + sizeof(*constraints->max_spread) * n_phandles, + GFP_KERNEL); + + if (!constraints->max_spread) + return -ENOMEM; + + of_property_read_u32_array(np, "regulator-coupled-max-spread", + constraints->max_spread, n_phandles); + } if (!of_property_read_u32(np, "regulator-max-step-microvolt", &pval)) @@ -246,6 +260,8 @@ static void of_get_regulation_constraints(struct device_node *np, suspend_state = NULL; suspend_np = NULL; } + + return 0; } /** @@ -271,7 +287,9 @@ struct regulator_init_data *of_get_regulator_init_data(struct device *dev, if (!init_data) return NULL; /* Out of memory? */ - of_get_regulation_constraints(node, &init_data, desc); + if (of_get_regulation_constraints(dev, node, &init_data, desc)) + return NULL; + return init_data; } EXPORT_SYMBOL_GPL(of_get_regulator_init_data); @@ -477,7 +495,8 @@ int of_get_n_coupled(struct regulator_dev *rdev) /* Looks for "to_find" device_node in src's "regulator-coupled-with" property */ static bool of_coupling_find_node(struct device_node *src, - struct device_node *to_find) + struct device_node *to_find, + int *index) { int n_phandles, i; bool found = false; @@ -499,8 +518,10 @@ static bool of_coupling_find_node(struct device_node *src, of_node_put(tmp); - if (found) + if (found) { + *index = i; break; + } } return found; @@ -521,22 +542,23 @@ static bool of_coupling_find_node(struct device_node *src, */ bool of_check_coupling_data(struct regulator_dev *rdev) { - int max_spread = rdev->constraints->max_spread; struct device_node *node = rdev->dev.of_node; int n_phandles = of_get_n_coupled(rdev); struct device_node *c_node; + int index; int i; bool ret = true; - if (max_spread <= 0) { - dev_err(&rdev->dev, "max_spread value invalid\n"); - return false; - } - /* iterate over rdev's phandles */ for (i = 0; i < n_phandles; i++) { + int max_spread = rdev->constraints->max_spread[i]; int c_max_spread, c_n_phandles; + if (max_spread <= 0) { + dev_err(&rdev->dev, "max_spread value invalid\n"); + return false; + } + c_node = of_parse_phandle(node, "regulator-coupled-with", i); @@ -553,22 +575,23 @@ bool of_check_coupling_data(struct regulator_dev *rdev) goto clean; } - if (of_property_read_u32(c_node, "regulator-coupled-max-spread", - &c_max_spread)) { + if (!of_coupling_find_node(c_node, node, &index)) { + dev_err(&rdev->dev, "missing 2-way linking for coupled regulators\n"); ret = false; goto clean; } - if (c_max_spread != max_spread) { - dev_err(&rdev->dev, - "coupled regulators max_spread mismatch\n"); + if (of_property_read_u32_index(c_node, "regulator-coupled-max-spread", + index, &c_max_spread)) { ret = false; goto clean; } - if (!of_coupling_find_node(c_node, node)) { - dev_err(&rdev->dev, "missing 2-way linking for coupled regulators\n"); + if (c_max_spread != max_spread) { + dev_err(&rdev->dev, + "coupled regulators max_spread mismatch\n"); ret = false; + goto clean; } clean: diff --git a/include/linux/regulator/coupler.h b/include/linux/regulator/coupler.h new file mode 100644 index 000000000000..98dd1f74d605 --- /dev/null +++ b/include/linux/regulator/coupler.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * coupler.h -- SoC Regulator support, coupler API. + * + * Regulator Coupler Interface. + */ + +#ifndef __LINUX_REGULATOR_COUPLER_H_ +#define __LINUX_REGULATOR_COUPLER_H_ + +#include +#include + +struct regulator_coupler; +struct regulator_dev; + +/** + * struct regulator_coupler - customized regulator's coupler + * + * Regulator's coupler allows to customize coupling algorithm. + * + * @list: couplers list entry + * @attach_regulator: Callback invoked on creation of a coupled regulator, + * couples are unresolved at this point. The callee should + * check that it could handle the regulator and return 0 on + * success, -errno on failure and 1 if given regulator is + * not suitable for this coupler (case of having multiple + * regulators in a system). Callback shall be implemented. + * @detach_regulator: Callback invoked on destruction of a coupled regulator. + * This callback is optional and could be NULL. + * @balance_voltage: Callback invoked when voltage of a coupled regulator is + * changing. Called with all of the coupled rdev's being held + * under "consumer lock". The callee should perform voltage + * balancing, changing voltage of the coupled regulators as + * needed. It's up to the coupler to verify the voltage + * before changing it in hardware, i.e. coupler should + * check consumer's min/max and etc. This callback is + * optional and could be NULL, in which case a generic + * voltage balancer will be used. + */ +struct regulator_coupler { + struct list_head list; + + int (*attach_regulator)(struct regulator_coupler *coupler, + struct regulator_dev *rdev); + int (*detach_regulator)(struct regulator_coupler *coupler, + struct regulator_dev *rdev); + int (*balance_voltage)(struct regulator_coupler *coupler, + struct regulator_dev *rdev, + suspend_state_t state); +}; + +#ifdef CONFIG_REGULATOR +int regulator_coupler_register(struct regulator_coupler *coupler); +#else +static inline int regulator_coupler_register(struct regulator_coupler *coupler) +{ + return 0; +} +#endif + +#endif diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 377da2357118..31b38a2b6995 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -15,8 +15,6 @@ #ifndef __LINUX_REGULATOR_DRIVER_H_ #define __LINUX_REGULATOR_DRIVER_H_ -#define MAX_COUPLED 2 - #include #include #include @@ -426,7 +424,8 @@ struct regulator_config { * incremented. */ struct coupling_desc { - struct regulator_dev *coupled_rdevs[MAX_COUPLED]; + struct regulator_dev **coupled_rdevs; + struct regulator_coupler *coupler; int n_resolved; int n_coupled; }; @@ -552,4 +551,5 @@ void regulator_unlock(struct regulator_dev *rdev); */ int regulator_desc_list_voltage_linear_range(const struct regulator_desc *desc, unsigned int selector); + #endif diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 1d34a70ffda2..21db06e5c1ed 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -156,7 +156,7 @@ struct regulation_constraints { int system_load; /* used for coupled regulators */ - int max_spread; + u32 *max_spread; /* used for changing voltage in steps */ int max_uV_step; -- cgit v1.2.3 From d22b85a1b97d12a4940ef9d778f6122546736f78 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 24 Jun 2019 00:08:32 +0300 Subject: regulator: core: Expose some of core functions needed by couplers Expose some of internal functions that are required for implementation of customized regulator couplers. Signed-off-by: Dmitry Osipenko Signed-off-by: Mark Brown --- drivers/regulator/core.c | 58 ++++++++++++++++++--------------------- include/linux/regulator/coupler.h | 35 +++++++++++++++++++++++ 2 files changed, 62 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 12c870f790f5..b9bc45128b8c 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -100,7 +100,6 @@ struct regulator_supply_alias { static int _regulator_is_enabled(struct regulator_dev *rdev); static int _regulator_disable(struct regulator *regulator); -static int _regulator_get_voltage(struct regulator_dev *rdev); static int _regulator_get_current_limit(struct regulator_dev *rdev); static unsigned int _regulator_get_mode(struct regulator_dev *rdev); static int _notifier_call_chain(struct regulator_dev *rdev, @@ -109,15 +108,12 @@ static int _regulator_do_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV); static int regulator_balance_voltage(struct regulator_dev *rdev, suspend_state_t state); -static int regulator_set_voltage_rdev(struct regulator_dev *rdev, - int min_uV, int max_uV, - suspend_state_t state); static struct regulator *create_regulator(struct regulator_dev *rdev, struct device *dev, const char *supply_name); static void _regulator_put(struct regulator *regulator); -static const char *rdev_get_name(struct regulator_dev *rdev) +const char *rdev_get_name(struct regulator_dev *rdev) { if (rdev->constraints && rdev->constraints->name) return rdev->constraints->name; @@ -431,8 +427,8 @@ static struct device_node *of_get_regulator(struct device *dev, const char *supp } /* Platform voltage constraint check */ -static int regulator_check_voltage(struct regulator_dev *rdev, - int *min_uV, int *max_uV) +int regulator_check_voltage(struct regulator_dev *rdev, + int *min_uV, int *max_uV) { BUG_ON(*min_uV > *max_uV); @@ -464,9 +460,9 @@ static int regulator_check_states(suspend_state_t state) /* Make sure we select a voltage that suits the needs of all * regulator consumers */ -static int regulator_check_consumers(struct regulator_dev *rdev, - int *min_uV, int *max_uV, - suspend_state_t state) +int regulator_check_consumers(struct regulator_dev *rdev, + int *min_uV, int *max_uV, + suspend_state_t state) { struct regulator *regulator; struct regulator_voltage *voltage; @@ -577,7 +573,7 @@ static ssize_t regulator_uV_show(struct device *dev, ssize_t ret; regulator_lock(rdev); - ret = sprintf(buf, "%d\n", _regulator_get_voltage(rdev)); + ret = sprintf(buf, "%d\n", regulator_get_voltage_rdev(rdev)); regulator_unlock(rdev); return ret; @@ -948,7 +944,7 @@ static int drms_uA_update(struct regulator_dev *rdev) rdev_err(rdev, "failed to set load %d\n", current_uA); } else { /* get output voltage */ - output_uV = _regulator_get_voltage(rdev); + output_uV = regulator_get_voltage_rdev(rdev); if (output_uV <= 0) { rdev_err(rdev, "invalid output voltage found\n"); return -EINVAL; @@ -1061,7 +1057,7 @@ static void print_constraints(struct regulator_dev *rdev) if (!constraints->min_uV || constraints->min_uV != constraints->max_uV) { - ret = _regulator_get_voltage(rdev); + ret = regulator_get_voltage_rdev(rdev); if (ret > 0) count += scnprintf(buf + count, len - count, "at %d mV ", ret / 1000); @@ -1120,7 +1116,7 @@ static int machine_constraints_voltage(struct regulator_dev *rdev, if (rdev->constraints->apply_uV && rdev->constraints->min_uV && rdev->constraints->max_uV) { int target_min, target_max; - int current_uV = _regulator_get_voltage(rdev); + int current_uV = regulator_get_voltage_rdev(rdev); if (current_uV == -ENOTRECOVERABLE) { /* This regulator can't be read and must be initialized */ @@ -1130,7 +1126,7 @@ static int machine_constraints_voltage(struct regulator_dev *rdev, _regulator_do_set_voltage(rdev, rdev->constraints->min_uV, rdev->constraints->max_uV); - current_uV = _regulator_get_voltage(rdev); + current_uV = regulator_get_voltage_rdev(rdev); } if (current_uV < 0) { @@ -3072,7 +3068,7 @@ static int _regulator_call_set_voltage(struct regulator_dev *rdev, struct pre_voltage_change_data data; int ret; - data.old_uV = _regulator_get_voltage(rdev); + data.old_uV = regulator_get_voltage_rdev(rdev); data.min_uV = min_uV; data.max_uV = max_uV; ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_VOLTAGE_CHANGE, @@ -3096,7 +3092,7 @@ static int _regulator_call_set_voltage_sel(struct regulator_dev *rdev, struct pre_voltage_change_data data; int ret; - data.old_uV = _regulator_get_voltage(rdev); + data.old_uV = regulator_get_voltage_rdev(rdev); data.min_uV = uV; data.max_uV = uV; ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_VOLTAGE_CHANGE, @@ -3149,7 +3145,7 @@ static int _regulator_do_set_voltage(struct regulator_dev *rdev, unsigned int selector; int old_selector = -1; const struct regulator_ops *ops = rdev->desc->ops; - int old_uV = _regulator_get_voltage(rdev); + int old_uV = regulator_get_voltage_rdev(rdev); trace_regulator_set_voltage(rdev_get_name(rdev), min_uV, max_uV); @@ -3176,7 +3172,7 @@ static int _regulator_do_set_voltage(struct regulator_dev *rdev, best_val = ops->list_voltage(rdev, selector); else - best_val = _regulator_get_voltage(rdev); + best_val = regulator_get_voltage_rdev(rdev); } } else if (ops->set_voltage_sel) { @@ -3295,7 +3291,7 @@ static int regulator_set_voltage_unlocked(struct regulator *regulator, * changing the voltage. */ if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) { - current_uV = _regulator_get_voltage(rdev); + current_uV = regulator_get_voltage_rdev(rdev); if (min_uV <= current_uV && current_uV <= max_uV) { voltage->min_uV = min_uV; voltage->max_uV = max_uV; @@ -3332,8 +3328,8 @@ out: return ret; } -static int regulator_set_voltage_rdev(struct regulator_dev *rdev, int min_uV, - int max_uV, suspend_state_t state) +int regulator_set_voltage_rdev(struct regulator_dev *rdev, int min_uV, + int max_uV, suspend_state_t state) { int best_supply_uV = 0; int supply_change_uV = 0; @@ -3361,7 +3357,7 @@ static int regulator_set_voltage_rdev(struct regulator_dev *rdev, int min_uV, best_supply_uV += rdev->desc->min_dropout_uV; - current_supply_uV = _regulator_get_voltage(rdev->supply->rdev); + current_supply_uV = regulator_get_voltage_rdev(rdev->supply->rdev); if (current_supply_uV < 0) { ret = current_supply_uV; goto out; @@ -3412,7 +3408,7 @@ static int regulator_limit_voltage_step(struct regulator_dev *rdev, return 1; if (*current_uV < 0) { - *current_uV = _regulator_get_voltage(rdev); + *current_uV = regulator_get_voltage_rdev(rdev); if (*current_uV < 0) return *current_uV; @@ -3517,7 +3513,7 @@ static int regulator_get_optimal_voltage(struct regulator_dev *rdev, if (!_regulator_is_enabled(c_rdevs[i])) continue; - tmp_act = _regulator_get_voltage(c_rdevs[i]); + tmp_act = regulator_get_voltage_rdev(c_rdevs[i]); if (tmp_act < 0) return tmp_act; @@ -3559,7 +3555,7 @@ finish: if (n_coupled > 1 && *current_uV == -1) { if (_regulator_is_enabled(rdev)) { - ret = _regulator_get_voltage(rdev); + ret = regulator_get_voltage_rdev(rdev); if (ret < 0) return ret; @@ -3923,7 +3919,7 @@ out: } EXPORT_SYMBOL_GPL(regulator_sync_voltage); -static int _regulator_get_voltage(struct regulator_dev *rdev) +int regulator_get_voltage_rdev(struct regulator_dev *rdev) { int sel, ret; bool bypassed; @@ -3940,7 +3936,7 @@ static int _regulator_get_voltage(struct regulator_dev *rdev) return -EPROBE_DEFER; } - return _regulator_get_voltage(rdev->supply->rdev); + return regulator_get_voltage_rdev(rdev->supply->rdev); } } @@ -3956,7 +3952,7 @@ static int _regulator_get_voltage(struct regulator_dev *rdev) } else if (rdev->desc->fixed_uV && (rdev->desc->n_voltages == 1)) { ret = rdev->desc->fixed_uV; } else if (rdev->supply) { - ret = _regulator_get_voltage(rdev->supply->rdev); + ret = regulator_get_voltage_rdev(rdev->supply->rdev); } else { return -EINVAL; } @@ -3981,7 +3977,7 @@ int regulator_get_voltage(struct regulator *regulator) int ret; regulator_lock_dependent(regulator->rdev, &ww_ctx); - ret = _regulator_get_voltage(regulator->rdev); + ret = regulator_get_voltage_rdev(regulator->rdev); regulator_unlock_dependent(regulator->rdev, &ww_ctx); return ret; @@ -5377,7 +5373,7 @@ static void regulator_summary_show_subtree(struct seq_file *s, rdev->use_count, rdev->open_count, rdev->bypass_count, regulator_opmode_to_str(opmode)); - seq_printf(s, "%5dmV ", _regulator_get_voltage(rdev) / 1000); + seq_printf(s, "%5dmV ", regulator_get_voltage_rdev(rdev) / 1000); seq_printf(s, "%5dmA ", _regulator_get_current_limit_unlocked(rdev) / 1000); diff --git a/include/linux/regulator/coupler.h b/include/linux/regulator/coupler.h index 98dd1f74d605..0212d6255e4e 100644 --- a/include/linux/regulator/coupler.h +++ b/include/linux/regulator/coupler.h @@ -52,11 +52,46 @@ struct regulator_coupler { #ifdef CONFIG_REGULATOR int regulator_coupler_register(struct regulator_coupler *coupler); +const char *rdev_get_name(struct regulator_dev *rdev); +int regulator_check_consumers(struct regulator_dev *rdev, + int *min_uV, int *max_uV, + suspend_state_t state); +int regulator_check_voltage(struct regulator_dev *rdev, + int *min_uV, int *max_uV); +int regulator_get_voltage_rdev(struct regulator_dev *rdev); +int regulator_set_voltage_rdev(struct regulator_dev *rdev, + int min_uV, int max_uV, + suspend_state_t state); #else static inline int regulator_coupler_register(struct regulator_coupler *coupler) { return 0; } +static inline const char *rdev_get_name(struct regulator_dev *rdev) +{ + return NULL; +} +static inline int regulator_check_consumers(struct regulator_dev *rdev, + int *min_uV, int *max_uV, + suspend_state_t state) +{ + return -EINVAL; +} +static inline int regulator_check_voltage(struct regulator_dev *rdev, + int *min_uV, int *max_uV) +{ + return -EINVAL; +} +static inline int regulator_get_voltage_rdev(struct regulator_dev *rdev) +{ + return -EINVAL; +} +static inline int regulator_set_voltage_rdev(struct regulator_dev *rdev, + int min_uV, int max_uV, + suspend_state_t state) +{ + return -EINVAL; +} #endif #endif -- cgit v1.2.3 From 4f41845b340783eaec9cc2840fe3cb9a00574054 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 25 Jun 2019 12:51:25 +0100 Subject: iommu/io-pgtable: Replace IO_PGTABLE_QUIRK_NO_DMA with specific flag IO_PGTABLE_QUIRK_NO_DMA is a bit of a misnomer, since it's really just an indication of whether or not the page-table walker for the IOMMU is coherent with the CPU caches. Since cache coherency is more than just a quirk, replace the flag with its own field in the io_pgtable_cfg structure. Cc: Bjorn Andersson Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 4 +--- drivers/iommu/arm-smmu.c | 4 +--- drivers/iommu/io-pgtable-arm-v7s.c | 10 +++++----- drivers/iommu/io-pgtable-arm.c | 19 ++++++++----------- drivers/iommu/ipmmu-vmsa.c | 1 + include/linux/io-pgtable.h | 11 ++++------- 6 files changed, 20 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 65de2458999f..8ff8f61d9e1c 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1789,13 +1789,11 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) .pgsize_bitmap = smmu->pgsize_bitmap, .ias = ias, .oas = oas, + .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY, .tlb = &arm_smmu_gather_ops, .iommu_dev = smmu->dev, }; - if (smmu->features & ARM_SMMU_FEAT_COHERENCY) - pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; - if (smmu_domain->non_strict) pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 5e54cc0a28b3..009156bb6d42 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -895,13 +895,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .pgsize_bitmap = smmu->pgsize_bitmap, .ias = ias, .oas = oas, + .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK, .tlb = smmu_domain->tlb_ops, .iommu_dev = smmu->dev, }; - if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) - pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; - if (smmu_domain->non_strict) pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT; diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 9a8a8870e267..8454de93e356 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -215,7 +215,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, dev_err(dev, "Page table does not fit in PTE: %pa", &phys); goto out_free; } - if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { + if (table && !cfg->coherent_walk) { dma = dma_map_single(dev, table, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) goto out_free; @@ -249,7 +249,7 @@ static void __arm_v7s_free_table(void *table, int lvl, struct device *dev = cfg->iommu_dev; size_t size = ARM_V7S_TABLE_SIZE(lvl); - if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) + if (!cfg->coherent_walk) dma_unmap_single(dev, __arm_v7s_dma_addr(table), size, DMA_TO_DEVICE); if (lvl == 1) @@ -261,7 +261,7 @@ static void __arm_v7s_free_table(void *table, int lvl, static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries, struct io_pgtable_cfg *cfg) { - if (cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) + if (cfg->coherent_walk) return; dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep), @@ -727,7 +727,6 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, IO_PGTABLE_QUIRK_NO_PERMS | IO_PGTABLE_QUIRK_TLBI_ON_MAP | IO_PGTABLE_QUIRK_ARM_MTK_4GB | - IO_PGTABLE_QUIRK_NO_DMA | IO_PGTABLE_QUIRK_NON_STRICT)) return NULL; @@ -846,7 +845,8 @@ static int __init arm_v7s_do_selftests(void) .tlb = &dummy_tlb_ops, .oas = 32, .ias = 32, - .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA, + .coherent_walk = true, + .quirks = IO_PGTABLE_QUIRK_ARM_NS, .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, }; unsigned int iova, size, iova_start; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 2454ac11aa97..91d0a4228b58 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -252,7 +252,7 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, return NULL; pages = page_address(p); - if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { + if (!cfg->coherent_walk) { dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) goto out_free; @@ -278,7 +278,7 @@ out_free: static void __arm_lpae_free_pages(void *pages, size_t size, struct io_pgtable_cfg *cfg) { - if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) + if (!cfg->coherent_walk) dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages), size, DMA_TO_DEVICE); free_pages((unsigned long)pages, get_order(size)); @@ -296,7 +296,7 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte, { *ptep = pte; - if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) + if (!cfg->coherent_walk) __arm_lpae_sync_pte(ptep, cfg); } @@ -374,8 +374,7 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table, old = cmpxchg64_relaxed(ptep, curr, new); - if ((cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) || - (old & ARM_LPAE_PTE_SW_SYNC)) + if (cfg->coherent_walk || (old & ARM_LPAE_PTE_SW_SYNC)) return old; /* Even if it's not ours, there's no point waiting; just kick it */ @@ -416,8 +415,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, pte = arm_lpae_install_table(cptep, ptep, 0, cfg); if (pte) __arm_lpae_free_pages(cptep, tblsz, cfg); - } else if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) && - !(pte & ARM_LPAE_PTE_SW_SYNC)) { + } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) { __arm_lpae_sync_pte(ptep, cfg); } @@ -799,7 +797,7 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) u64 reg; struct arm_lpae_io_pgtable *data; - if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA | + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NON_STRICT)) return NULL; @@ -894,8 +892,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) struct arm_lpae_io_pgtable *data; /* The NS quirk doesn't apply at stage 2 */ - if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NO_DMA | - IO_PGTABLE_QUIRK_NON_STRICT)) + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NON_STRICT)) return NULL; data = arm_lpae_alloc_pgtable(cfg); @@ -1230,7 +1227,7 @@ static int __init arm_lpae_do_selftests(void) struct io_pgtable_cfg cfg = { .tlb = &dummy_tlb_ops, .oas = 48, - .quirks = IO_PGTABLE_QUIRK_NO_DMA, + .coherent_walk = true, }; for (i = 0; i < ARRAY_SIZE(pgsize); ++i) { diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 9a380c10655e..12bcb95bdaa8 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -431,6 +431,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) * TODO: Add support for coherent walk through CCI with DVM and remove * cache handling. For now, delegate it to the io-pgtable code. */ + domain->cfg.coherent_walk = false; domain->cfg.iommu_dev = domain->mmu->root->dev; /* diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 76969a564831..b5a450a3bb47 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -44,6 +44,8 @@ struct iommu_gather_ops { * tables. * @ias: Input address (iova) size, in bits. * @oas: Output address (paddr) size, in bits. + * @coherent_walk A flag to indicate whether or not page table walks made + * by the IOMMU are coherent with the CPU caches. * @tlb: TLB management callbacks for this set of tables. * @iommu_dev: The device representing the DMA configuration for the * page table walker. @@ -68,11 +70,6 @@ struct io_pgtable_cfg { * when the SoC is in "4GB mode" and they can only access the high * remap of DRAM (0x1_00000000 to 0x1_ffffffff). * - * IO_PGTABLE_QUIRK_NO_DMA: Guarantees that the tables will only ever - * be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a - * software-emulated IOMMU), such that pagetable updates need not - * be treated as explicit DMA data. - * * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs * on unmap, for DMA domains using the flush queue mechanism for * delayed invalidation. @@ -81,12 +78,12 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) #define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2) #define IO_PGTABLE_QUIRK_ARM_MTK_4GB BIT(3) - #define IO_PGTABLE_QUIRK_NO_DMA BIT(4) - #define IO_PGTABLE_QUIRK_NON_STRICT BIT(5) + #define IO_PGTABLE_QUIRK_NON_STRICT BIT(4) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; unsigned int oas; + bool coherent_walk; const struct iommu_gather_ops *tlb; struct device *iommu_dev; -- cgit v1.2.3 From d98849aff87911013aadb730138ab728b52fc547 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Jun 2019 16:17:27 +0200 Subject: dma-direct: handle DMA_ATTR_NO_KERNEL_MAPPING in common code DMA_ATTR_NO_KERNEL_MAPPING is generally implemented by allocating normal cacheable pages or CMA memory, and then returning the page pointer as the opaque handle. Lift that code from the xtensa and generic dma remapping implementations into the generic dma-direct code so that we don't even call arch_dma_alloc for these allocations. Signed-off-by: Christoph Hellwig --- arch/xtensa/kernel/pci-dma.c | 8 +------- include/linux/dma-noncoherent.h | 2 ++ kernel/dma/direct.c | 14 ++++++++++++++ kernel/dma/remap.c | 13 ++----------- 4 files changed, 19 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index 9171bff76fc4..206771277dff 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -167,10 +167,6 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, *handle = phys_to_dma(dev, page_to_phys(page)); - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) { - return page; - } - #ifdef CONFIG_MMU if (PageHighMem(page)) { void *p; @@ -196,9 +192,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr, unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; struct page *page; - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) { - page = vaddr; - } else if (platform_vaddr_uncached(vaddr)) { + if (platform_vaddr_uncached(vaddr)) { page = virt_to_page(platform_vaddr_to_cached(vaddr)); } else { #ifdef CONFIG_MMU diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index 732919ac5c11..53ee36ecdf37 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -28,6 +28,8 @@ static inline bool dma_alloc_need_uncached(struct device *dev, { if (dev_is_dma_coherent(dev)) return false; + if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) + return false; if (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) && (attrs & DMA_ATTR_NON_CONSISTENT)) return false; diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index fc354f4f490b..b90e1aede743 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -138,6 +138,14 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, if (!page) return NULL; + if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) { + /* remove any dirty cache lines on the kernel alias */ + if (!PageHighMem(page)) + arch_dma_prep_coherent(page, size); + /* return the page pointer as the opaque cookie */ + return page; + } + if (PageHighMem(page)) { /* * Depending on the cma= arguments and per-arch setup @@ -178,6 +186,12 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, { unsigned int page_order = get_order(size); + if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) { + /* cpu_addr is a struct page cookie, not a kernel address */ + __dma_direct_free_pages(dev, size, cpu_addr); + return; + } + if (force_dma_unencrypted()) set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c index 0207e3764d52..a594aec07882 100644 --- a/kernel/dma/remap.c +++ b/kernel/dma/remap.c @@ -202,8 +202,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, size = PAGE_ALIGN(size); - if (!gfpflags_allow_blocking(flags) && - !(attrs & DMA_ATTR_NO_KERNEL_MAPPING)) { + if (!gfpflags_allow_blocking(flags)) { ret = dma_alloc_from_pool(size, &page, flags); if (!ret) return NULL; @@ -217,11 +216,6 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, /* remove any dirty cache lines on the kernel alias */ arch_dma_prep_coherent(page, size); - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) { - ret = page; /* opaque cookie */ - goto done; - } - /* create a coherent mapping */ ret = dma_common_contiguous_remap(page, size, VM_USERMAP, arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs), @@ -240,10 +234,7 @@ done: void arch_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) { - /* vaddr is a struct page cookie, not a kernel address */ - __dma_direct_free_pages(dev, size, vaddr); - } else if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) { + if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) { phys_addr_t phys = dma_to_phys(dev, dma_handle); struct page *page = pfn_to_page(__phys_to_pfn(phys)); -- cgit v1.2.3 From b2c200e3f2fd1158f5f1c93ccb2e0a27d96c4a7a Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Thu, 18 Apr 2019 11:37:47 +0200 Subject: pwm: Add consumer device link MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a device link between the PWM consumer and the PWM provider. This enforces the PWM user to get suspended before the PWM provider. It allows proper synchronization of suspend/resume sequences: the PWM user is responsible for properly stopping PWM, before the provider gets suspended: see [1]. Add the device link in: - of_pwm_get() - pwm_get() - devm_*pwm_get() variants as it requires a reference to the device for the PWM consumer. [1] https://lkml.org/lkml/2019/2/5/770 Suggested-by: Thierry Reding Acked-by: Uwe Kleine-König Signed-off-by: Fabrice Gasnier Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++--- include/linux/pwm.h | 6 ++++-- 2 files changed, 51 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 3998ebd51db4..60b8ccc1fd7c 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -639,8 +639,35 @@ static struct pwm_chip *of_node_to_pwmchip(struct device_node *np) return ERR_PTR(-EPROBE_DEFER); } +static struct device_link *pwm_device_link_add(struct device *dev, + struct pwm_device *pwm) +{ + struct device_link *dl; + + if (!dev) { + /* + * No device for the PWM consumer has been provided. It may + * impact the PM sequence ordering: the PWM supplier may get + * suspended before the consumer. + */ + dev_warn(pwm->chip->dev, + "No consumer device specified to create a link to\n"); + return NULL; + } + + dl = device_link_add(dev, pwm->chip->dev, DL_FLAG_AUTOREMOVE_CONSUMER); + if (!dl) { + dev_err(dev, "failed to create device link to %s\n", + dev_name(pwm->chip->dev)); + return ERR_PTR(-EINVAL); + } + + return dl; +} + /** * of_pwm_get() - request a PWM via the PWM framework + * @dev: device for PWM consumer * @np: device node to get the PWM from * @con_id: consumer name * @@ -658,10 +685,12 @@ static struct pwm_chip *of_node_to_pwmchip(struct device_node *np) * Returns: A pointer to the requested PWM device or an ERR_PTR()-encoded * error code on failure. */ -struct pwm_device *of_pwm_get(struct device_node *np, const char *con_id) +struct pwm_device *of_pwm_get(struct device *dev, struct device_node *np, + const char *con_id) { struct pwm_device *pwm = NULL; struct of_phandle_args args; + struct device_link *dl; struct pwm_chip *pc; int index = 0; int err; @@ -692,6 +721,14 @@ struct pwm_device *of_pwm_get(struct device_node *np, const char *con_id) if (IS_ERR(pwm)) goto put; + dl = pwm_device_link_add(dev, pwm); + if (IS_ERR(dl)) { + /* of_xlate ended up calling pwm_request_from_chip() */ + pwm_free(pwm); + pwm = ERR_CAST(dl); + goto put; + } + /* * If a consumer name was not given, try to look it up from the * "pwm-names" property if it exists. Otherwise use the name of @@ -767,6 +804,7 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id) const char *dev_id = dev ? dev_name(dev) : NULL; struct pwm_device *pwm; struct pwm_chip *chip; + struct device_link *dl; unsigned int best = 0; struct pwm_lookup *p, *chosen = NULL; unsigned int match; @@ -774,7 +812,7 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id) /* look up via DT first */ if (IS_ENABLED(CONFIG_OF) && dev && dev->of_node) - return of_pwm_get(dev->of_node, con_id); + return of_pwm_get(dev, dev->of_node, con_id); /* * We look up the provider in the static table typically provided by @@ -851,6 +889,12 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id) if (IS_ERR(pwm)) return pwm; + dl = pwm_device_link_add(dev, pwm); + if (IS_ERR(dl)) { + pwm_free(pwm); + return ERR_CAST(dl); + } + pwm->args.period = chosen->period; pwm->args.polarity = chosen->polarity; @@ -943,7 +987,7 @@ struct pwm_device *devm_of_pwm_get(struct device *dev, struct device_node *np, if (!ptr) return ERR_PTR(-ENOMEM); - pwm = of_pwm_get(np, con_id); + pwm = of_pwm_get(dev, np, con_id); if (!IS_ERR(pwm)) { *ptr = pwm; devres_add(dev, ptr); diff --git a/include/linux/pwm.h b/include/linux/pwm.h index eaa5c6e3fc9f..8bf5d5f6267d 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -405,7 +405,8 @@ struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc, const struct of_phandle_args *args); struct pwm_device *pwm_get(struct device *dev, const char *con_id); -struct pwm_device *of_pwm_get(struct device_node *np, const char *con_id); +struct pwm_device *of_pwm_get(struct device *dev, struct device_node *np, + const char *con_id); void pwm_put(struct pwm_device *pwm); struct pwm_device *devm_pwm_get(struct device *dev, const char *con_id); @@ -493,7 +494,8 @@ static inline struct pwm_device *pwm_get(struct device *dev, return ERR_PTR(-ENODEV); } -static inline struct pwm_device *of_pwm_get(struct device_node *np, +static inline struct pwm_device *of_pwm_get(struct device *dev, + struct device_node *np, const char *con_id) { return ERR_PTR(-ENODEV); -- cgit v1.2.3 From 6282edb72bed5324352522d732080d4c1b9dfed6 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 30 May 2019 12:50:43 +0200 Subject: clocksource/drivers/exynos_mct: Increase priority over ARM arch timer Exynos SoCs based on CA7/CA15 have 2 timer interfaces: custom Exynos MCT (Multi Core Timer) and standard ARM Architected Timers. There are use cases, where both timer interfaces are used simultanously. One of such examples is using Exynos MCT for the main system timer and ARM Architected Timers for the KVM and virtualized guests (KVM requires arch timers). Exynos Multi-Core Timer driver (exynos_mct) must be however started before ARM Architected Timers (arch_timer), because they both share some common hardware blocks (global system counter) and turning on MCT is needed to get ARM Architected Timer working properly. To ensure selecting Exynos MCT as the main system timer, increase MCT timer rating. To ensure proper starting order of both timers during suspend/resume cycle, increase MCT hotplug priority over ARM Archictected Timers. Signed-off-by: Marek Szyprowski Reviewed-by: Krzysztof Kozlowski Reviewed-by: Chanwoo Choi Signed-off-by: Daniel Lezcano --- drivers/clocksource/exynos_mct.c | 4 ++-- include/linux/cpuhotplug.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index 34bd250d46c6..6aa10cbc1d59 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -209,7 +209,7 @@ static void exynos4_frc_resume(struct clocksource *cs) static struct clocksource mct_frc = { .name = "mct-frc", - .rating = 400, + .rating = 450, /* use value higher than ARM arch timer */ .read = exynos4_frc_read, .mask = CLOCKSOURCE_MASK(32), .flags = CLOCK_SOURCE_IS_CONTINUOUS, @@ -464,7 +464,7 @@ static int exynos4_mct_starting_cpu(unsigned int cpu) evt->set_state_oneshot_stopped = set_state_shutdown; evt->tick_resume = set_state_shutdown; evt->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; - evt->rating = 450; + evt->rating = 500; /* use value higher than ARM arch timer */ exynos4_mct_write(TICK_BASE_CNT, mevt->base + MCT_L_TCNTB_OFFSET); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 5c6062206760..87c211adf49e 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -116,10 +116,10 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_ACPI_STARTING, CPUHP_AP_PERF_ARM_STARTING, CPUHP_AP_ARM_L2X0_STARTING, + CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING, CPUHP_AP_ARM_ARCH_TIMER_STARTING, CPUHP_AP_ARM_GLOBAL_TIMER_STARTING, CPUHP_AP_JCORE_TIMER_STARTING, - CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING, CPUHP_AP_ARM_TWD_STARTING, CPUHP_AP_QCOM_TIMER_STARTING, CPUHP_AP_TEGRA_TIMER_STARTING, -- cgit v1.2.3 From 0e58983de0d89f6ee75daab1b0ea918cfcf6ddbf Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Sun, 4 Nov 2018 19:07:02 +0200 Subject: linux/dim: Move logic to dim.h In preparation for supporting more implementations of the DIM algorithm, I'm moving what would become common logic to a common library. Downstream DIM implementations will use the common lib for their implementation. Signed-off-by: Tal Gilboa Signed-off-by: Saeed Mahameed --- MAINTAINERS | 1 + include/linux/dim.h | 153 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/net_dim.h | 148 +--------------------------------------------- 3 files changed, 156 insertions(+), 146 deletions(-) create mode 100644 include/linux/dim.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 429c6c624861..5d4b852d9d39 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5589,6 +5589,7 @@ DYNAMIC INTERRUPT MODERATION M: Tal Gilboa S: Maintained F: include/linux/net_dim.h +F: include/linux/dim.h DZ DECSTATION DZ11 SERIAL DRIVER M: "Maciej W. Rozycki" diff --git a/include/linux/dim.h b/include/linux/dim.h new file mode 100644 index 000000000000..67d7ca40f3dd --- /dev/null +++ b/include/linux/dim.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef DIM_H +#define DIM_H + +#include + +#define NET_DIM_NEVENTS 64 + +/* more than 10% difference */ +#define IS_SIGNIFICANT_DIFF(val, ref) \ + (((100UL * abs((val) - (ref))) / (ref)) > 10) +#define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) \ +& (BIT_ULL(bits) - 1)) + +struct net_dim_cq_moder { + u16 usec; + u16 pkts; + u8 cq_period_mode; +}; + +struct net_dim_sample { + ktime_t time; + u32 pkt_ctr; + u32 byte_ctr; + u16 event_ctr; +}; + +struct net_dim_stats { + int ppms; /* packets per msec */ + int bpms; /* bytes per msec */ + int epms; /* events per msec */ +}; + +struct net_dim { /* Dynamic Interrupt Moderation */ + u8 state; + struct net_dim_stats prev_stats; + struct net_dim_sample start_sample; + struct work_struct work; + u8 profile_ix; + u8 mode; + u8 tune_state; + u8 steps_right; + u8 steps_left; + u8 tired; +}; + +enum { + NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, + NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, + NET_DIM_CQ_PERIOD_NUM_MODES +}; + +enum { + NET_DIM_START_MEASURE, + NET_DIM_MEASURE_IN_PROGRESS, + NET_DIM_APPLY_NEW_PROFILE, +}; + +enum { + NET_DIM_PARKING_ON_TOP, + NET_DIM_PARKING_TIRED, + NET_DIM_GOING_RIGHT, + NET_DIM_GOING_LEFT, +}; + +enum { + NET_DIM_STATS_WORSE, + NET_DIM_STATS_SAME, + NET_DIM_STATS_BETTER, +}; + +enum { + NET_DIM_STEPPED, + NET_DIM_TOO_TIRED, + NET_DIM_ON_EDGE, +}; + +static inline bool net_dim_on_top(struct net_dim *net_dim) +{ + switch (net_dim->tune_state) { + case NET_DIM_PARKING_ON_TOP: + case NET_DIM_PARKING_TIRED: + return true; + case NET_DIM_GOING_RIGHT: + return (net_dim->steps_left > 1) && (net_dim->steps_right == 1); + default: /* NET_DIM_GOING_LEFT */ + return (net_dim->steps_right > 1) && (net_dim->steps_left == 1); + } +} + +static inline void net_dim_turn(struct net_dim *net_dim) +{ + switch (net_dim->tune_state) { + case NET_DIM_PARKING_ON_TOP: + case NET_DIM_PARKING_TIRED: + break; + case NET_DIM_GOING_RIGHT: + net_dim->tune_state = NET_DIM_GOING_LEFT; + net_dim->steps_left = 0; + break; + case NET_DIM_GOING_LEFT: + net_dim->tune_state = NET_DIM_GOING_RIGHT; + net_dim->steps_right = 0; + break; + } +} + +static inline void net_dim_park_on_top(struct net_dim *net_dim) +{ + net_dim->steps_right = 0; + net_dim->steps_left = 0; + net_dim->tired = 0; + net_dim->tune_state = NET_DIM_PARKING_ON_TOP; +} + +static inline void net_dim_park_tired(struct net_dim *net_dim) +{ + net_dim->steps_right = 0; + net_dim->steps_left = 0; + net_dim->tune_state = NET_DIM_PARKING_TIRED; +} + +static inline void +net_dim_sample(u16 event_ctr, u64 packets, u64 bytes, struct net_dim_sample *s) +{ + s->time = ktime_get(); + s->pkt_ctr = packets; + s->byte_ctr = bytes; + s->event_ctr = event_ctr; +} + +static inline void +net_dim_calc_stats(struct net_dim_sample *start, struct net_dim_sample *end, + struct net_dim_stats *curr_stats) +{ + /* u32 holds up to 71 minutes, should be enough */ + u32 delta_us = ktime_us_delta(end->time, start->time); + u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr); + u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr, + start->byte_ctr); + + if (!delta_us) + return; + + curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us); + curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us); + curr_stats->epms = DIV_ROUND_UP(NET_DIM_NEVENTS * USEC_PER_MSEC, + delta_us); +} + +#endif /* DIM_H */ diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h index fd458389f7d1..373cda74b167 100644 --- a/include/linux/net_dim.h +++ b/include/linux/net_dim.h @@ -35,73 +35,10 @@ #define NET_DIM_H #include - -struct net_dim_cq_moder { - u16 usec; - u16 pkts; - u8 cq_period_mode; -}; - -struct net_dim_sample { - ktime_t time; - u32 pkt_ctr; - u32 byte_ctr; - u16 event_ctr; -}; - -struct net_dim_stats { - int ppms; /* packets per msec */ - int bpms; /* bytes per msec */ - int epms; /* events per msec */ -}; - -struct net_dim { /* Adaptive Moderation */ - u8 state; - struct net_dim_stats prev_stats; - struct net_dim_sample start_sample; - struct work_struct work; - u8 profile_ix; - u8 mode; - u8 tune_state; - u8 steps_right; - u8 steps_left; - u8 tired; -}; - -enum { - NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, - NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, - NET_DIM_CQ_PERIOD_NUM_MODES -}; - -/* Adaptive moderation logic */ -enum { - NET_DIM_START_MEASURE, - NET_DIM_MEASURE_IN_PROGRESS, - NET_DIM_APPLY_NEW_PROFILE, -}; - -enum { - NET_DIM_PARKING_ON_TOP, - NET_DIM_PARKING_TIRED, - NET_DIM_GOING_RIGHT, - NET_DIM_GOING_LEFT, -}; - -enum { - NET_DIM_STATS_WORSE, - NET_DIM_STATS_SAME, - NET_DIM_STATS_BETTER, -}; - -enum { - NET_DIM_STEPPED, - NET_DIM_TOO_TIRED, - NET_DIM_ON_EDGE, -}; +#include #define NET_DIM_PARAMS_NUM_PROFILES 5 -/* Adaptive moderation profiles */ +/* Netdev dynamic interrupt moderation profiles */ #define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 #define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128 #define NET_DIM_DEF_PROFILE_CQE 1 @@ -188,36 +125,6 @@ net_dim_get_def_tx_moderation(u8 cq_period_mode) return net_dim_get_tx_moderation(cq_period_mode, profile_ix); } -static inline bool net_dim_on_top(struct net_dim *dim) -{ - switch (dim->tune_state) { - case NET_DIM_PARKING_ON_TOP: - case NET_DIM_PARKING_TIRED: - return true; - case NET_DIM_GOING_RIGHT: - return (dim->steps_left > 1) && (dim->steps_right == 1); - default: /* NET_DIM_GOING_LEFT */ - return (dim->steps_right > 1) && (dim->steps_left == 1); - } -} - -static inline void net_dim_turn(struct net_dim *dim) -{ - switch (dim->tune_state) { - case NET_DIM_PARKING_ON_TOP: - case NET_DIM_PARKING_TIRED: - break; - case NET_DIM_GOING_RIGHT: - dim->tune_state = NET_DIM_GOING_LEFT; - dim->steps_left = 0; - break; - case NET_DIM_GOING_LEFT: - dim->tune_state = NET_DIM_GOING_RIGHT; - dim->steps_right = 0; - break; - } -} - static inline int net_dim_step(struct net_dim *dim) { if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2)) @@ -245,21 +152,6 @@ static inline int net_dim_step(struct net_dim *dim) return NET_DIM_STEPPED; } -static inline void net_dim_park_on_top(struct net_dim *dim) -{ - dim->steps_right = 0; - dim->steps_left = 0; - dim->tired = 0; - dim->tune_state = NET_DIM_PARKING_ON_TOP; -} - -static inline void net_dim_park_tired(struct net_dim *dim) -{ - dim->steps_right = 0; - dim->steps_left = 0; - dim->tune_state = NET_DIM_PARKING_TIRED; -} - static inline void net_dim_exit_parking(struct net_dim *dim) { dim->tune_state = dim->profile_ix ? NET_DIM_GOING_LEFT : @@ -267,9 +159,6 @@ static inline void net_dim_exit_parking(struct net_dim *dim) net_dim_step(dim); } -#define IS_SIGNIFICANT_DIFF(val, ref) \ - (((100UL * abs((val) - (ref))) / (ref)) > 10) /* more than 10% difference */ - static inline int net_dim_stats_compare(struct net_dim_stats *curr, struct net_dim_stats *prev) { @@ -351,39 +240,6 @@ static inline bool net_dim_decision(struct net_dim_stats *curr_stats, return dim->profile_ix != prev_ix; } -static inline void net_dim_sample(u16 event_ctr, - u64 packets, - u64 bytes, - struct net_dim_sample *s) -{ - s->time = ktime_get(); - s->pkt_ctr = packets; - s->byte_ctr = bytes; - s->event_ctr = event_ctr; -} - -#define NET_DIM_NEVENTS 64 -#define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) & (BIT_ULL(bits) - 1)) - -static inline void net_dim_calc_stats(struct net_dim_sample *start, - struct net_dim_sample *end, - struct net_dim_stats *curr_stats) -{ - /* u32 holds up to 71 minutes, should be enough */ - u32 delta_us = ktime_us_delta(end->time, start->time); - u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr); - u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr, - start->byte_ctr); - - if (!delta_us) - return; - - curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us); - curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us); - curr_stats->epms = DIV_ROUND_UP(NET_DIM_NEVENTS * USEC_PER_MSEC, - delta_us); -} - static inline void net_dim(struct net_dim *dim, struct net_dim_sample end_sample) { -- cgit v1.2.3 From 449986ea92412727e8c553eaa5c8d3ed884253c4 Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Mon, 5 Nov 2018 11:57:10 +0200 Subject: linux/dim: Remove "net" prefix from internal DIM members Only renaming functions and structs which aren't used by an external code. Signed-off-by: Tal Gilboa Signed-off-by: Saeed Mahameed --- include/linux/dim.h | 86 ++++++++++++++++++++++++------------------------ include/linux/net_dim.h | 87 ++++++++++++++++++++++++------------------------- 2 files changed, 86 insertions(+), 87 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dim.h b/include/linux/dim.h index 67d7ca40f3dd..6ee991681d62 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -6,7 +6,7 @@ #include -#define NET_DIM_NEVENTS 64 +#define DIM_NEVENTS 64 /* more than 10% difference */ #define IS_SIGNIFICANT_DIFF(val, ref) \ @@ -27,7 +27,7 @@ struct net_dim_sample { u16 event_ctr; }; -struct net_dim_stats { +struct dim_stats { int ppms; /* packets per msec */ int bpms; /* bytes per msec */ int epms; /* events per msec */ @@ -35,7 +35,7 @@ struct net_dim_stats { struct net_dim { /* Dynamic Interrupt Moderation */ u8 state; - struct net_dim_stats prev_stats; + struct dim_stats prev_stats; struct net_dim_sample start_sample; struct work_struct work; u8 profile_ix; @@ -59,67 +59,67 @@ enum { }; enum { - NET_DIM_PARKING_ON_TOP, - NET_DIM_PARKING_TIRED, - NET_DIM_GOING_RIGHT, - NET_DIM_GOING_LEFT, + DIM_PARKING_ON_TOP, + DIM_PARKING_TIRED, + DIM_GOING_RIGHT, + DIM_GOING_LEFT, }; enum { - NET_DIM_STATS_WORSE, - NET_DIM_STATS_SAME, - NET_DIM_STATS_BETTER, + DIM_STATS_WORSE, + DIM_STATS_SAME, + DIM_STATS_BETTER, }; enum { - NET_DIM_STEPPED, - NET_DIM_TOO_TIRED, - NET_DIM_ON_EDGE, + DIM_STEPPED, + DIM_TOO_TIRED, + DIM_ON_EDGE, }; -static inline bool net_dim_on_top(struct net_dim *net_dim) +static inline bool dim_on_top(struct net_dim *dim) { - switch (net_dim->tune_state) { - case NET_DIM_PARKING_ON_TOP: - case NET_DIM_PARKING_TIRED: + switch (dim->tune_state) { + case DIM_PARKING_ON_TOP: + case DIM_PARKING_TIRED: return true; - case NET_DIM_GOING_RIGHT: - return (net_dim->steps_left > 1) && (net_dim->steps_right == 1); - default: /* NET_DIM_GOING_LEFT */ - return (net_dim->steps_right > 1) && (net_dim->steps_left == 1); + case DIM_GOING_RIGHT: + return (dim->steps_left > 1) && (dim->steps_right == 1); + default: /* DIM_GOING_LEFT */ + return (dim->steps_right > 1) && (dim->steps_left == 1); } } -static inline void net_dim_turn(struct net_dim *net_dim) +static inline void dim_turn(struct net_dim *dim) { - switch (net_dim->tune_state) { - case NET_DIM_PARKING_ON_TOP: - case NET_DIM_PARKING_TIRED: + switch (dim->tune_state) { + case DIM_PARKING_ON_TOP: + case DIM_PARKING_TIRED: break; - case NET_DIM_GOING_RIGHT: - net_dim->tune_state = NET_DIM_GOING_LEFT; - net_dim->steps_left = 0; + case DIM_GOING_RIGHT: + dim->tune_state = DIM_GOING_LEFT; + dim->steps_left = 0; break; - case NET_DIM_GOING_LEFT: - net_dim->tune_state = NET_DIM_GOING_RIGHT; - net_dim->steps_right = 0; + case DIM_GOING_LEFT: + dim->tune_state = DIM_GOING_RIGHT; + dim->steps_right = 0; break; } } -static inline void net_dim_park_on_top(struct net_dim *net_dim) +static inline void dim_park_on_top(struct net_dim *dim) { - net_dim->steps_right = 0; - net_dim->steps_left = 0; - net_dim->tired = 0; - net_dim->tune_state = NET_DIM_PARKING_ON_TOP; + dim->steps_right = 0; + dim->steps_left = 0; + dim->tired = 0; + dim->tune_state = DIM_PARKING_ON_TOP; } -static inline void net_dim_park_tired(struct net_dim *net_dim) +static inline void dim_park_tired(struct net_dim *dim) { - net_dim->steps_right = 0; - net_dim->steps_left = 0; - net_dim->tune_state = NET_DIM_PARKING_TIRED; + dim->steps_right = 0; + dim->steps_left = 0; + dim->tune_state = DIM_PARKING_TIRED; } static inline void @@ -132,8 +132,8 @@ net_dim_sample(u16 event_ctr, u64 packets, u64 bytes, struct net_dim_sample *s) } static inline void -net_dim_calc_stats(struct net_dim_sample *start, struct net_dim_sample *end, - struct net_dim_stats *curr_stats) +dim_calc_stats(struct net_dim_sample *start, struct net_dim_sample *end, + struct dim_stats *curr_stats) { /* u32 holds up to 71 minutes, should be enough */ u32 delta_us = ktime_us_delta(end->time, start->time); @@ -146,7 +146,7 @@ net_dim_calc_stats(struct net_dim_sample *start, struct net_dim_sample *end, curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us); curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us); - curr_stats->epms = DIV_ROUND_UP(NET_DIM_NEVENTS * USEC_PER_MSEC, + curr_stats->epms = DIV_ROUND_UP(DIM_NEVENTS * USEC_PER_MSEC, delta_us); } diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h index 373cda74b167..f89fa4fdfb46 100644 --- a/include/linux/net_dim.h +++ b/include/linux/net_dim.h @@ -128,67 +128,67 @@ net_dim_get_def_tx_moderation(u8 cq_period_mode) static inline int net_dim_step(struct net_dim *dim) { if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2)) - return NET_DIM_TOO_TIRED; + return DIM_TOO_TIRED; switch (dim->tune_state) { - case NET_DIM_PARKING_ON_TOP: - case NET_DIM_PARKING_TIRED: + case DIM_PARKING_ON_TOP: + case DIM_PARKING_TIRED: break; - case NET_DIM_GOING_RIGHT: + case DIM_GOING_RIGHT: if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1)) - return NET_DIM_ON_EDGE; + return DIM_ON_EDGE; dim->profile_ix++; dim->steps_right++; break; - case NET_DIM_GOING_LEFT: + case DIM_GOING_LEFT: if (dim->profile_ix == 0) - return NET_DIM_ON_EDGE; + return DIM_ON_EDGE; dim->profile_ix--; dim->steps_left++; break; } dim->tired++; - return NET_DIM_STEPPED; + return DIM_STEPPED; } static inline void net_dim_exit_parking(struct net_dim *dim) { - dim->tune_state = dim->profile_ix ? NET_DIM_GOING_LEFT : - NET_DIM_GOING_RIGHT; + dim->tune_state = dim->profile_ix ? DIM_GOING_LEFT : + DIM_GOING_RIGHT; net_dim_step(dim); } -static inline int net_dim_stats_compare(struct net_dim_stats *curr, - struct net_dim_stats *prev) +static inline int net_dim_stats_compare(struct dim_stats *curr, + struct dim_stats *prev) { if (!prev->bpms) - return curr->bpms ? NET_DIM_STATS_BETTER : - NET_DIM_STATS_SAME; + return curr->bpms ? DIM_STATS_BETTER : + DIM_STATS_SAME; if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms)) - return (curr->bpms > prev->bpms) ? NET_DIM_STATS_BETTER : - NET_DIM_STATS_WORSE; + return (curr->bpms > prev->bpms) ? DIM_STATS_BETTER : + DIM_STATS_WORSE; if (!prev->ppms) - return curr->ppms ? NET_DIM_STATS_BETTER : - NET_DIM_STATS_SAME; + return curr->ppms ? DIM_STATS_BETTER : + DIM_STATS_SAME; if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms)) - return (curr->ppms > prev->ppms) ? NET_DIM_STATS_BETTER : - NET_DIM_STATS_WORSE; + return (curr->ppms > prev->ppms) ? DIM_STATS_BETTER : + DIM_STATS_WORSE; if (!prev->epms) - return NET_DIM_STATS_SAME; + return DIM_STATS_SAME; if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms)) - return (curr->epms < prev->epms) ? NET_DIM_STATS_BETTER : - NET_DIM_STATS_WORSE; + return (curr->epms < prev->epms) ? DIM_STATS_BETTER : + DIM_STATS_WORSE; - return NET_DIM_STATS_SAME; + return DIM_STATS_SAME; } -static inline bool net_dim_decision(struct net_dim_stats *curr_stats, +static inline bool net_dim_decision(struct dim_stats *curr_stats, struct net_dim *dim) { int prev_state = dim->tune_state; @@ -197,44 +197,44 @@ static inline bool net_dim_decision(struct net_dim_stats *curr_stats, int step_res; switch (dim->tune_state) { - case NET_DIM_PARKING_ON_TOP: + case DIM_PARKING_ON_TOP: stats_res = net_dim_stats_compare(curr_stats, &dim->prev_stats); - if (stats_res != NET_DIM_STATS_SAME) + if (stats_res != DIM_STATS_SAME) net_dim_exit_parking(dim); break; - case NET_DIM_PARKING_TIRED: + case DIM_PARKING_TIRED: dim->tired--; if (!dim->tired) net_dim_exit_parking(dim); break; - case NET_DIM_GOING_RIGHT: - case NET_DIM_GOING_LEFT: + case DIM_GOING_RIGHT: + case DIM_GOING_LEFT: stats_res = net_dim_stats_compare(curr_stats, &dim->prev_stats); - if (stats_res != NET_DIM_STATS_BETTER) - net_dim_turn(dim); + if (stats_res != DIM_STATS_BETTER) + dim_turn(dim); - if (net_dim_on_top(dim)) { - net_dim_park_on_top(dim); + if (dim_on_top(dim)) { + dim_park_on_top(dim); break; } step_res = net_dim_step(dim); switch (step_res) { - case NET_DIM_ON_EDGE: - net_dim_park_on_top(dim); + case DIM_ON_EDGE: + dim_park_on_top(dim); break; - case NET_DIM_TOO_TIRED: - net_dim_park_tired(dim); + case DIM_TOO_TIRED: + dim_park_tired(dim); break; } break; } - if ((prev_state != NET_DIM_PARKING_ON_TOP) || - (dim->tune_state != NET_DIM_PARKING_ON_TOP)) + if (prev_state != DIM_PARKING_ON_TOP || + dim->tune_state != DIM_PARKING_ON_TOP) dim->prev_stats = *curr_stats; return dim->profile_ix != prev_ix; @@ -243,7 +243,7 @@ static inline bool net_dim_decision(struct net_dim_stats *curr_stats, static inline void net_dim(struct net_dim *dim, struct net_dim_sample end_sample) { - struct net_dim_stats curr_stats; + struct dim_stats curr_stats; u16 nevents; switch (dim->state) { @@ -251,10 +251,9 @@ static inline void net_dim(struct net_dim *dim, nevents = BIT_GAP(BITS_PER_TYPE(u16), end_sample.event_ctr, dim->start_sample.event_ctr); - if (nevents < NET_DIM_NEVENTS) + if (nevents < DIM_NEVENTS) break; - net_dim_calc_stats(&dim->start_sample, &end_sample, - &curr_stats); + dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats); if (net_dim_decision(&curr_stats, dim)) { dim->state = NET_DIM_APPLY_NEW_PROFILE; schedule_work(&dim->work); -- cgit v1.2.3 From c002bd529d719858d4cc233431c88c9efa844053 Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Mon, 5 Nov 2018 12:07:52 +0200 Subject: linux/dim: Rename externally exposed macros Renamed macros in use by external drivers. Signed-off-by: Tal Gilboa Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/broadcom/bcmsysport.c | 4 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c | 2 +- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_dim.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 +++++----- include/linux/dim.h | 12 ++++++------ include/linux/net_dim.h | 18 +++++++++--------- 8 files changed, 27 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index c623896e3ccb..b5e2f9d2cb71 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1099,7 +1099,7 @@ static void bcm_sysport_dim_work(struct work_struct *work) net_dim_get_rx_moderation(dim->mode, dim->profile_ix); bcm_sysport_set_rx_coalesce(priv, cur_profile.usec, cur_profile.pkts); - dim->state = NET_DIM_START_MEASURE; + dim->state = DIM_START_MEASURE; } /* RX and misc interrupt routine */ @@ -1440,7 +1440,7 @@ static void bcm_sysport_init_dim(struct bcm_sysport_priv *priv, struct bcm_sysport_net_dim *dim = &priv->dim; INIT_WORK(&dim->dim.work, cb); - dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + dim->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; dim->event_ctr = 0; dim->packets = 0; dim->bytes = 0; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8314c00d7537..49de873043c0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7810,7 +7810,7 @@ static void bnxt_enable_napi(struct bnxt *bp) if (bp->bnapi[i]->rx_ring) { INIT_WORK(&cpr->dim.work, bnxt_dim_work); - cpr->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + cpr->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } napi_enable(&bp->bnapi[i]->napi); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c index afa97c8bb081..16a4588709d1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c @@ -28,5 +28,5 @@ void bnxt_dim_work(struct work_struct *work) cpr->rx_ring_coal.coal_bufs = cur_moder.pkts; bnxt_hwrm_set_ring_coal(bnapi->bp, bnapi); - dim->state = NET_DIM_START_MEASURE; + dim->state = DIM_START_MEASURE; } diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 374b9ff05c88..5286a46ecfb0 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1928,7 +1928,7 @@ static void bcmgenet_dim_work(struct work_struct *work) net_dim_get_rx_moderation(dim->mode, dim->profile_ix); bcmgenet_set_rx_coalesce(ring, cur_profile.usec, cur_profile.pkts); - dim->state = NET_DIM_START_MEASURE; + dim->state = DIM_START_MEASURE; } /* Assign skb to RX DMA descriptor. */ @@ -2085,7 +2085,7 @@ static void bcmgenet_init_dim(struct bcmgenet_rx_ring *ring, struct bcmgenet_net_dim *dim = &ring->dim; INIT_WORK(&dim->dim.work, cb); - dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + dim->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; dim->event_ctr = 0; dim->packets = 0; dim->bytes = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index d67adf70a97b..a80303add7c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -38,7 +38,7 @@ mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder, struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq) { mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts); - dim->state = NET_DIM_START_MEASURE; + dim->state = DIM_START_MEASURE; } void mlx5e_rx_dim_work(struct work_struct *work) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 457cc39423f2..5b89e992e482 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -584,11 +584,11 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, switch (params->rx_cq_moderation.cq_period_mode) { case MLX5_CQ_PERIOD_MODE_START_FROM_CQE: - rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE; + rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE; break; case MLX5_CQ_PERIOD_MODE_START_FROM_EQE: default: - rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } rq->page_cache.head = 0; @@ -2151,7 +2151,7 @@ static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, mlx5e_build_common_cq_param(priv, param); - param->cq_period_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, @@ -4440,8 +4440,8 @@ static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode) { return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ? - NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE : - NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + DIM_CQ_PERIOD_MODE_START_FROM_CQE : + DIM_CQ_PERIOD_MODE_START_FROM_EQE; } void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) diff --git a/include/linux/dim.h b/include/linux/dim.h index 6ee991681d62..989dbbdf9d45 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -47,15 +47,15 @@ struct net_dim { /* Dynamic Interrupt Moderation */ }; enum { - NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, - NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, - NET_DIM_CQ_PERIOD_NUM_MODES + DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, + DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, + DIM_CQ_PERIOD_NUM_MODES }; enum { - NET_DIM_START_MEASURE, - NET_DIM_MEASURE_IN_PROGRESS, - NET_DIM_APPLY_NEW_PROFILE, + DIM_START_MEASURE, + DIM_MEASURE_IN_PROGRESS, + DIM_APPLY_NEW_PROFILE, }; enum { diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h index f89fa4fdfb46..e0c97f824dd0 100644 --- a/include/linux/net_dim.h +++ b/include/linux/net_dim.h @@ -78,13 +78,13 @@ } static const struct net_dim_cq_moder -rx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { +rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { NET_DIM_RX_EQE_PROFILES, NET_DIM_RX_CQE_PROFILES, }; static const struct net_dim_cq_moder -tx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { +tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { NET_DIM_TX_EQE_PROFILES, NET_DIM_TX_CQE_PROFILES, }; @@ -101,7 +101,7 @@ net_dim_get_rx_moderation(u8 cq_period_mode, int ix) static inline struct net_dim_cq_moder net_dim_get_def_rx_moderation(u8 cq_period_mode) { - u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ? + u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; return net_dim_get_rx_moderation(cq_period_mode, profile_ix); @@ -119,7 +119,7 @@ net_dim_get_tx_moderation(u8 cq_period_mode, int ix) static inline struct net_dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode) { - u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ? + u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; return net_dim_get_tx_moderation(cq_period_mode, profile_ix); @@ -247,7 +247,7 @@ static inline void net_dim(struct net_dim *dim, u16 nevents; switch (dim->state) { - case NET_DIM_MEASURE_IN_PROGRESS: + case DIM_MEASURE_IN_PROGRESS: nevents = BIT_GAP(BITS_PER_TYPE(u16), end_sample.event_ctr, dim->start_sample.event_ctr); @@ -255,17 +255,17 @@ static inline void net_dim(struct net_dim *dim, break; dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats); if (net_dim_decision(&curr_stats, dim)) { - dim->state = NET_DIM_APPLY_NEW_PROFILE; + dim->state = DIM_APPLY_NEW_PROFILE; schedule_work(&dim->work); break; } /* fall through */ - case NET_DIM_START_MEASURE: + case DIM_START_MEASURE: net_dim_sample(end_sample.event_ctr, end_sample.pkt_ctr, end_sample.byte_ctr, &dim->start_sample); - dim->state = NET_DIM_MEASURE_IN_PROGRESS; + dim->state = DIM_MEASURE_IN_PROGRESS; break; - case NET_DIM_APPLY_NEW_PROFILE: + case DIM_APPLY_NEW_PROFILE: break; } } -- cgit v1.2.3 From e5b6ab02d7aa4118c9a36491633812dcc442acbe Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Mon, 14 Jan 2019 15:32:49 +0200 Subject: linux/dim: Rename net_dim_sample() to net_dim_update_sample() In order to avoid confusion between the function and the similarly named struct. In preparation for removing the 'net' prefix from dim members. Signed-off-by: Tal Gilboa Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/broadcom/bcmsysport.c | 4 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++---- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 6 ++---- include/linux/dim.h | 3 ++- include/linux/net_dim.h | 4 ++-- 6 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index b5e2f9d2cb71..faaf8ade15e5 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1019,8 +1019,8 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget) } if (priv->dim.use_dim) { - net_dim_sample(priv->dim.event_ctr, priv->dim.packets, - priv->dim.bytes, &dim_sample); + net_dim_update_sample(priv->dim.event_ctr, priv->dim.packets, + priv->dim.bytes, &dim_sample); net_dim(&priv->dim.dim, dim_sample); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 49de873043c0..eaec949c367a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2130,10 +2130,10 @@ static int bnxt_poll(struct napi_struct *napi, int budget) if (bp->flags & BNXT_FLAG_DIM) { struct net_dim_sample dim_sample; - net_dim_sample(cpr->event_ctr, - cpr->rx_packets, - cpr->rx_bytes, - &dim_sample); + net_dim_update_sample(cpr->event_ctr, + cpr->rx_packets, + cpr->rx_bytes, + &dim_sample); net_dim(&cpr->dim, dim_sample); } return work_done; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 5286a46ecfb0..297ae786ffed 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1909,8 +1909,8 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget) } if (ring->dim.use_dim) { - net_dim_sample(ring->dim.event_ctr, ring->dim.packets, - ring->dim.bytes, &dim_sample); + net_dim_update_sample(ring->dim.event_ctr, ring->dim.packets, + ring->dim.bytes, &dim_sample); net_dim(&ring->dim.dim, dim_sample); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index f9862bf75491..07432e6428cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -53,8 +53,7 @@ static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state))) return; - net_dim_sample(sq->cq.event_ctr, stats->packets, stats->bytes, - &dim_sample); + net_dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); net_dim(&sq->dim, dim_sample); } @@ -66,8 +65,7 @@ static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state))) return; - net_dim_sample(rq->cq.event_ctr, stats->packets, stats->bytes, - &dim_sample); + net_dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); net_dim(&rq->dim, dim_sample); } diff --git a/include/linux/dim.h b/include/linux/dim.h index 989dbbdf9d45..f0f20ed25497 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -123,7 +123,8 @@ static inline void dim_park_tired(struct net_dim *dim) } static inline void -net_dim_sample(u16 event_ctr, u64 packets, u64 bytes, struct net_dim_sample *s) +net_dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, + struct net_dim_sample *s) { s->time = ktime_get(); s->pkt_ctr = packets; diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h index e0c97f824dd0..d4b40adc7fa1 100644 --- a/include/linux/net_dim.h +++ b/include/linux/net_dim.h @@ -261,8 +261,8 @@ static inline void net_dim(struct net_dim *dim, } /* fall through */ case DIM_START_MEASURE: - net_dim_sample(end_sample.event_ctr, end_sample.pkt_ctr, end_sample.byte_ctr, - &dim->start_sample); + net_dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr, + end_sample.byte_ctr, &dim->start_sample); dim->state = DIM_MEASURE_IN_PROGRESS; break; case DIM_APPLY_NEW_PROFILE: -- cgit v1.2.3 From 8960b38932bee8db0bc9c4d8c135f21df6cdd297 Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Thu, 31 Jan 2019 16:44:48 +0200 Subject: linux/dim: Rename externally used net_dim members Removed 'net' prefix from functions and structs used by external drivers. Signed-off-by: Tal Gilboa Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/broadcom/bcmsysport.c | 16 ++++++------ drivers/net/ethernet/broadcom/bcmsysport.h | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 ++++---- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c | 4 +-- drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c | 5 ++-- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 14 +++++----- drivers/net/ethernet/broadcom/genet/bcmgenet.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 8 +++--- drivers/net/ethernet/mellanox/mlx5/core/en_dim.c | 10 ++++---- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 4 +-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 12 ++++----- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 8 +++--- include/linux/dim.h | 21 ++++++++------- include/linux/net_dim.h | 30 +++++++++++----------- 15 files changed, 73 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index faaf8ade15e5..c1247b2948ff 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -612,7 +612,7 @@ static int bcm_sysport_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec) { struct bcm_sysport_priv *priv = netdev_priv(dev); - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; u32 usecs, pkts; unsigned int i; @@ -995,7 +995,7 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget) { struct bcm_sysport_priv *priv = container_of(napi, struct bcm_sysport_priv, napi); - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; unsigned int work_done = 0; work_done = bcm_sysport_desc_rx(priv, budget); @@ -1019,8 +1019,8 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget) } if (priv->dim.use_dim) { - net_dim_update_sample(priv->dim.event_ctr, priv->dim.packets, - priv->dim.bytes, &dim_sample); + dim_update_sample(priv->dim.event_ctr, priv->dim.packets, + priv->dim.bytes, &dim_sample); net_dim(&priv->dim.dim, dim_sample); } @@ -1090,13 +1090,13 @@ static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv) static void bcm_sysport_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, work); + struct dim *dim = container_of(work, struct dim, work); struct bcm_sysport_net_dim *ndim = container_of(dim, struct bcm_sysport_net_dim, dim); struct bcm_sysport_priv *priv = container_of(ndim, struct bcm_sysport_priv, dim); - struct net_dim_cq_moder cur_profile = - net_dim_get_rx_moderation(dim->mode, dim->profile_ix); + struct dim_cq_moder cur_profile = net_dim_get_rx_moderation(dim->mode, + dim->profile_ix); bcm_sysport_set_rx_coalesce(priv, cur_profile.usec, cur_profile.pkts); dim->state = DIM_START_MEASURE; @@ -1449,7 +1449,7 @@ static void bcm_sysport_init_dim(struct bcm_sysport_priv *priv, static void bcm_sysport_init_rx_coalesce(struct bcm_sysport_priv *priv) { struct bcm_sysport_net_dim *dim = &priv->dim; - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; u32 usecs, pkts; usecs = priv->rx_coalesce_usecs; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 6f3141c86436..cbe6d559d964 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -705,7 +705,7 @@ struct bcm_sysport_net_dim { u16 event_ctr; unsigned long packets; unsigned long bytes; - struct net_dim dim; + struct dim dim; }; /* Software view of the TX ring */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index eaec949c367a..c54668004600 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2128,12 +2128,12 @@ static int bnxt_poll(struct napi_struct *napi, int budget) } } if (bp->flags & BNXT_FLAG_DIM) { - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; - net_dim_update_sample(cpr->event_ctr, - cpr->rx_packets, - cpr->rx_bytes, - &dim_sample); + dim_update_sample(cpr->event_ctr, + cpr->rx_packets, + cpr->rx_bytes, + &dim_sample); net_dim(&cpr->dim, dim_sample); } return work_done; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index eca36dd6b751..a552c5539cc9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -809,7 +809,7 @@ struct bnxt_cp_ring_info { u64 rx_bytes; u64 event_ctr; - struct net_dim dim; + struct dim dim; union { struct tx_cmp *cp_desc_ring[MAX_CP_PAGES]; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c index 94e208e9789f..3d1d53fbb135 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c @@ -21,7 +21,7 @@ static ssize_t debugfs_dim_read(struct file *filep, char __user *buffer, size_t count, loff_t *ppos) { - struct net_dim *dim = filep->private_data; + struct dim *dim = filep->private_data; int len; char *buf; @@ -61,7 +61,7 @@ static const struct file_operations debugfs_dim_fops = { .read = debugfs_dim_read, }; -static struct dentry *debugfs_dim_ring_init(struct net_dim *dim, int ring_idx, +static struct dentry *debugfs_dim_ring_init(struct dim *dim, int ring_idx, struct dentry *dd) { static char qname[16]; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c index 16a4588709d1..11605f9fa61e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c @@ -13,15 +13,14 @@ void bnxt_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, - work); + struct dim *dim = container_of(work, struct dim, work); struct bnxt_cp_ring_info *cpr = container_of(dim, struct bnxt_cp_ring_info, dim); struct bnxt_napi *bnapi = container_of(cpr, struct bnxt_napi, cp_ring); - struct net_dim_cq_moder cur_moder = + struct dim_cq_moder cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); cpr->rx_ring_coal.coal_ticks = cur_moder.usec; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 297ae786ffed..b7f8f4f1088f 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -643,7 +643,7 @@ static void bcmgenet_set_rx_coalesce(struct bcmgenet_rx_ring *ring, static void bcmgenet_set_ring_rx_coalesce(struct bcmgenet_rx_ring *ring, struct ethtool_coalesce *ec) { - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; u32 usecs, pkts; ring->rx_coalesce_usecs = ec->rx_coalesce_usecs; @@ -1898,7 +1898,7 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget) { struct bcmgenet_rx_ring *ring = container_of(napi, struct bcmgenet_rx_ring, napi); - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; unsigned int work_done; work_done = bcmgenet_desc_rx(ring, budget); @@ -1909,8 +1909,8 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget) } if (ring->dim.use_dim) { - net_dim_update_sample(ring->dim.event_ctr, ring->dim.packets, - ring->dim.bytes, &dim_sample); + dim_update_sample(ring->dim.event_ctr, ring->dim.packets, + ring->dim.bytes, &dim_sample); net_dim(&ring->dim.dim, dim_sample); } @@ -1919,12 +1919,12 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget) static void bcmgenet_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, work); + struct dim *dim = container_of(work, struct dim, work); struct bcmgenet_net_dim *ndim = container_of(dim, struct bcmgenet_net_dim, dim); struct bcmgenet_rx_ring *ring = container_of(ndim, struct bcmgenet_rx_ring, dim); - struct net_dim_cq_moder cur_profile = + struct dim_cq_moder cur_profile = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); bcmgenet_set_rx_coalesce(ring, cur_profile.usec, cur_profile.pkts); @@ -2094,7 +2094,7 @@ static void bcmgenet_init_dim(struct bcmgenet_rx_ring *ring, static void bcmgenet_init_rx_coalesce(struct bcmgenet_rx_ring *ring) { struct bcmgenet_net_dim *dim = &ring->dim; - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; u32 usecs, pkts; usecs = ring->rx_coalesce_usecs; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 14b49612aa86..6e418d9c3706 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -581,7 +581,7 @@ struct bcmgenet_net_dim { u16 event_ctr; unsigned long packets; unsigned long bytes; - struct net_dim dim; + struct dim dim; }; struct bcmgenet_rx_ring { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 3a183d690e23..11efd6e4bdc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -238,9 +238,9 @@ struct mlx5e_params { u16 num_channels; u8 num_tc; bool rx_cqe_compress_def; - struct net_dim_cq_moder rx_cq_moderation; - struct net_dim_cq_moder tx_cq_moderation; bool tunneled_offload_en; + struct dim_cq_moder rx_cq_moderation; + struct dim_cq_moder tx_cq_moderation; bool lro_en; u8 tx_min_inline_mode; bool vlan_strip_disable; @@ -356,7 +356,7 @@ struct mlx5e_txqsq { /* dirtied @completion */ u16 cc; u32 dma_fifo_cc; - struct net_dim dim; /* Adaptive Moderation */ + struct dim dim; /* Adaptive Moderation */ /* dirtied @xmit */ u16 pc ____cacheline_aligned_in_smp; @@ -595,7 +595,7 @@ struct mlx5e_rq { int ix; unsigned int hw_mtu; - struct net_dim dim; /* Dynamic Interrupt Moderation */ + struct dim dim; /* Dynamic Interrupt Moderation */ /* XDP */ struct bpf_prog *xdp_prog; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index a80303add7c0..ba3c1be9f2d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -34,7 +34,7 @@ #include "en.h" static void -mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder, +mlx5e_complete_dim_work(struct dim *dim, struct dim_cq_moder moder, struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq) { mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts); @@ -43,9 +43,9 @@ mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder, void mlx5e_rx_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, work); + struct dim *dim = container_of(work, struct dim, work); struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim); - struct net_dim_cq_moder cur_moder = + struct dim_cq_moder cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq); @@ -53,9 +53,9 @@ void mlx5e_rx_dim_work(struct work_struct *work) void mlx5e_tx_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, work); + struct dim *dim = container_of(work, struct dim, work); struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim); - struct net_dim_cq_moder cur_moder = + struct dim_cq_moder cur_moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix); mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index dd764e0471f2..c853b657739c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -466,7 +466,7 @@ static int mlx5e_set_channels(struct net_device *dev, int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { - struct net_dim_cq_moder *rx_moder, *tx_moder; + struct dim_cq_moder *rx_moder, *tx_moder; if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) return -EOPNOTSUPP; @@ -521,7 +521,7 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { - struct net_dim_cq_moder *rx_moder, *tx_moder; + struct dim_cq_moder *rx_moder, *tx_moder; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channels new_channels = {}; int err = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5b89e992e482..9705101c0235 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1569,7 +1569,7 @@ static void mlx5e_destroy_cq(struct mlx5e_cq *cq) } static int mlx5e_open_cq(struct mlx5e_channel *c, - struct net_dim_cq_moder moder, + struct dim_cq_moder moder, struct mlx5e_cq_param *param, struct mlx5e_cq *cq) { @@ -1774,7 +1774,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_channel **cp) { int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); - struct net_dim_cq_moder icocq_moder = {0, 0}; + struct dim_cq_moder icocq_moder = {0, 0}; struct net_device *netdev = priv->netdev; struct mlx5e_channel *c; unsigned int irq; @@ -4411,9 +4411,9 @@ static bool slow_pci_heuristic(struct mlx5_core_dev *mdev) link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; } -static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) +static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) { - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; moder.cq_period_mode = cq_period_mode; moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; @@ -4424,9 +4424,9 @@ static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) return moder; } -static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) +static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) { - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; moder.cq_period_mode = cq_period_mode; moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 07432e6428cf..e6c434efbd46 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -48,24 +48,24 @@ static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) { struct mlx5e_sq_stats *stats = sq->stats; - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state))) return; - net_dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); + dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); net_dim(&sq->dim, dim_sample); } static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) { struct mlx5e_rq_stats *stats = rq->stats; - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state))) return; - net_dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); + dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); net_dim(&rq->dim, dim_sample); } diff --git a/include/linux/dim.h b/include/linux/dim.h index f0f20ed25497..60e5074a7cc0 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -14,13 +14,13 @@ #define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) \ & (BIT_ULL(bits) - 1)) -struct net_dim_cq_moder { +struct dim_cq_moder { u16 usec; u16 pkts; u8 cq_period_mode; }; -struct net_dim_sample { +struct dim_sample { ktime_t time; u32 pkt_ctr; u32 byte_ctr; @@ -33,10 +33,10 @@ struct dim_stats { int epms; /* events per msec */ }; -struct net_dim { /* Dynamic Interrupt Moderation */ +struct dim { /* Dynamic Interrupt Moderation */ u8 state; struct dim_stats prev_stats; - struct net_dim_sample start_sample; + struct dim_sample start_sample; struct work_struct work; u8 profile_ix; u8 mode; @@ -77,7 +77,7 @@ enum { DIM_ON_EDGE, }; -static inline bool dim_on_top(struct net_dim *dim) +static inline bool dim_on_top(struct dim *dim) { switch (dim->tune_state) { case DIM_PARKING_ON_TOP: @@ -90,7 +90,7 @@ static inline bool dim_on_top(struct net_dim *dim) } } -static inline void dim_turn(struct net_dim *dim) +static inline void dim_turn(struct dim *dim) { switch (dim->tune_state) { case DIM_PARKING_ON_TOP: @@ -107,7 +107,7 @@ static inline void dim_turn(struct net_dim *dim) } } -static inline void dim_park_on_top(struct net_dim *dim) +static inline void dim_park_on_top(struct dim *dim) { dim->steps_right = 0; dim->steps_left = 0; @@ -115,7 +115,7 @@ static inline void dim_park_on_top(struct net_dim *dim) dim->tune_state = DIM_PARKING_ON_TOP; } -static inline void dim_park_tired(struct net_dim *dim) +static inline void dim_park_tired(struct dim *dim) { dim->steps_right = 0; dim->steps_left = 0; @@ -123,8 +123,7 @@ static inline void dim_park_tired(struct net_dim *dim) } static inline void -net_dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, - struct net_dim_sample *s) +dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, struct dim_sample *s) { s->time = ktime_get(); s->pkt_ctr = packets; @@ -133,7 +132,7 @@ net_dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, } static inline void -dim_calc_stats(struct net_dim_sample *start, struct net_dim_sample *end, +dim_calc_stats(struct dim_sample *start, struct dim_sample *end, struct dim_stats *curr_stats) { /* u32 holds up to 71 minutes, should be enough */ diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h index d4b40adc7fa1..4e009ec193ef 100644 --- a/include/linux/net_dim.h +++ b/include/linux/net_dim.h @@ -77,28 +77,28 @@ {64, 32} \ } -static const struct net_dim_cq_moder +static const struct dim_cq_moder rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { NET_DIM_RX_EQE_PROFILES, NET_DIM_RX_CQE_PROFILES, }; -static const struct net_dim_cq_moder +static const struct dim_cq_moder tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { NET_DIM_TX_EQE_PROFILES, NET_DIM_TX_CQE_PROFILES, }; -static inline struct net_dim_cq_moder +static inline struct dim_cq_moder net_dim_get_rx_moderation(u8 cq_period_mode, int ix) { - struct net_dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix]; + struct dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix]; cq_moder.cq_period_mode = cq_period_mode; return cq_moder; } -static inline struct net_dim_cq_moder +static inline struct dim_cq_moder net_dim_get_def_rx_moderation(u8 cq_period_mode) { u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? @@ -107,16 +107,16 @@ net_dim_get_def_rx_moderation(u8 cq_period_mode) return net_dim_get_rx_moderation(cq_period_mode, profile_ix); } -static inline struct net_dim_cq_moder +static inline struct dim_cq_moder net_dim_get_tx_moderation(u8 cq_period_mode, int ix) { - struct net_dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix]; + struct dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix]; cq_moder.cq_period_mode = cq_period_mode; return cq_moder; } -static inline struct net_dim_cq_moder +static inline struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode) { u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? @@ -125,7 +125,7 @@ net_dim_get_def_tx_moderation(u8 cq_period_mode) return net_dim_get_tx_moderation(cq_period_mode, profile_ix); } -static inline int net_dim_step(struct net_dim *dim) +static inline int net_dim_step(struct dim *dim) { if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2)) return DIM_TOO_TIRED; @@ -152,7 +152,7 @@ static inline int net_dim_step(struct net_dim *dim) return DIM_STEPPED; } -static inline void net_dim_exit_parking(struct net_dim *dim) +static inline void net_dim_exit_parking(struct dim *dim) { dim->tune_state = dim->profile_ix ? DIM_GOING_LEFT : DIM_GOING_RIGHT; @@ -189,7 +189,7 @@ static inline int net_dim_stats_compare(struct dim_stats *curr, } static inline bool net_dim_decision(struct dim_stats *curr_stats, - struct net_dim *dim) + struct dim *dim) { int prev_state = dim->tune_state; int prev_ix = dim->profile_ix; @@ -240,8 +240,8 @@ static inline bool net_dim_decision(struct dim_stats *curr_stats, return dim->profile_ix != prev_ix; } -static inline void net_dim(struct net_dim *dim, - struct net_dim_sample end_sample) +static inline void net_dim(struct dim *dim, + struct dim_sample end_sample) { struct dim_stats curr_stats; u16 nevents; @@ -261,8 +261,8 @@ static inline void net_dim(struct net_dim *dim, } /* fall through */ case DIM_START_MEASURE: - net_dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr, - end_sample.byte_ctr, &dim->start_sample); + dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr, + end_sample.byte_ctr, &dim->start_sample); dim->state = DIM_MEASURE_IN_PROGRESS; break; case DIM_APPLY_NEW_PROFILE: -- cgit v1.2.3 From 4f75da3666c0c572967729a2401ac650be5581b6 Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Thu, 10 Jan 2019 17:33:17 +0200 Subject: linux/dim: Move implementation to .c files Moved all logic from dim.h and net_dim.h to dim.c and net_dim.c. This is both more structurally appealing and would allow to only expose externally used functions. Signed-off-by: Tal Gilboa Signed-off-by: Saeed Mahameed --- MAINTAINERS | 2 +- drivers/net/ethernet/broadcom/Kconfig | 1 + drivers/net/ethernet/broadcom/bcmsysport.h | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c | 2 +- drivers/net/ethernet/broadcom/genet/bcmgenet.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 1 + drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_dim.c | 2 +- include/linux/dim.h | 319 +++++++++++++++++----- include/linux/net_dim.h | 273 ------------------ lib/Kconfig | 8 + lib/Makefile | 1 + lib/dim/Makefile | 9 + lib/dim/dim.c | 74 +++++ lib/dim/net_dim.c | 190 +++++++++++++ 17 files changed, 547 insertions(+), 345 deletions(-) delete mode 100644 include/linux/net_dim.h create mode 100644 lib/dim/Makefile create mode 100644 lib/dim/dim.c create mode 100644 lib/dim/net_dim.c (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 5d4b852d9d39..f78dd16195e3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5588,8 +5588,8 @@ F: include/linux/dynamic_debug.h DYNAMIC INTERRUPT MODERATION M: Tal Gilboa S: Maintained -F: include/linux/net_dim.h F: include/linux/dim.h +F: lib/dim/ DZ DECSTATION DZ11 SERIAL DRIVER M: "Maciej W. Rozycki" diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index b123509d385f..2e4a8c7237ef 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -8,6 +8,7 @@ config NET_VENDOR_BROADCOM default y depends on (SSB_POSSIBLE && HAS_DMA) || PCI || BCM63XX || \ SIBYTE_SB1xxx_SOC + select DIMLIB ---help--- If you have a network (Ethernet) chipset belonging to this class, say Y. diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index cbe6d559d964..f6677a02d811 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include /* Receive/transmit descriptor format */ #define DESC_ADDR_HI_STATUS_LEN 0x00 diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index a552c5539cc9..54c01705f3bd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -23,7 +23,7 @@ #include #include #include -#include +#include struct tx_bd { __le32 tx_bd_len_flags_type; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c index 3d1d53fbb135..61393f351a77 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c @@ -11,7 +11,7 @@ #include #include #include "bnxt_hsi.h" -#include +#include #include "bnxt.h" #include "bnxt_debugfs.h" diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c index 11605f9fa61e..6f6576dc417a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c @@ -7,7 +7,7 @@ * the Free Software Foundation. */ -#include +#include #include "bnxt_hsi.h" #include "bnxt.h" diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 6e418d9c3706..b2f05e47dc65 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include /* total number of Buffer Descriptors, same for Rx/Tx */ #define TOTAL_DESC 256 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 2391e3cfb56b..7845aa5bf6be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -34,6 +34,7 @@ config MLX5_CORE_EN depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE depends on IPV6=y || IPV6=n || MLX5_CORE=m select PAGE_POOL + select DIMLIB default n ---help--- Ethernet support in Mellanox Technologies ConnectX-4 NIC. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 11efd6e4bdc3..abf42d3aabe9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -48,7 +48,7 @@ #include #include #include -#include +#include #include #include "wq.h" #include "mlx5_core.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index ba3c1be9f2d3..ca9cfbf57d8f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -30,7 +30,7 @@ * SOFTWARE. */ -#include +#include #include "en.h" static void diff --git a/include/linux/dim.h b/include/linux/dim.h index 60e5074a7cc0..f48ede3e0322 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -6,20 +6,49 @@ #include +/** + * Number of events between DIM iterations. + * Causes a moderation of the algorithm run. + */ #define DIM_NEVENTS 64 -/* more than 10% difference */ +/** + * Is a difference between values justifies taking an action. + * We consider 10% difference as significant. + */ #define IS_SIGNIFICANT_DIFF(val, ref) \ (((100UL * abs((val) - (ref))) / (ref)) > 10) + +/** + * Calculate the gap between two values. + * Take wrap-around and variable size into consideration. + */ #define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) \ -& (BIT_ULL(bits) - 1)) + & (BIT_ULL(bits) - 1)) +/** + * Structure for CQ moderation values. + * Used for communications between DIM and its consumer. + * + * @usec: CQ timer suggestion (by DIM) + * @pkts: CQ packet counter suggestion (by DIM) + * @cq_period_mode: CQ priod count mode (from CQE/EQE) + */ struct dim_cq_moder { u16 usec; u16 pkts; u8 cq_period_mode; }; +/** + * Structure for DIM sample data. + * Used for communications between DIM and its consumer. + * + * @time: Sample timestamp + * @pkt_ctr: Number of packets + * @byte_ctr: Number of bytes + * @event_ctr: Number of events + */ struct dim_sample { ktime_t time; u32 pkt_ctr; @@ -27,13 +56,36 @@ struct dim_sample { u16 event_ctr; }; +/** + * Structure for DIM stats. + * Used for holding current measured rates. + * + * @ppms: Packets per msec + * @bpms: Bytes per msec + * @epms: Events per msec + */ struct dim_stats { - int ppms; /* packets per msec */ - int bpms; /* bytes per msec */ - int epms; /* events per msec */ + int ppms; + int bpms; + int epms; }; -struct dim { /* Dynamic Interrupt Moderation */ +/** + * Main structure for dynamic interrupt moderation (DIM). + * Used for holding all information about a specific DIM instance. + * + * @state: Algorithm state (see below) + * @prev_stats: Measured rates from previous iteration (for comparison) + * @start_sample: Sampled data at start of current iteration + * @work: Work to perform on action required + * @profile_ix: Current moderation profile + * @mode: CQ period count mode + * @tune_state: Algorithm tuning state (see below) + * @steps_right: Number of steps taken towards higher moderation + * @steps_left: Number of steps taken towards lower moderation + * @tired: Parking depth counter + */ +struct dim { u8 state; struct dim_stats prev_stats; struct dim_sample start_sample; @@ -46,18 +98,49 @@ struct dim { /* Dynamic Interrupt Moderation */ u8 tired; }; +/** + * enum dim_cq_period_mode + * + * These are the modes for CQ period count. + * + * @DIM_CQ_PERIOD_MODE_START_FROM_EQE: Start counting from EQE + * @DIM_CQ_PERIOD_MODE_START_FROM_CQE: Start counting from CQE (implies timer reset) + * @DIM_CQ_PERIOD_NUM_MODES: Number of modes + */ enum { DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, DIM_CQ_PERIOD_NUM_MODES }; +/** + * enum dim_state + * + * These are the DIM algorithm states. + * These will determine if the algorithm is in a valid state to start an iteration. + * + * @DIM_START_MEASURE: This is the first iteration (also after applying a new profile) + * @DIM_MEASURE_IN_PROGRESS: Algorithm is already in progress - check if + * need to perform an action + * @DIM_APPLY_NEW_PROFILE: DIM consumer is currently applying a profile - no need to measure + */ enum { DIM_START_MEASURE, DIM_MEASURE_IN_PROGRESS, DIM_APPLY_NEW_PROFILE, }; +/** + * enum dim_tune_state + * + * These are the DIM algorithm tune states. + * These will determine which action the algorithm should perform. + * + * @DIM_PARKING_ON_TOP: Algorithm found a local top point - exit on significant difference + * @DIM_PARKING_TIRED: Algorithm found a deep top point - don't exit if tired > 0 + * @DIM_GOING_RIGHT: Algorithm is currently trying higher moderation levels + * @DIM_GOING_LEFT: Algorithm is currently trying lower moderation levels + */ enum { DIM_PARKING_ON_TOP, DIM_PARKING_TIRED, @@ -65,63 +148,95 @@ enum { DIM_GOING_LEFT, }; +/** + * enum dim_stats_state + * + * These are the DIM algorithm statistics states. + * These will determine the verdict of current iteration. + * + * @DIM_STATS_WORSE: Current iteration shows worse performance than before + * @DIM_STATS_WORSE: Current iteration shows same performance than before + * @DIM_STATS_WORSE: Current iteration shows better performance than before + */ enum { DIM_STATS_WORSE, DIM_STATS_SAME, DIM_STATS_BETTER, }; +/** + * enum dim_step_result + * + * These are the DIM algorithm step results. + * These describe the result of a step. + * + * @DIM_STEPPED: Performed a regular step + * @DIM_TOO_TIRED: Same kind of step was done multiple times - should go to + * tired parking + * @DIM_ON_EDGE: Stepped to the most left/right profile + */ enum { DIM_STEPPED, DIM_TOO_TIRED, DIM_ON_EDGE, }; -static inline bool dim_on_top(struct dim *dim) -{ - switch (dim->tune_state) { - case DIM_PARKING_ON_TOP: - case DIM_PARKING_TIRED: - return true; - case DIM_GOING_RIGHT: - return (dim->steps_left > 1) && (dim->steps_right == 1); - default: /* DIM_GOING_LEFT */ - return (dim->steps_right > 1) && (dim->steps_left == 1); - } -} +/** + * dim_on_top - check if current state is a good place to stop (top location) + * @dim: DIM context + * + * Check if current profile is a good place to park at. + * This will result in reducing the DIM checks frequency as we assume we + * shouldn't probably change profiles, unless traffic pattern wasn't changed. + */ +bool dim_on_top(struct dim *dim); -static inline void dim_turn(struct dim *dim) -{ - switch (dim->tune_state) { - case DIM_PARKING_ON_TOP: - case DIM_PARKING_TIRED: - break; - case DIM_GOING_RIGHT: - dim->tune_state = DIM_GOING_LEFT; - dim->steps_left = 0; - break; - case DIM_GOING_LEFT: - dim->tune_state = DIM_GOING_RIGHT; - dim->steps_right = 0; - break; - } -} +/** + * dim_turn - change profile alterning direction + * @dim: DIM context + * + * Go left if we were going right and vice-versa. + * Do nothing if currently parking. + */ +void dim_turn(struct dim *dim); -static inline void dim_park_on_top(struct dim *dim) -{ - dim->steps_right = 0; - dim->steps_left = 0; - dim->tired = 0; - dim->tune_state = DIM_PARKING_ON_TOP; -} +/** + * dim_park_on_top - enter a parking state on a top location + * @dim: DIM context + * + * Enter parking state. + * Clear all movement history. + */ +void dim_park_on_top(struct dim *dim); -static inline void dim_park_tired(struct dim *dim) -{ - dim->steps_right = 0; - dim->steps_left = 0; - dim->tune_state = DIM_PARKING_TIRED; -} +/** + * dim_park_tired - enter a tired parking state + * @dim: DIM context + * + * Enter parking state. + * Clear all movement history and cause DIM checks frequency to reduce. + */ +void dim_park_tired(struct dim *dim); + +/** + * dim_calc_stats - calculate the difference between two samples + * @start: start sample + * @end: end sample + * @curr_stats: delta between samples + * + * Calculate the delta between two samples (in data rates). + * Takes into consideration counter wrap-around. + */ +void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, + struct dim_stats *curr_stats); +/** + * dim_update_sample - set a sample's fields with give values + * @event_ctr: number of events to set + * @packets: number of packets to set + * @bytes: number of bytes to set + * @s: DIM sample + */ static inline void dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, struct dim_sample *s) { @@ -131,23 +246,99 @@ dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, struct dim_sample *s) s->event_ctr = event_ctr; } -static inline void -dim_calc_stats(struct dim_sample *start, struct dim_sample *end, - struct dim_stats *curr_stats) -{ - /* u32 holds up to 71 minutes, should be enough */ - u32 delta_us = ktime_us_delta(end->time, start->time); - u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr); - u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr, - start->byte_ctr); - - if (!delta_us) - return; - - curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us); - curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us); - curr_stats->epms = DIV_ROUND_UP(DIM_NEVENTS * USEC_PER_MSEC, - delta_us); +/* Net DIM */ + +/* + * Net DIM profiles: + * There are different set of profiles for each CQ period mode. + * There are different set of profiles for RX/TX CQs. + * Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES + */ +#define NET_DIM_PARAMS_NUM_PROFILES 5 +#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 +#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128 +#define NET_DIM_DEF_PROFILE_CQE 1 +#define NET_DIM_DEF_PROFILE_EQE 1 + +#define NET_DIM_RX_EQE_PROFILES { \ + {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ } +#define NET_DIM_RX_CQE_PROFILES { \ + {2, 256}, \ + {8, 128}, \ + {16, 64}, \ + {32, 64}, \ + {64, 64} \ +} + +#define NET_DIM_TX_EQE_PROFILES { \ + {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \ +} + +#define NET_DIM_TX_CQE_PROFILES { \ + {5, 128}, \ + {8, 64}, \ + {16, 32}, \ + {32, 32}, \ + {64, 32} \ +} + +static const struct dim_cq_moder +rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { + NET_DIM_RX_EQE_PROFILES, + NET_DIM_RX_CQE_PROFILES, +}; + +static const struct dim_cq_moder +tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { + NET_DIM_TX_EQE_PROFILES, + NET_DIM_TX_CQE_PROFILES, +}; + +/** + * net_dim_get_rx_moderation - provide a CQ moderation object for the given RX profile + * @cq_period_mode: CQ period mode + * @ix: Profile index + */ +struct dim_cq_moder net_dim_get_rx_moderation(u8 cq_period_mode, int ix); + +/** + * net_dim_get_def_rx_moderation - provide the default RX moderation + * @cq_period_mode: CQ period mode + */ +struct dim_cq_moder net_dim_get_def_rx_moderation(u8 cq_period_mode); + +/** + * net_dim_get_tx_moderation - provide a CQ moderation object for the given TX profile + * @cq_period_mode: CQ period mode + * @ix: Profile index + */ +struct dim_cq_moder net_dim_get_tx_moderation(u8 cq_period_mode, int ix); + +/** + * net_dim_get_def_tx_moderation - provide the default TX moderation + * @cq_period_mode: CQ period mode + */ +struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode); + +/** + * net_dim - main DIM algorithm entry point + * @dim: DIM instance information + * @end_sample: Current data measurement + * + * Called by the consumer. + * This is the main logic of the algorithm, where data is processed in order to decide on next + * required action. + */ +void net_dim(struct dim *dim, struct dim_sample end_sample); + #endif /* DIM_H */ diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h deleted file mode 100644 index 4e009ec193ef..000000000000 --- a/include/linux/net_dim.h +++ /dev/null @@ -1,273 +0,0 @@ -/* - * Copyright (c) 2016, Mellanox Technologies. All rights reserved. - * Copyright (c) 2017-2018, Broadcom Limited. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef NET_DIM_H -#define NET_DIM_H - -#include -#include - -#define NET_DIM_PARAMS_NUM_PROFILES 5 -/* Netdev dynamic interrupt moderation profiles */ -#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 -#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128 -#define NET_DIM_DEF_PROFILE_CQE 1 -#define NET_DIM_DEF_PROFILE_EQE 1 - -/* All profiles sizes must be NET_PARAMS_DIM_NUM_PROFILES */ -#define NET_DIM_RX_EQE_PROFILES { \ - {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ -} - -#define NET_DIM_RX_CQE_PROFILES { \ - {2, 256}, \ - {8, 128}, \ - {16, 64}, \ - {32, 64}, \ - {64, 64} \ -} - -#define NET_DIM_TX_EQE_PROFILES { \ - {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \ -} - -#define NET_DIM_TX_CQE_PROFILES { \ - {5, 128}, \ - {8, 64}, \ - {16, 32}, \ - {32, 32}, \ - {64, 32} \ -} - -static const struct dim_cq_moder -rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { - NET_DIM_RX_EQE_PROFILES, - NET_DIM_RX_CQE_PROFILES, -}; - -static const struct dim_cq_moder -tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { - NET_DIM_TX_EQE_PROFILES, - NET_DIM_TX_CQE_PROFILES, -}; - -static inline struct dim_cq_moder -net_dim_get_rx_moderation(u8 cq_period_mode, int ix) -{ - struct dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix]; - - cq_moder.cq_period_mode = cq_period_mode; - return cq_moder; -} - -static inline struct dim_cq_moder -net_dim_get_def_rx_moderation(u8 cq_period_mode) -{ - u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? - NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; - - return net_dim_get_rx_moderation(cq_period_mode, profile_ix); -} - -static inline struct dim_cq_moder -net_dim_get_tx_moderation(u8 cq_period_mode, int ix) -{ - struct dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix]; - - cq_moder.cq_period_mode = cq_period_mode; - return cq_moder; -} - -static inline struct dim_cq_moder -net_dim_get_def_tx_moderation(u8 cq_period_mode) -{ - u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? - NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; - - return net_dim_get_tx_moderation(cq_period_mode, profile_ix); -} - -static inline int net_dim_step(struct dim *dim) -{ - if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2)) - return DIM_TOO_TIRED; - - switch (dim->tune_state) { - case DIM_PARKING_ON_TOP: - case DIM_PARKING_TIRED: - break; - case DIM_GOING_RIGHT: - if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1)) - return DIM_ON_EDGE; - dim->profile_ix++; - dim->steps_right++; - break; - case DIM_GOING_LEFT: - if (dim->profile_ix == 0) - return DIM_ON_EDGE; - dim->profile_ix--; - dim->steps_left++; - break; - } - - dim->tired++; - return DIM_STEPPED; -} - -static inline void net_dim_exit_parking(struct dim *dim) -{ - dim->tune_state = dim->profile_ix ? DIM_GOING_LEFT : - DIM_GOING_RIGHT; - net_dim_step(dim); -} - -static inline int net_dim_stats_compare(struct dim_stats *curr, - struct dim_stats *prev) -{ - if (!prev->bpms) - return curr->bpms ? DIM_STATS_BETTER : - DIM_STATS_SAME; - - if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms)) - return (curr->bpms > prev->bpms) ? DIM_STATS_BETTER : - DIM_STATS_WORSE; - - if (!prev->ppms) - return curr->ppms ? DIM_STATS_BETTER : - DIM_STATS_SAME; - - if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms)) - return (curr->ppms > prev->ppms) ? DIM_STATS_BETTER : - DIM_STATS_WORSE; - - if (!prev->epms) - return DIM_STATS_SAME; - - if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms)) - return (curr->epms < prev->epms) ? DIM_STATS_BETTER : - DIM_STATS_WORSE; - - return DIM_STATS_SAME; -} - -static inline bool net_dim_decision(struct dim_stats *curr_stats, - struct dim *dim) -{ - int prev_state = dim->tune_state; - int prev_ix = dim->profile_ix; - int stats_res; - int step_res; - - switch (dim->tune_state) { - case DIM_PARKING_ON_TOP: - stats_res = net_dim_stats_compare(curr_stats, &dim->prev_stats); - if (stats_res != DIM_STATS_SAME) - net_dim_exit_parking(dim); - break; - - case DIM_PARKING_TIRED: - dim->tired--; - if (!dim->tired) - net_dim_exit_parking(dim); - break; - - case DIM_GOING_RIGHT: - case DIM_GOING_LEFT: - stats_res = net_dim_stats_compare(curr_stats, &dim->prev_stats); - if (stats_res != DIM_STATS_BETTER) - dim_turn(dim); - - if (dim_on_top(dim)) { - dim_park_on_top(dim); - break; - } - - step_res = net_dim_step(dim); - switch (step_res) { - case DIM_ON_EDGE: - dim_park_on_top(dim); - break; - case DIM_TOO_TIRED: - dim_park_tired(dim); - break; - } - - break; - } - - if (prev_state != DIM_PARKING_ON_TOP || - dim->tune_state != DIM_PARKING_ON_TOP) - dim->prev_stats = *curr_stats; - - return dim->profile_ix != prev_ix; -} - -static inline void net_dim(struct dim *dim, - struct dim_sample end_sample) -{ - struct dim_stats curr_stats; - u16 nevents; - - switch (dim->state) { - case DIM_MEASURE_IN_PROGRESS: - nevents = BIT_GAP(BITS_PER_TYPE(u16), - end_sample.event_ctr, - dim->start_sample.event_ctr); - if (nevents < DIM_NEVENTS) - break; - dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats); - if (net_dim_decision(&curr_stats, dim)) { - dim->state = DIM_APPLY_NEW_PROFILE; - schedule_work(&dim->work); - break; - } - /* fall through */ - case DIM_START_MEASURE: - dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr, - end_sample.byte_ctr, &dim->start_sample); - dim->state = DIM_MEASURE_IN_PROGRESS; - break; - case DIM_APPLY_NEW_PROFILE: - break; - } -} - -#endif /* NET_DIM_H */ diff --git a/lib/Kconfig b/lib/Kconfig index 90623a0e1942..78ddb9526b62 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -562,6 +562,14 @@ config SIGNATURE Digital signature verification. Currently only RSA is supported. Implementation is done using GnuPG MPI library +config DIMLIB + bool "DIM library" + default y + help + Dynamic Interrupt Moderation library. + Implements an algorithm for dynamically change CQ modertion values + according to run time performance. + # # libfdt files, only selected if needed. # diff --git a/lib/Makefile b/lib/Makefile index fb7697031a79..dcb558c7554d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -202,6 +202,7 @@ obj-$(CONFIG_GLOB) += glob.o obj-$(CONFIG_GLOB_SELFTEST) += globtest.o obj-$(CONFIG_MPILIB) += mpi/ +obj-$(CONFIG_DIMLIB) += dim/ obj-$(CONFIG_SIGNATURE) += digsig.o lib-$(CONFIG_CLZ_TAB) += clz_tab.o diff --git a/lib/dim/Makefile b/lib/dim/Makefile new file mode 100644 index 000000000000..160afe288df0 --- /dev/null +++ b/lib/dim/Makefile @@ -0,0 +1,9 @@ +# +# DIM Dynamic Interrupt Moderation library +# + +obj-$(CONFIG_DIMLIB) = net_dim.o + +net_dim-y = \ + dim.o \ + net_dim.o diff --git a/lib/dim/dim.c b/lib/dim/dim.c new file mode 100644 index 000000000000..17d5236759bd --- /dev/null +++ b/lib/dim/dim.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. + */ + +#include + +bool dim_on_top(struct dim *dim) +{ + switch (dim->tune_state) { + case DIM_PARKING_ON_TOP: + case DIM_PARKING_TIRED: + return true; + case DIM_GOING_RIGHT: + return (dim->steps_left > 1) && (dim->steps_right == 1); + default: /* DIM_GOING_LEFT */ + return (dim->steps_right > 1) && (dim->steps_left == 1); + } +} +EXPORT_SYMBOL(dim_on_top); + +void dim_turn(struct dim *dim) +{ + switch (dim->tune_state) { + case DIM_PARKING_ON_TOP: + case DIM_PARKING_TIRED: + break; + case DIM_GOING_RIGHT: + dim->tune_state = DIM_GOING_LEFT; + dim->steps_left = 0; + break; + case DIM_GOING_LEFT: + dim->tune_state = DIM_GOING_RIGHT; + dim->steps_right = 0; + break; + } +} +EXPORT_SYMBOL(dim_turn); + +void dim_park_on_top(struct dim *dim) +{ + dim->steps_right = 0; + dim->steps_left = 0; + dim->tired = 0; + dim->tune_state = DIM_PARKING_ON_TOP; +} +EXPORT_SYMBOL(dim_park_on_top); + +void dim_park_tired(struct dim *dim) +{ + dim->steps_right = 0; + dim->steps_left = 0; + dim->tune_state = DIM_PARKING_TIRED; +} +EXPORT_SYMBOL(dim_park_tired); + +void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, + struct dim_stats *curr_stats) +{ + /* u32 holds up to 71 minutes, should be enough */ + u32 delta_us = ktime_us_delta(end->time, start->time); + u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr); + u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr, + start->byte_ctr); + + if (!delta_us) + return; + + curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us); + curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us); + curr_stats->epms = DIV_ROUND_UP(DIM_NEVENTS * USEC_PER_MSEC, + delta_us); +} +EXPORT_SYMBOL(dim_calc_stats); diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c new file mode 100644 index 000000000000..5bcc902c5388 --- /dev/null +++ b/lib/dim/net_dim.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + */ + +#include + +struct dim_cq_moder +net_dim_get_rx_moderation(u8 cq_period_mode, int ix) +{ + struct dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix]; + + cq_moder.cq_period_mode = cq_period_mode; + return cq_moder; +} +EXPORT_SYMBOL(net_dim_get_rx_moderation); + +struct dim_cq_moder +net_dim_get_def_rx_moderation(u8 cq_period_mode) +{ + u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? + NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; + + return net_dim_get_rx_moderation(cq_period_mode, profile_ix); +} +EXPORT_SYMBOL(net_dim_get_def_rx_moderation); + +struct dim_cq_moder +net_dim_get_tx_moderation(u8 cq_period_mode, int ix) +{ + struct dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix]; + + cq_moder.cq_period_mode = cq_period_mode; + return cq_moder; +} +EXPORT_SYMBOL(net_dim_get_tx_moderation); + +struct dim_cq_moder +net_dim_get_def_tx_moderation(u8 cq_period_mode) +{ + u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? + NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; + + return net_dim_get_tx_moderation(cq_period_mode, profile_ix); +} +EXPORT_SYMBOL(net_dim_get_def_tx_moderation); + +static int net_dim_step(struct dim *dim) +{ + if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2)) + return DIM_TOO_TIRED; + + switch (dim->tune_state) { + case DIM_PARKING_ON_TOP: + case DIM_PARKING_TIRED: + break; + case DIM_GOING_RIGHT: + if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1)) + return DIM_ON_EDGE; + dim->profile_ix++; + dim->steps_right++; + break; + case DIM_GOING_LEFT: + if (dim->profile_ix == 0) + return DIM_ON_EDGE; + dim->profile_ix--; + dim->steps_left++; + break; + } + + dim->tired++; + return DIM_STEPPED; +} + +static void net_dim_exit_parking(struct dim *dim) +{ + dim->tune_state = dim->profile_ix ? DIM_GOING_LEFT : DIM_GOING_RIGHT; + net_dim_step(dim); +} + +static int net_dim_stats_compare(struct dim_stats *curr, + struct dim_stats *prev) +{ + if (!prev->bpms) + return curr->bpms ? DIM_STATS_BETTER : DIM_STATS_SAME; + + if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms)) + return (curr->bpms > prev->bpms) ? DIM_STATS_BETTER : + DIM_STATS_WORSE; + + if (!prev->ppms) + return curr->ppms ? DIM_STATS_BETTER : + DIM_STATS_SAME; + + if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms)) + return (curr->ppms > prev->ppms) ? DIM_STATS_BETTER : + DIM_STATS_WORSE; + + if (!prev->epms) + return DIM_STATS_SAME; + + if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms)) + return (curr->epms < prev->epms) ? DIM_STATS_BETTER : + DIM_STATS_WORSE; + + return DIM_STATS_SAME; +} + +static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim) +{ + int prev_state = dim->tune_state; + int prev_ix = dim->profile_ix; + int stats_res; + int step_res; + + switch (dim->tune_state) { + case DIM_PARKING_ON_TOP: + stats_res = net_dim_stats_compare(curr_stats, + &dim->prev_stats); + if (stats_res != DIM_STATS_SAME) + net_dim_exit_parking(dim); + break; + + case DIM_PARKING_TIRED: + dim->tired--; + if (!dim->tired) + net_dim_exit_parking(dim); + break; + + case DIM_GOING_RIGHT: + case DIM_GOING_LEFT: + stats_res = net_dim_stats_compare(curr_stats, + &dim->prev_stats); + if (stats_res != DIM_STATS_BETTER) + dim_turn(dim); + + if (dim_on_top(dim)) { + dim_park_on_top(dim); + break; + } + + step_res = net_dim_step(dim); + switch (step_res) { + case DIM_ON_EDGE: + dim_park_on_top(dim); + break; + case DIM_TOO_TIRED: + dim_park_tired(dim); + break; + } + + break; + } + + if (prev_state != DIM_PARKING_ON_TOP || + dim->tune_state != DIM_PARKING_ON_TOP) + dim->prev_stats = *curr_stats; + + return dim->profile_ix != prev_ix; +} + +void net_dim(struct dim *dim, struct dim_sample end_sample) +{ + struct dim_stats curr_stats; + u16 nevents; + + switch (dim->state) { + case DIM_MEASURE_IN_PROGRESS: + nevents = BIT_GAP(BITS_PER_TYPE(u16), + end_sample.event_ctr, + dim->start_sample.event_ctr); + if (nevents < DIM_NEVENTS) + break; + dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats); + if (net_dim_decision(&curr_stats, dim)) { + dim->state = DIM_APPLY_NEW_PROFILE; + schedule_work(&dim->work); + break; + } + /* fall through */ + case DIM_START_MEASURE: + dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr, + end_sample.byte_ctr, &dim->start_sample); + dim->state = DIM_MEASURE_IN_PROGRESS; + break; + case DIM_APPLY_NEW_PROFILE: + break; + } +} +EXPORT_SYMBOL(net_dim); -- cgit v1.2.3 From 398c2b05bbee21cc172dfff017c0351d4d14e04c Mon Sep 17 00:00:00 2001 From: Yamin Friedman Date: Thu, 22 Nov 2018 09:51:17 +0200 Subject: linux/dim: Add completions count to dim_sample Added a measurement of completions per/msec to allow for completion based dim algorithms. In order to use dynamic interrupt moderation with RDMA we need to have a different measurment than packets per second. This change is meant to prepare for adding a new DIM method. All drivers that use net_dim and thus do not need a completion count will have the completions set to 0. Signed-off-by: Yamin Friedman Reviewed-by: Max Gurtovoy Signed-off-by: Saeed Mahameed --- include/linux/dim.h | 28 +++++++++++++++++++++++++--- lib/dim/dim.c | 9 +++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dim.h b/include/linux/dim.h index f48ede3e0322..aa9bdd47a648 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -37,6 +37,7 @@ struct dim_cq_moder { u16 usec; u16 pkts; + u16 comps; u8 cq_period_mode; }; @@ -54,6 +55,7 @@ struct dim_sample { u32 pkt_ctr; u32 byte_ctr; u16 event_ctr; + u32 comp_ctr; }; /** @@ -65,9 +67,11 @@ struct dim_sample { * @epms: Events per msec */ struct dim_stats { - int ppms; - int bpms; - int epms; + int ppms; /* packets per msec */ + int bpms; /* bytes per msec */ + int epms; /* events per msec */ + int cpms; /* completions per msec */ + int cpe_ratio; /* ratio of completions to events */ }; /** @@ -89,6 +93,7 @@ struct dim { u8 state; struct dim_stats prev_stats; struct dim_sample start_sample; + struct dim_sample measuring_sample; struct work_struct work; u8 profile_ix; u8 mode; @@ -246,6 +251,23 @@ dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, struct dim_sample *s) s->event_ctr = event_ctr; } +/** + * dim_update_sample_with_comps - set a sample's fields with given + * values including the completion parameter + * @event_ctr: number of events to set + * @packets: number of packets to set + * @bytes: number of bytes to set + * @comps: number of completions to set + * @s: DIM sample + */ +static inline void +dim_update_sample_with_comps(u16 event_ctr, u64 packets, u64 bytes, u64 comps, + struct dim_sample *s) +{ + dim_update_sample(event_ctr, packets, bytes, s); + s->comp_ctr = comps; +} + /* Net DIM */ /* diff --git a/lib/dim/dim.c b/lib/dim/dim.c index 17d5236759bd..439d641ec796 100644 --- a/lib/dim/dim.c +++ b/lib/dim/dim.c @@ -62,6 +62,8 @@ void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr); u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr, start->byte_ctr); + u32 ncomps = BIT_GAP(BITS_PER_TYPE(u32), end->comp_ctr, + start->comp_ctr); if (!delta_us) return; @@ -70,5 +72,12 @@ void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us); curr_stats->epms = DIV_ROUND_UP(DIM_NEVENTS * USEC_PER_MSEC, delta_us); + curr_stats->cpms = DIV_ROUND_UP(ncomps * USEC_PER_MSEC, delta_us); + if (curr_stats->epms != 0) + curr_stats->cpe_ratio = + (curr_stats->cpms * 100) / curr_stats->epms; + else + curr_stats->cpe_ratio = 0; + } EXPORT_SYMBOL(dim_calc_stats); -- cgit v1.2.3 From 2f25528e4edddc6eddd42c8d41c9c9e341c8b9da Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Wed, 19 Jun 2019 11:39:25 +0200 Subject: clk: Add clk_bulk_get_optional() function clk_bulk_get_optional() allows to get a group of clocks where one or more is optional. For a not available clock, e.g. not specifed in the clock consumer node in DT, its respective struct clk pointer will be NULL. This allows for operating on a group of returned clocks (struct clk_bulk_data array) with existing clk_bulk* APIs. Signed-off-by: Sylwester Nawrocki Signed-off-by: Stephen Boyd --- drivers/clk/clk-bulk.c | 23 ++++++++++++++++++++--- include/linux/clk.h | 19 +++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-bulk.c b/drivers/clk/clk-bulk.c index 06499568cf07..524bf9a53098 100644 --- a/drivers/clk/clk-bulk.c +++ b/drivers/clk/clk-bulk.c @@ -75,8 +75,8 @@ void clk_bulk_put(int num_clks, struct clk_bulk_data *clks) } EXPORT_SYMBOL_GPL(clk_bulk_put); -int __must_check clk_bulk_get(struct device *dev, int num_clks, - struct clk_bulk_data *clks) +static int __clk_bulk_get(struct device *dev, int num_clks, + struct clk_bulk_data *clks, bool optional) { int ret; int i; @@ -88,10 +88,14 @@ int __must_check clk_bulk_get(struct device *dev, int num_clks, clks[i].clk = clk_get(dev, clks[i].id); if (IS_ERR(clks[i].clk)) { ret = PTR_ERR(clks[i].clk); + clks[i].clk = NULL; + + if (ret == -ENOENT && optional) + continue; + if (ret != -EPROBE_DEFER) dev_err(dev, "Failed to get clk '%s': %d\n", clks[i].id, ret); - clks[i].clk = NULL; goto err; } } @@ -103,8 +107,21 @@ err: return ret; } + +int __must_check clk_bulk_get(struct device *dev, int num_clks, + struct clk_bulk_data *clks) +{ + return __clk_bulk_get(dev, num_clks, clks, false); +} EXPORT_SYMBOL(clk_bulk_get); +int __must_check clk_bulk_get_optional(struct device *dev, int num_clks, + struct clk_bulk_data *clks) +{ + return __clk_bulk_get(dev, num_clks, clks, true); +} +EXPORT_SYMBOL_GPL(clk_bulk_get_optional); + void clk_bulk_put_all(int num_clks, struct clk_bulk_data *clks) { if (IS_ERR_OR_NULL(clks)) diff --git a/include/linux/clk.h b/include/linux/clk.h index f689fc58d7be..1b50e7d1675c 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -332,6 +332,19 @@ int __must_check clk_bulk_get(struct device *dev, int num_clks, */ int __must_check clk_bulk_get_all(struct device *dev, struct clk_bulk_data **clks); + +/** + * clk_bulk_get_optional - lookup and obtain a number of references to clock producer + * @dev: device for clock "consumer" + * @num_clks: the number of clk_bulk_data + * @clks: the clk_bulk_data table of consumer + * + * Behaves the same as clk_bulk_get() except where there is no clock producer. + * In this case, instead of returning -ENOENT, the function returns 0 and + * NULL for a clk for which a clock producer could not be determined. + */ +int __must_check clk_bulk_get_optional(struct device *dev, int num_clks, + struct clk_bulk_data *clks); /** * devm_clk_bulk_get - managed get multiple clk consumers * @dev: device for clock "consumer" @@ -718,6 +731,12 @@ static inline int __must_check clk_bulk_get(struct device *dev, int num_clks, return 0; } +static inline int __must_check clk_bulk_get_optional(struct device *dev, + int num_clks, struct clk_bulk_data *clks) +{ + return 0; +} + static inline int __must_check clk_bulk_get_all(struct device *dev, struct clk_bulk_data **clks) { -- cgit v1.2.3 From 9bd5ef0bd8743700d9adffb6fbb1baa346575457 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Wed, 19 Jun 2019 11:39:26 +0200 Subject: clk: Add devm_clk_bulk_get_optional() function Add managed version of the clk_bulk_get_optional() helper function. Signed-off-by: Sylwester Nawrocki [sboyd@kernel.org: Mark __devm_clk_bulk_get() static] Signed-off-by: Stephen Boyd --- drivers/clk/clk-devres.c | 22 +++++++++++++++++++--- include/linux/clk.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c index daa1fc8fba53..be160764911b 100644 --- a/drivers/clk/clk-devres.c +++ b/drivers/clk/clk-devres.c @@ -52,8 +52,8 @@ static void devm_clk_bulk_release(struct device *dev, void *res) clk_bulk_put(devres->num_clks, devres->clks); } -int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, - struct clk_bulk_data *clks) +static int __devm_clk_bulk_get(struct device *dev, int num_clks, + struct clk_bulk_data *clks, bool optional) { struct clk_bulk_devres *devres; int ret; @@ -63,7 +63,10 @@ int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, if (!devres) return -ENOMEM; - ret = clk_bulk_get(dev, num_clks, clks); + if (optional) + ret = clk_bulk_get_optional(dev, num_clks, clks); + else + ret = clk_bulk_get(dev, num_clks, clks); if (!ret) { devres->clks = clks; devres->num_clks = num_clks; @@ -74,8 +77,21 @@ int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, return ret; } + +int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, + struct clk_bulk_data *clks) +{ + return __devm_clk_bulk_get(dev, num_clks, clks, false); +} EXPORT_SYMBOL_GPL(devm_clk_bulk_get); +int __must_check devm_clk_bulk_get_optional(struct device *dev, int num_clks, + struct clk_bulk_data *clks) +{ + return __devm_clk_bulk_get(dev, num_clks, clks, true); +} +EXPORT_SYMBOL_GPL(devm_clk_bulk_get_optional); + int __must_check devm_clk_bulk_get_all(struct device *dev, struct clk_bulk_data **clks) { diff --git a/include/linux/clk.h b/include/linux/clk.h index 1b50e7d1675c..5e7b2dd84965 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -359,6 +359,28 @@ int __must_check clk_bulk_get_optional(struct device *dev, int num_clks, */ int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, struct clk_bulk_data *clks); +/** + * devm_clk_bulk_get_optional - managed get multiple optional consumer clocks + * @dev: device for clock "consumer" + * @clks: pointer to the clk_bulk_data table of consumer + * + * Behaves the same as devm_clk_bulk_get() except where there is no clock + * producer. In this case, instead of returning -ENOENT, the function returns + * NULL for given clk. It is assumed all clocks in clk_bulk_data are optional. + * + * Returns 0 if all clocks specified in clk_bulk_data table are obtained + * successfully or for any clk there was no clk provider available, otherwise + * returns valid IS_ERR() condition containing errno. + * The implementation uses @dev and @clk_bulk_data.id to determine the + * clock consumer, and thereby the clock producer. + * The clock returned is stored in each @clk_bulk_data.clk field. + * + * Drivers must assume that the clock source is not enabled. + * + * clk_bulk_get should not be called from within interrupt context. + */ +int __must_check devm_clk_bulk_get_optional(struct device *dev, int num_clks, + struct clk_bulk_data *clks); /** * devm_clk_bulk_get_all - managed get multiple clk consumers * @dev: device for clock "consumer" @@ -760,6 +782,12 @@ static inline int __must_check devm_clk_bulk_get(struct device *dev, int num_clk return 0; } +static inline int __must_check devm_clk_bulk_get_optional(struct device *dev, + int num_clks, struct clk_bulk_data *clks) +{ + return 0; +} + static inline int __must_check devm_clk_bulk_get_all(struct device *dev, struct clk_bulk_data **clks) { -- cgit v1.2.3 From 0966648dd5a5f4037d29d233866b7a4db39d07f7 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 25 Jun 2019 14:38:46 +0900 Subject: usb: renesas_usbhs: remove notify_hotplug callback The notify_hotplug callback was supported in v3.10, but the last user (armadillo800eva) was removed by the commit 1fa59bda21c7 ("ARM: shmobile: Remove legacy board code for Armadillo-800 EVA"). So, this patch removes it. Signed-off-by: Yoshihiro Shimoda Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/common.c | 16 ++++------------ drivers/usb/renesas_usbhs/common.h | 2 ++ drivers/usb/renesas_usbhs/mod.c | 3 ++- drivers/usb/renesas_usbhs/mod_gadget.c | 3 ++- include/linux/usb/renesas_usbhs.h | 26 +------------------------- 5 files changed, 11 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index ebbe322182bd..f6b136a4f91e 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -3,6 +3,7 @@ * Renesas USB driver * * Copyright (C) 2011 Renesas Solutions Corp. + * Copyright (C) 2019 Renesas Electronics Corporation * Kuninori Morimoto */ #include @@ -513,7 +514,7 @@ static void usbhsc_notify_hotplug(struct work_struct *work) usbhsc_hotplug(priv); } -static int usbhsc_drvcllbck_notify_hotplug(struct platform_device *pdev) +int usbhsc_schedule_notify_hotplug(struct platform_device *pdev) { struct usbhs_priv *priv = usbhs_pdev_to_priv(pdev); int delay = usbhs_get_dparam(priv, detection_delay); @@ -667,7 +668,6 @@ static struct renesas_usbhs_platform_info *usbhs_parse_dt(struct device *dev) static int usbhs_probe(struct platform_device *pdev) { struct renesas_usbhs_platform_info *info = renesas_usbhs_get_info(pdev); - struct renesas_usbhs_driver_callback *dfunc; struct usbhs_priv *priv; struct resource *res, *irq_res; int ret; @@ -721,10 +721,6 @@ static int usbhs_probe(struct platform_device *pdev) } priv->pfunc = info->platform_callback; - /* set driver callback functions for platform */ - dfunc = &info->driver_callback; - dfunc->notify_hotplug = usbhsc_drvcllbck_notify_hotplug; - /* set default param if platform doesn't have */ if (!priv->dparam.pipe_configs) { priv->dparam.pipe_configs = usbhsc_default_pipe; @@ -818,7 +814,7 @@ static int usbhs_probe(struct platform_device *pdev) /* * manual call notify_hotplug for cold plug */ - usbhsc_drvcllbck_notify_hotplug(pdev); + usbhsc_schedule_notify_hotplug(pdev); dev_info(&pdev->dev, "probed\n"); @@ -843,13 +839,9 @@ probe_end_pipe_exit: static int usbhs_remove(struct platform_device *pdev) { struct usbhs_priv *priv = usbhs_pdev_to_priv(pdev); - struct renesas_usbhs_platform_info *info = renesas_usbhs_get_info(pdev); - struct renesas_usbhs_driver_callback *dfunc = &info->driver_callback; dev_dbg(&pdev->dev, "usb remove\n"); - dfunc->notify_hotplug = NULL; - /* power off */ if (!usbhs_get_dparam(priv, runtime_pwctrl)) usbhsc_power_ctrl(priv, 0); @@ -894,7 +886,7 @@ static __maybe_unused int usbhsc_resume(struct device *dev) usbhs_platform_call(priv, phy_reset, pdev); - usbhsc_drvcllbck_notify_hotplug(pdev); + usbhsc_schedule_notify_hotplug(pdev); return 0; } diff --git a/drivers/usb/renesas_usbhs/common.h b/drivers/usb/renesas_usbhs/common.h index de74ebd1a347..b2b21fbb7ce2 100644 --- a/drivers/usb/renesas_usbhs/common.h +++ b/drivers/usb/renesas_usbhs/common.h @@ -3,6 +3,7 @@ * Renesas USB driver * * Copyright (C) 2011 Renesas Solutions Corp. + * Copyright (C) 2019 Renesas Electronics Corporation * Kuninori Morimoto */ #ifndef RENESAS_USB_DRIVER_H @@ -317,6 +318,7 @@ void usbhs_bus_send_sof_enable(struct usbhs_priv *priv); void usbhs_bus_send_reset(struct usbhs_priv *priv); int usbhs_bus_get_speed(struct usbhs_priv *priv); int usbhs_vbus_ctrl(struct usbhs_priv *priv, int enable); +int usbhsc_schedule_notify_hotplug(struct platform_device *pdev); /* * frame diff --git a/drivers/usb/renesas_usbhs/mod.c b/drivers/usb/renesas_usbhs/mod.c index 7475c4f64724..540472abb23a 100644 --- a/drivers/usb/renesas_usbhs/mod.c +++ b/drivers/usb/renesas_usbhs/mod.c @@ -3,6 +3,7 @@ * Renesas USB driver * * Copyright (C) 2011 Renesas Solutions Corp. + * Copyright (C) 2019 Renesas Electronics Corporation * Kuninori Morimoto */ #include @@ -41,7 +42,7 @@ static int usbhsm_autonomy_irq_vbus(struct usbhs_priv *priv, { struct platform_device *pdev = usbhs_priv_to_pdev(priv); - renesas_usbhs_call_notify_hotplug(pdev); + usbhsc_schedule_notify_hotplug(pdev); return 0; } diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index 59cac40aafcc..0c1e8fa528fc 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -3,6 +3,7 @@ * Renesas USB driver * * Copyright (C) 2011 Renesas Solutions Corp. + * Copyright (C) 2019 Renesas Electronics Corporation * Kuninori Morimoto */ #include @@ -1023,7 +1024,7 @@ static int usbhsg_vbus_session(struct usb_gadget *gadget, int is_active) gpriv->vbus_active = !!is_active; - renesas_usbhs_call_notify_hotplug(pdev); + usbhsc_schedule_notify_hotplug(pdev); return 0; } diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index b2cba7c74444..ac601be95ec0 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -3,6 +3,7 @@ * Renesas USB * * Copyright (C) 2011 Renesas Solutions Corp. + * Copyright (C) 2019 Renesas Electronics Corporation * Kuninori Morimoto * * This program is distributed in the hope that it will be useful, @@ -32,17 +33,6 @@ enum { USBHS_MAX, }; -/* - * callback functions table for driver - * - * These functions are called from platform for driver. - * Callback function's pointer will be set before - * renesas_usbhs_platform_callback :: hardware_init was called - */ -struct renesas_usbhs_driver_callback { - int (*notify_hotplug)(struct platform_device *pdev); -}; - /* * callback functions for platform * @@ -213,12 +203,6 @@ struct renesas_usbhs_platform_info { */ struct renesas_usbhs_platform_callback platform_callback; - /* - * driver set these callback functions pointer. - * platform can use it on callback functions - */ - struct renesas_usbhs_driver_callback driver_callback; - /* * option: * @@ -232,12 +216,4 @@ struct renesas_usbhs_platform_info { */ #define renesas_usbhs_get_info(pdev)\ ((struct renesas_usbhs_platform_info *)(pdev)->dev.platform_data) - -#define renesas_usbhs_call_notify_hotplug(pdev) \ - ({ \ - struct renesas_usbhs_driver_callback *dc; \ - dc = &(renesas_usbhs_get_info(pdev)->driver_callback); \ - if (dc && dc->notify_hotplug) \ - dc->notify_hotplug(pdev); \ - }) #endif /* RENESAS_USB_H */ -- cgit v1.2.3 From df9f2c278b69fcd8b04c89612310f0036d21ec4c Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 25 Jun 2019 14:38:49 +0900 Subject: usb: renesas_usbhs: Use a specific flag instead of type for multi_clks To remove the type of renesas_usbhs_driver_param in the future, this patch uses a specific flag "multi_clks". Signed-off-by: Yoshihiro Shimoda Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/common.c | 8 +++----- include/linux/usb/renesas_usbhs.h | 1 + 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 739fe4b4c1d5..530e2eb7ab08 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -288,11 +288,7 @@ static void usbhsc_set_buswait(struct usbhs_priv *priv) static bool usbhsc_is_multi_clks(struct usbhs_priv *priv) { - if (priv->dparam.type == USBHS_TYPE_RCAR_GEN3 || - priv->dparam.type == USBHS_TYPE_RCAR_GEN3_WITH_PLL) - return true; - - return false; + return priv->dparam.multi_clks; } static int usbhsc_clk_get(struct device *dev, struct usbhs_priv *priv) @@ -544,6 +540,7 @@ static const struct usbhs_of_data rcar_gen3_data = { .param = { .type = USBHS_TYPE_RCAR_GEN3, .has_usb_dmac = 1, + .multi_clks = 1, .pipe_configs = usbhsc_new_pipe, .pipe_size = ARRAY_SIZE(usbhsc_new_pipe), } @@ -554,6 +551,7 @@ static const struct usbhs_of_data rcar_gen3_with_pll_data = { .param = { .type = USBHS_TYPE_RCAR_GEN3_WITH_PLL, .has_usb_dmac = 1, + .multi_clks = 1, .pipe_configs = usbhsc_new_pipe, .pipe_size = ARRAY_SIZE(usbhsc_new_pipe), } diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index ac601be95ec0..e249c217cad1 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -181,6 +181,7 @@ struct renesas_usbhs_driver_param { u32 has_cnen:1; u32 cfifo_byte_addr:1; /* CFIFO is byte addressable */ #define USBHS_USB_DMAC_XFER_SIZE 32 /* hardcode the xfer size */ + u32 multi_clks:1; }; #define USBHS_TYPE_RCAR_GEN2 1 -- cgit v1.2.3 From a4027b409fa98dc47418dacd3dcb5c99c5a76e4d Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 25 Jun 2019 14:38:50 +0900 Subject: usb: renesas_usbhs: Remove type member from renesas_usbhs_driver_param Now no one uses the type member so that this patch removes it. Signed-off-by: Yoshihiro Shimoda Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/common.c | 5 ----- include/linux/usb/renesas_usbhs.h | 7 ------- 2 files changed, 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 530e2eb7ab08..18727561fa65 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -528,7 +528,6 @@ int usbhsc_schedule_notify_hotplug(struct platform_device *pdev) static const struct usbhs_of_data rcar_gen2_data = { .platform_callback = &usbhs_rcar2_ops, .param = { - .type = USBHS_TYPE_RCAR_GEN2, .has_usb_dmac = 1, .pipe_configs = usbhsc_new_pipe, .pipe_size = ARRAY_SIZE(usbhsc_new_pipe), @@ -538,7 +537,6 @@ static const struct usbhs_of_data rcar_gen2_data = { static const struct usbhs_of_data rcar_gen3_data = { .platform_callback = &usbhs_rcar3_ops, .param = { - .type = USBHS_TYPE_RCAR_GEN3, .has_usb_dmac = 1, .multi_clks = 1, .pipe_configs = usbhsc_new_pipe, @@ -549,7 +547,6 @@ static const struct usbhs_of_data rcar_gen3_data = { static const struct usbhs_of_data rcar_gen3_with_pll_data = { .platform_callback = &usbhs_rcar3_with_pll_ops, .param = { - .type = USBHS_TYPE_RCAR_GEN3_WITH_PLL, .has_usb_dmac = 1, .multi_clks = 1, .pipe_configs = usbhsc_new_pipe, @@ -560,7 +557,6 @@ static const struct usbhs_of_data rcar_gen3_with_pll_data = { static const struct usbhs_of_data rza1_data = { .platform_callback = &usbhs_rza1_ops, .param = { - .type = USBHS_TYPE_RZA1, .pipe_configs = usbhsc_new_pipe, .pipe_size = ARRAY_SIZE(usbhsc_new_pipe), } @@ -569,7 +565,6 @@ static const struct usbhs_of_data rza1_data = { static const struct usbhs_of_data rza2_data = { .platform_callback = &usbhs_rza2_ops, .param = { - .type = USBHS_TYPE_RZA2, .has_cnen = 1, .cfifo_byte_addr = 1, .pipe_configs = usbhsc_new_pipe, diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index e249c217cad1..fee84b7d4d2a 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -170,7 +170,6 @@ struct renesas_usbhs_driver_param { */ int pio_dma_border; /* default is 64byte */ - uintptr_t type; u32 enable_gpio; /* @@ -184,12 +183,6 @@ struct renesas_usbhs_driver_param { u32 multi_clks:1; }; -#define USBHS_TYPE_RCAR_GEN2 1 -#define USBHS_TYPE_RCAR_GEN3 2 -#define USBHS_TYPE_RCAR_GEN3_WITH_PLL 3 -#define USBHS_TYPE_RZA1 4 -#define USBHS_TYPE_RZA2 5 - /* * option: * -- cgit v1.2.3 From 98e86506c24932a30f50ffcfcbc98b04e3c9bc60 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 25 Jun 2019 14:38:52 +0900 Subject: usb: renesas_usbhs: Add has_new_pipe_configs flag In the future, each struct renesas_usbhs_driver_param is stored on the each platform related source code (e.g. rcar3.c). So, to simplify the source code, this patch adds a new flag has_new_pipe_configs. Signed-off-by: Yoshihiro Shimoda Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/common.c | 20 +++++++++----------- include/linux/usb/renesas_usbhs.h | 1 + 2 files changed, 10 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 35b06e7d4eb4..f9476a07b0e9 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -529,8 +529,7 @@ static const struct usbhs_of_data rcar_gen2_data = { .platform_callback = &usbhs_rcar2_ops, .param = { .has_usb_dmac = 1, - .pipe_configs = usbhsc_new_pipe, - .pipe_size = ARRAY_SIZE(usbhsc_new_pipe), + .has_new_pipe_configs = 1, } }; @@ -539,8 +538,7 @@ static const struct usbhs_of_data rcar_gen3_data = { .param = { .has_usb_dmac = 1, .multi_clks = 1, - .pipe_configs = usbhsc_new_pipe, - .pipe_size = ARRAY_SIZE(usbhsc_new_pipe), + .has_new_pipe_configs = 1, } }; @@ -549,16 +547,14 @@ static const struct usbhs_of_data rcar_gen3_with_pll_data = { .param = { .has_usb_dmac = 1, .multi_clks = 1, - .pipe_configs = usbhsc_new_pipe, - .pipe_size = ARRAY_SIZE(usbhsc_new_pipe), + .has_new_pipe_configs = 1, } }; static const struct usbhs_of_data rza1_data = { .platform_callback = &usbhs_rza1_ops, .param = { - .pipe_configs = usbhsc_new_pipe, - .pipe_size = ARRAY_SIZE(usbhsc_new_pipe), + .has_new_pipe_configs = 1, } }; @@ -567,8 +563,7 @@ static const struct usbhs_of_data rza2_data = { .param = { .has_cnen = 1, .cfifo_byte_addr = 1, - .pipe_configs = usbhsc_new_pipe, - .pipe_size = ARRAY_SIZE(usbhsc_new_pipe), + .has_new_pipe_configs = 1, } }; @@ -715,7 +710,10 @@ static int usbhs_probe(struct platform_device *pdev) priv->pfunc = info->platform_callback; /* set default param if platform doesn't have */ - if (!priv->dparam.pipe_configs) { + if (usbhs_get_dparam(priv, has_new_pipe_configs)) { + priv->dparam.pipe_configs = usbhsc_new_pipe; + priv->dparam.pipe_size = ARRAY_SIZE(usbhsc_new_pipe); + } else if (!priv->dparam.pipe_configs) { priv->dparam.pipe_configs = usbhsc_default_pipe; priv->dparam.pipe_size = ARRAY_SIZE(usbhsc_default_pipe); } diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index fee84b7d4d2a..6914475bbc86 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -181,6 +181,7 @@ struct renesas_usbhs_driver_param { u32 cfifo_byte_addr:1; /* CFIFO is byte addressable */ #define USBHS_USB_DMAC_XFER_SIZE 32 /* hardcode the xfer size */ u32 multi_clks:1; + u32 has_new_pipe_configs:1; }; /* -- cgit v1.2.3 From bcc61569997b2188ba89db43b5b991da01ea2d18 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 25 Jun 2019 13:32:41 +0200 Subject: cpufreq: Move the IS_ENABLED(CPU_THERMAL) macro into a stub cpufreq_online() and cpufreq_offline() [un]register the driver as a cooling device. This is done if the driver is flagged as a cooling device in addition with an IS_ENABLED() check to compile out the branching code. Group this test in a stub function added in the cpufreq header instead of having the IS_ENABLED() in the code. Suggested-by: Rafael J. Wysocki Signed-off-by: Daniel Lezcano Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 6 ++---- include/linux/cpufreq.h | 6 ++++++ 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 85ff958e01f1..aee024e42618 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1378,8 +1378,7 @@ static int cpufreq_online(unsigned int cpu) if (cpufreq_driver->ready) cpufreq_driver->ready(policy); - if (IS_ENABLED(CONFIG_CPU_THERMAL) && - cpufreq_driver->flags & CPUFREQ_IS_COOLING_DEV) + if (cpufreq_thermal_control_enabled(cpufreq_driver)) policy->cdev = of_cpufreq_cooling_register(policy); pr_debug("initialization complete\n"); @@ -1469,8 +1468,7 @@ static int cpufreq_offline(unsigned int cpu) goto unlock; } - if (IS_ENABLED(CONFIG_CPU_THERMAL) && - cpufreq_driver->flags & CPUFREQ_IS_COOLING_DEV) { + if (cpufreq_thermal_control_enabled(cpufreq_driver)) { cpufreq_cooling_unregister(policy->cdev); policy->cdev = NULL; } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d01a74fbc4db..a1467aa7f58b 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -409,6 +409,12 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); const char *cpufreq_get_current_driver(void); void *cpufreq_get_driver_data(void); +static inline int cpufreq_thermal_control_enabled(struct cpufreq_driver *drv) +{ + return IS_ENABLED(CONFIG_CPU_THERMAL) && + (drv->flags & CPUFREQ_IS_COOLING_DEV); +} + static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, unsigned int min, unsigned int max) { -- cgit v1.2.3 From 4a6ef8e37c4d9a40f09438068da1734fd965bd75 Mon Sep 17 00:00:00 2001 From: Nikolaus Voss Date: Wed, 12 Jun 2019 10:36:07 +0200 Subject: pwm: Add support referencing PWMs from ACPI In analogy to referencing a GPIO using the "gpios" property from ACPI, support referencing a PWM using the "pwms" property. ACPI entries must look like Package () {"pwms", Package () { , , [, ]}} In contrast to the DT implementation, only _one_ PWM entry in the "pwms" property is supported. As a consequence "pwm-names"-property and con_id lookup aren't supported. Support for ACPI is added via the firmware-node framework which is an abstraction layer on top of ACPI/DT. To keep this patch clean, DT and ACPI paths are kept separate. The firmware-node framework could be used to unify both paths in a future patch. To support leds-pwm driver, an additional method devm_fwnode_pwm_get() which supports both ACPI and DT configuration is exported. Signed-off-by: Nikolaus Voss [thierry.reding@gmail.com: fix build failures for !ACPI] Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pwm.h | 10 +++++ 2 files changed, 132 insertions(+) (limited to 'include/linux') diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 60b8ccc1fd7c..c1dbb5f6ebd2 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -19,6 +19,7 @@ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include #include #include #include @@ -750,6 +751,85 @@ put: } EXPORT_SYMBOL_GPL(of_pwm_get); +#if IS_ENABLED(CONFIG_ACPI) +static struct pwm_chip *device_to_pwmchip(struct device *dev) +{ + struct pwm_chip *chip; + + mutex_lock(&pwm_lock); + + list_for_each_entry(chip, &pwm_chips, list) { + struct acpi_device *adev = ACPI_COMPANION(chip->dev); + + if ((chip->dev == dev) || (adev && &adev->dev == dev)) { + mutex_unlock(&pwm_lock); + return chip; + } + } + + mutex_unlock(&pwm_lock); + + return ERR_PTR(-EPROBE_DEFER); +} +#endif + +/** + * acpi_pwm_get() - request a PWM via parsing "pwms" property in ACPI + * @fwnode: firmware node to get the "pwm" property from + * + * Returns the PWM device parsed from the fwnode and index specified in the + * "pwms" property or a negative error-code on failure. + * Values parsed from the device tree are stored in the returned PWM device + * object. + * + * This is analogous to of_pwm_get() except con_id is not yet supported. + * ACPI entries must look like + * Package () {"pwms", Package () + * { , , [, ]}} + * + * Returns: A pointer to the requested PWM device or an ERR_PTR()-encoded + * error code on failure. + */ +static struct pwm_device *acpi_pwm_get(struct fwnode_handle *fwnode) +{ + struct pwm_device *pwm = ERR_PTR(-ENODEV); +#if IS_ENABLED(CONFIG_ACPI) + struct fwnode_reference_args args; + struct acpi_device *acpi; + struct pwm_chip *chip; + int ret; + + memset(&args, 0, sizeof(args)); + + ret = __acpi_node_get_property_reference(fwnode, "pwms", 0, 3, &args); + if (ret < 0) + return ERR_PTR(ret); + + acpi = to_acpi_device_node(args.fwnode); + if (!acpi) + return ERR_PTR(-EINVAL); + + if (args.nargs < 2) + return ERR_PTR(-EPROTO); + + chip = device_to_pwmchip(&acpi->dev); + if (IS_ERR(chip)) + return ERR_CAST(chip); + + pwm = pwm_request_from_chip(chip, args.args[0], NULL); + if (IS_ERR(pwm)) + return pwm; + + pwm->args.period = args.args[1]; + pwm->args.polarity = PWM_POLARITY_NORMAL; + + if (args.nargs > 2 && args.args[2] & PWM_POLARITY_INVERTED) + pwm->args.polarity = PWM_POLARITY_INVERSED; +#endif + + return pwm; +} + /** * pwm_add_table() - register PWM device consumers * @table: array of consumers to register @@ -814,6 +894,10 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id) if (IS_ENABLED(CONFIG_OF) && dev && dev->of_node) return of_pwm_get(dev, dev->of_node, con_id); + /* then lookup via ACPI */ + if (dev && is_acpi_node(dev->fwnode)) + return acpi_pwm_get(dev->fwnode); + /* * We look up the provider in the static table typically provided by * board setup code. We first try to lookup the consumer device by @@ -999,6 +1083,44 @@ struct pwm_device *devm_of_pwm_get(struct device *dev, struct device_node *np, } EXPORT_SYMBOL_GPL(devm_of_pwm_get); +/** + * devm_fwnode_pwm_get() - request a resource managed PWM from firmware node + * @dev: device for PWM consumer + * @fwnode: firmware node to get the PWM from + * @con_id: consumer name + * + * Returns the PWM device parsed from the firmware node. See of_pwm_get() and + * acpi_pwm_get() for a detailed description. + * + * Returns: A pointer to the requested PWM device or an ERR_PTR()-encoded + * error code on failure. + */ +struct pwm_device *devm_fwnode_pwm_get(struct device *dev, + struct fwnode_handle *fwnode, + const char *con_id) +{ + struct pwm_device **ptr, *pwm = ERR_PTR(-ENODEV); + + ptr = devres_alloc(devm_pwm_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); + + if (is_of_node(fwnode)) + pwm = of_pwm_get(dev, to_of_node(fwnode), con_id); + else if (is_acpi_node(fwnode)) + pwm = acpi_pwm_get(fwnode); + + if (!IS_ERR(pwm)) { + *ptr = pwm; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return pwm; +} +EXPORT_SYMBOL_GPL(devm_fwnode_pwm_get); + static int devm_pwm_match(struct device *dev, void *res, void *data) { struct pwm_device **p = res; diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 8bf5d5f6267d..24632a7a7d11 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -412,6 +412,9 @@ void pwm_put(struct pwm_device *pwm); struct pwm_device *devm_pwm_get(struct device *dev, const char *con_id); struct pwm_device *devm_of_pwm_get(struct device *dev, struct device_node *np, const char *con_id); +struct pwm_device *devm_fwnode_pwm_get(struct device *dev, + struct fwnode_handle *fwnode, + const char *con_id); void devm_pwm_put(struct device *dev, struct pwm_device *pwm); #else static inline struct pwm_device *pwm_request(int pwm_id, const char *label) @@ -518,6 +521,13 @@ static inline struct pwm_device *devm_of_pwm_get(struct device *dev, return ERR_PTR(-ENODEV); } +static inline struct pwm_device * +devm_fwnode_pwm_get(struct device *dev, struct fwnode_handle *fwnode, + const char *con_id) +{ + return ERR_PTR(-ENODEV); +} + static inline void devm_pwm_put(struct device *dev, struct pwm_device *pwm) { } -- cgit v1.2.3 From 550113d4e9f5c7b62be760fc01178c9e0139c1f4 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 24 Jun 2019 19:04:02 +0200 Subject: i2c: add newly exported functions to the header, too Nobody (including me) noticed that these functions were exported but not added to the header :/ Fixes: 7159dbdae3c5 ("i2c: core: improve return value handling of i2c_new_device and i2c_new_dummy") Signed-off-by: Wolfram Sang Reviewed-by: Bartosz Golaszewski Reviewed-by: Kieran Bingham Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 5 ++--- include/linux/i2c.h | 6 ++++++ 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index e7d5ada40d48..f1949d1e2b54 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -729,7 +729,7 @@ static int i2c_dev_irq_from_resources(const struct resource *resources, * This returns the new i2c client, which may be saved for later use with * i2c_unregister_device(); or an ERR_PTR to describe the error. */ -static struct i2c_client * +struct i2c_client * i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *info) { struct i2c_client *client; @@ -895,8 +895,7 @@ static struct i2c_driver dummy_driver = { * This returns the new i2c client, which should be saved for later use with * i2c_unregister_device(); or an ERR_PTR to describe the error. */ -static struct i2c_client * -i2c_new_dummy_device(struct i2c_adapter *adapter, u16 address) +struct i2c_client *i2c_new_dummy_device(struct i2c_adapter *adapter, u16 address) { struct i2c_board_info info = { I2C_BOARD_INFO("dummy", address), diff --git a/include/linux/i2c.h b/include/linux/i2c.h index d8f9060179d0..fa5552c2307b 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -442,6 +442,9 @@ struct i2c_board_info { extern struct i2c_client * i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info); +extern struct i2c_client * +i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *info); + /* If you don't know the exact address of an I2C device, use this variant * instead, which can probe for device presence in a list of possible * addresses. The "probe" callback function is optional. If it is provided, @@ -463,6 +466,9 @@ extern int i2c_probe_func_quick_read(struct i2c_adapter *adap, unsigned short ad extern struct i2c_client * i2c_new_dummy(struct i2c_adapter *adap, u16 address); +extern struct i2c_client * +i2c_new_dummy_device(struct i2c_adapter *adapter, u16 address); + extern struct i2c_client * devm_i2c_new_dummy_device(struct device *dev, struct i2c_adapter *adap, u16 address); -- cgit v1.2.3 From 4ae4916b56435d1d5066616120f9ff907bd96b86 Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Tue, 28 May 2019 10:59:15 -0700 Subject: i40e: fix 'Unknown bps' in dmesg for 2.5Gb/5Gb speeds This patch fixes 'NIC Link is Up, Unknown bps' message in dmesg for 2.5Gb/5Gb speeds. This problem is fixed by adding constants for VIRTCHNL_LINK_SPEED_2_5GB and VIRTCHNL_LINK_SPEED_5GB cases in the i40e_virtchnl_link_speed() function. Signed-off-by: Aleksandr Loktionov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_prototype.h | 4 ++++ include/linux/avf/virtchnl.h | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 882627073dce..eac88bcc6c06 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -350,6 +350,10 @@ i40e_virtchnl_link_speed(enum i40e_aq_link_speed link_speed) return VIRTCHNL_LINK_SPEED_100MB; case I40E_LINK_SPEED_1GB: return VIRTCHNL_LINK_SPEED_1GB; + case I40E_LINK_SPEED_2_5GB: + return VIRTCHNL_LINK_SPEED_2_5GB; + case I40E_LINK_SPEED_5GB: + return VIRTCHNL_LINK_SPEED_5GB; case I40E_LINK_SPEED_10GB: return VIRTCHNL_LINK_SPEED_10GB; case I40E_LINK_SPEED_40GB: diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 191621ff7594..ca956b672ac0 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -61,12 +61,14 @@ enum virtchnl_status_code { #define VIRTCHNL_ERR_PARAM VIRTCHNL_STATUS_ERR_PARAM #define VIRTCHNL_STATUS_NOT_SUPPORTED VIRTCHNL_STATUS_ERR_NOT_SUPPORTED +#define VIRTCHNL_LINK_SPEED_2_5GB_SHIFT 0x0 #define VIRTCHNL_LINK_SPEED_100MB_SHIFT 0x1 #define VIRTCHNL_LINK_SPEED_1000MB_SHIFT 0x2 #define VIRTCHNL_LINK_SPEED_10GB_SHIFT 0x3 #define VIRTCHNL_LINK_SPEED_40GB_SHIFT 0x4 #define VIRTCHNL_LINK_SPEED_20GB_SHIFT 0x5 #define VIRTCHNL_LINK_SPEED_25GB_SHIFT 0x6 +#define VIRTCHNL_LINK_SPEED_5GB_SHIFT 0x7 enum virtchnl_link_speed { VIRTCHNL_LINK_SPEED_UNKNOWN = 0, @@ -76,6 +78,8 @@ enum virtchnl_link_speed { VIRTCHNL_LINK_SPEED_40GB = BIT(VIRTCHNL_LINK_SPEED_40GB_SHIFT), VIRTCHNL_LINK_SPEED_20GB = BIT(VIRTCHNL_LINK_SPEED_20GB_SHIFT), VIRTCHNL_LINK_SPEED_25GB = BIT(VIRTCHNL_LINK_SPEED_25GB_SHIFT), + VIRTCHNL_LINK_SPEED_2_5GB = BIT(VIRTCHNL_LINK_SPEED_2_5GB_SHIFT), + VIRTCHNL_LINK_SPEED_5GB = BIT(VIRTCHNL_LINK_SPEED_5GB_SHIFT), }; /* for hsplit_0 field of Rx HMC context */ -- cgit v1.2.3 From 65c0f2c1663649217455a73d48b1c303f133180a Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 25 Jun 2019 17:47:50 +0000 Subject: net/mlx5: Introduce vport metadata matching bits and enum constants When a dual-port VHCA sends a RoCE packet on its non-native port, and the packet arrives to its affiliated vport FDB, a mismatch might occur on the rules that match the packet source vport. So we replace the match on source port with the match on metadata that was configured in ingress ACL, and that metadata will be passed further also to the NIC RX table of the eswitch manager. Introduce vport metadata matching bits and enum constants as a pre-step towards metadata matching. o metadata type C registers in the misc parameters 2 fields. o esw_uplink_ingress_acl bit in esw cap. If it set, the device supports ingress ACL for the uplink vport. o fdb_to_vport_reg_* bits in flow table cap and esw vport context, to support propagating the metadata to the nic rx through the loopback path. o flow_source in flow context, to indicate the known origin of packets. o enum constants, to support the above bits. Signed-off-by: Jianbo Liu Reviewed-by: Eli Britstein Reviewed-by: Roi Dayan Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 56 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index e3c154b573a2..d4409654f760 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -528,7 +528,21 @@ struct mlx5_ifc_fte_match_set_misc2_bits { struct mlx5_ifc_fte_match_mpls_bits outer_first_mpls_over_udp; - u8 reserved_at_80[0x100]; + u8 metadata_reg_c_7[0x20]; + + u8 metadata_reg_c_6[0x20]; + + u8 metadata_reg_c_5[0x20]; + + u8 metadata_reg_c_4[0x20]; + + u8 metadata_reg_c_3[0x20]; + + u8 metadata_reg_c_2[0x20]; + + u8 metadata_reg_c_1[0x20]; + + u8 metadata_reg_c_0[0x20]; u8 metadata_reg_a[0x20]; @@ -636,8 +650,22 @@ struct mlx5_ifc_flow_table_nic_cap_bits { u8 reserved_at_e00[0x7200]; }; +enum { + MLX5_FDB_TO_VPORT_REG_C_0 = 0x01, + MLX5_FDB_TO_VPORT_REG_C_1 = 0x02, + MLX5_FDB_TO_VPORT_REG_C_2 = 0x04, + MLX5_FDB_TO_VPORT_REG_C_3 = 0x08, + MLX5_FDB_TO_VPORT_REG_C_4 = 0x10, + MLX5_FDB_TO_VPORT_REG_C_5 = 0x20, + MLX5_FDB_TO_VPORT_REG_C_6 = 0x40, + MLX5_FDB_TO_VPORT_REG_C_7 = 0x80, +}; + struct mlx5_ifc_flow_table_eswitch_cap_bits { - u8 reserved_at_0[0x1a]; + u8 fdb_to_vport_reg_c_id[0x8]; + u8 reserved_at_8[0xf]; + u8 flow_source[0x1]; + u8 reserved_at_18[0x2]; u8 multi_fdb_encap[0x1]; u8 reserved_at_1b[0x1]; u8 fdb_multi_path_to_table[0x1]; @@ -665,7 +693,9 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_at_5[0x14]; + u8 reserved_at_5[0x3]; + u8 esw_uplink_ingress_acl[0x1]; + u8 reserved_at_9[0x10]; u8 esw_functions_changed[0x1]; u8 reserved_at_1a[0x1]; u8 ecpf_vport_exists[0x1]; @@ -2555,6 +2585,12 @@ enum { MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 = 0x800, }; +enum { + MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT = 0x0, + MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK = 0x1, + MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT = 0x2, +}; + struct mlx5_ifc_vlan_bits { u8 ethtype[0x10]; u8 prio[0x3]; @@ -2574,7 +2610,9 @@ struct mlx5_ifc_flow_context_bits { u8 action[0x10]; u8 extended_destination[0x1]; - u8 reserved_at_80[0x7]; + u8 reserved_at_81[0x1]; + u8 flow_source[0x2]; + u8 reserved_at_84[0x4]; u8 destination_list_size[0x18]; u8 reserved_at_a0[0x8]; @@ -3099,12 +3137,14 @@ struct mlx5_ifc_hca_vport_context_bits { }; struct mlx5_ifc_esw_vport_context_bits { - u8 reserved_at_0[0x3]; + u8 fdb_to_vport_reg_c[0x1]; + u8 reserved_at_1[0x2]; u8 vport_svlan_strip[0x1]; u8 vport_cvlan_strip[0x1]; u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert[0x2]; - u8 reserved_at_8[0x18]; + u8 fdb_to_vport_reg_c_id[0x8]; + u8 reserved_at_10[0x10]; u8 reserved_at_20[0x20]; @@ -4985,7 +5025,8 @@ struct mlx5_ifc_modify_esw_vport_context_out_bits { }; struct mlx5_ifc_esw_vport_context_fields_select_bits { - u8 reserved_at_0[0x1c]; + u8 reserved_at_0[0x1b]; + u8 fdb_to_vport_reg_c_id[0x1]; u8 vport_cvlan_insert[0x1]; u8 vport_svlan_insert[0x1]; u8 vport_cvlan_strip[0x1]; @@ -5182,6 +5223,7 @@ enum { MLX5_ACTION_IN_FIELD_OUT_DIPV4 = 0x16, MLX5_ACTION_IN_FIELD_OUT_FIRST_VID = 0x17, MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT = 0x47, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_0 = 0x51, }; struct mlx5_ifc_alloc_modify_header_context_out_bits { -- cgit v1.2.3 From bb0ee7dcc4ecd6af39823b80ae3995ddc119c373 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 25 Jun 2019 17:47:58 +0000 Subject: net/mlx5: Add flow context for flow tag Refactor the flow data structures, add new flow_context and move flow_tag into it, as flow_tag doesn't belong to the rule action. Signed-off-by: Jianbo Liu Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/flow.c | 13 +++++---- drivers/infiniband/hw/mlx5/main.c | 30 ++++++++++++------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + .../mellanox/mlx5/core/diag/fs_tracepoint.h | 2 +- .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 7 +++-- .../net/ethernet/mellanox/mlx5/core/fpga/ipsec.c | 8 +++-- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 3 +- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 34 +++++++++++----------- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 1 + include/linux/mlx5/fs.h | 15 +++++++--- 11 files changed, 71 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index 1fc302d41a53..b8841355fcd5 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -65,11 +65,12 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( struct uverbs_attr_bundle *attrs) { - struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; + struct mlx5_flow_context flow_context = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; struct mlx5_ib_flow_handler *flow_handler; struct mlx5_ib_flow_matcher *fs_matcher; struct ib_uobject **arr_flow_actions; struct ib_uflow_resources *uflow_res; + struct mlx5_flow_act flow_act = {}; void *devx_obj; int dest_id, dest_type; void *cmd_in; @@ -172,17 +173,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( arr_flow_actions[i]->object); } - ret = uverbs_copy_from(&flow_act.flow_tag, attrs, + ret = uverbs_copy_from(&flow_context.flow_tag, attrs, MLX5_IB_ATTR_CREATE_FLOW_TAG); if (!ret) { - if (flow_act.flow_tag >= BIT(24)) { + if (flow_context.flow_tag >= BIT(24)) { ret = -EINVAL; goto err_out; } - flow_act.flags |= FLOW_ACT_HAS_TAG; + flow_context.flags |= FLOW_CONTEXT_HAS_TAG; } - flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act, + flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, + &flow_context, + &flow_act, counter_id, cmd_in, inlen, dest_id, dest_type); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index abac70ad5c7c..be4c9a687df7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2666,11 +2666,15 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, } } -static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, - u32 *match_v, const union ib_flow_spec *ib_spec, +static int parse_flow_attr(struct mlx5_core_dev *mdev, + struct mlx5_flow_spec *spec, + const union ib_flow_spec *ib_spec, const struct ib_flow_attr *flow_attr, struct mlx5_flow_act *action, u32 prev_type) { + struct mlx5_flow_context *flow_context = &spec->flow_context; + u32 *match_c = spec->match_criteria; + u32 *match_v = spec->match_value; void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, @@ -2989,8 +2993,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, if (ib_spec->flow_tag.tag_id >= BIT(24)) return -EINVAL; - action->flow_tag = ib_spec->flow_tag.tag_id; - action->flags |= FLOW_ACT_HAS_TAG; + flow_context->flow_tag = ib_spec->flow_tag.tag_id; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; break; case IB_FLOW_SPEC_ACTION_DROP: if (FIELDS_NOT_SUPPORTED(ib_spec->drop, @@ -3084,7 +3088,8 @@ is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, return VALID_SPEC_NA; return is_crypto && is_ipsec && - (!egress || (!is_drop && !(flow_act->flags & FLOW_ACT_HAS_TAG))) ? + (!egress || (!is_drop && + !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ? VALID_SPEC_VALID : VALID_SPEC_INVALID; } @@ -3473,7 +3478,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, { struct mlx5_flow_table *ft = ft_prio->flow_table; struct mlx5_ib_flow_handler *handler; - struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; + struct mlx5_flow_act flow_act = {}; struct mlx5_flow_spec *spec; struct mlx5_flow_destination dest_arr[2] = {}; struct mlx5_flow_destination *rule_dst = dest_arr; @@ -3504,8 +3509,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, } for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { - err = parse_flow_attr(dev->mdev, spec->match_criteria, - spec->match_value, + err = parse_flow_attr(dev->mdev, spec, ib_flow, flow_attr, &flow_act, prev_type); if (err < 0) @@ -3572,11 +3576,11 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; } - if ((flow_act.flags & FLOW_ACT_HAS_TAG) && + if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) && (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n", - flow_act.flow_tag, flow_attr->type); + spec->flow_context.flow_tag, flow_attr->type); err = -EINVAL; goto free; } @@ -3947,6 +3951,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *ft_prio, struct mlx5_flow_destination *dst, struct mlx5_ib_flow_matcher *fs_matcher, + struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act, void *cmd_in, int inlen, int dst_num) @@ -3969,6 +3974,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev, memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params, fs_matcher->mask_len); spec->match_criteria_enable = fs_matcher->match_criteria_enable; + spec->flow_context = *flow_context; handler->rule = mlx5_add_flow_rules(ft, spec, flow_act, dst, dst_num); @@ -4033,6 +4039,7 @@ static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher, struct mlx5_ib_flow_handler * mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, + struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act, u32 counter_id, void *cmd_in, int inlen, int dest_id, @@ -4085,7 +4092,8 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, dst_num++; } - handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act, + handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, + flow_context, flow_act, cmd_in, inlen, dst_num); if (IS_ERR(handler)) { diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index a043af7ee366..1c205c2bd486 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1317,6 +1317,7 @@ extern const struct uapi_definition mlx5_ib_devx_defs[]; extern const struct uapi_definition mlx5_ib_flow_defs[]; struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, + struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act, u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type); bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index a4cf123e3f17..9ec46edf22a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -204,7 +204,7 @@ TRACE_EVENT(mlx5_fs_set_fte, __entry->index = fte->index; __entry->action = fte->action.action; __entry->mask_enable = __entry->fg->mask.match_criteria_enable; - __entry->flow_tag = fte->action.flow_tag; + __entry->flow_tag = fte->flow_context.flow_tag; memcpy(__entry->mask_outer, MLX5_ADDR_OF(fte_match_param, &__entry->fg->mask.match_criteria, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 4421c10f58ae..839662644ed3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -426,7 +426,7 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, } spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)); - flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + spec->flow_context.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0); if (IS_ERR(rule)) { err = PTR_ERR(rule); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 122f457091a2..8ff1ca46d8d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -716,19 +716,22 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context; struct mlx5_nic_flow_attr *attr = flow->nic_attr; struct mlx5_core_dev *dev = priv->mdev; struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act flow_act = { .action = attr->action, - .flow_tag = attr->flow_tag, .reformat_id = 0, - .flags = FLOW_ACT_HAS_TAG | FLOW_ACT_NO_APPEND, + .flags = FLOW_ACT_NO_APPEND, }; struct mlx5_fc *counter = NULL; bool table_created = false; int err, dest_ix = 0; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + flow_context->flow_tag = attr->flow_tag; + if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 52c47d3dd5a5..c76da309506b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -636,7 +636,8 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, u8 match_criteria_enable, const u32 *match_c, const u32 *match_v, - struct mlx5_flow_act *flow_act) + struct mlx5_flow_act *flow_act, + struct mlx5_flow_context *flow_context) { const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers); @@ -655,7 +656,7 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, (match_criteria_enable & ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) || (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) || - (flow_act->flags & FLOW_ACT_HAS_TAG)) + (flow_context->flags & FLOW_CONTEXT_HAS_TAG)) return false; return true; @@ -767,7 +768,8 @@ mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev, fg->mask.match_criteria_enable, fg->mask.match_criteria, fte->val, - &fte->action)) + &fte->action, + &fte->flow_context)) return ERR_PTR(-EINVAL); else if (!mlx5_is_fpga_ipsec_rule(mdev, fg->mask.match_criteria_enable, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 4f1d402926f1..fb1335a433ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -396,7 +396,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); MLX5_SET(flow_context, in_flow_context, group_id, group_id); - MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_tag, + fte->flow_context.flow_tag); MLX5_SET(flow_context, in_flow_context, extended_destination, extended_dest); if (extended_dest) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index fb5b61727ee7..9f5544ac6b8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -584,7 +584,7 @@ err_ida_remove: } static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, - u32 *match_value, + struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act) { struct mlx5_flow_steering *steering = get_steering(&ft->node); @@ -594,9 +594,10 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, if (!fte) return ERR_PTR(-ENOMEM); - memcpy(fte->val, match_value, sizeof(fte->val)); + memcpy(fte->val, &spec->match_value, sizeof(fte->val)); fte->node.type = FS_TYPE_FLOW_ENTRY; fte->action = *flow_act; + fte->flow_context = spec->flow_context; tree_init_node(&fte->node, NULL, del_sw_fte); @@ -1428,7 +1429,9 @@ static bool check_conflicting_actions(u32 action1, u32 action2) return false; } -static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act) +static int check_conflicting_ftes(struct fs_fte *fte, + const struct mlx5_flow_context *flow_context, + const struct mlx5_flow_act *flow_act) { if (check_conflicting_actions(flow_act->action, fte->action.action)) { mlx5_core_warn(get_dev(&fte->node), @@ -1436,12 +1439,12 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act return -EEXIST; } - if ((flow_act->flags & FLOW_ACT_HAS_TAG) && - fte->action.flow_tag != flow_act->flow_tag) { + if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) && + fte->flow_context.flow_tag != flow_context->flow_tag) { mlx5_core_warn(get_dev(&fte->node), "FTE flow tag %u already exists with different flow tag %u\n", - fte->action.flow_tag, - flow_act->flow_tag); + fte->flow_context.flow_tag, + flow_context->flow_tag); return -EEXIST; } @@ -1449,7 +1452,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act } static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, - u32 *match_value, + struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num, @@ -1460,7 +1463,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, int i; int ret; - ret = check_conflicting_ftes(fte, flow_act); + ret = check_conflicting_ftes(fte, &spec->flow_context, flow_act); if (ret) return ERR_PTR(ret); @@ -1635,7 +1638,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, u64 version; int err; - fte = alloc_fte(ft, spec->match_value, flow_act); + fte = alloc_fte(ft, spec, flow_act); if (IS_ERR(fte)) return ERR_PTR(-ENOMEM); @@ -1651,8 +1654,7 @@ search_again_locked: fte_tmp = lookup_fte_locked(g, spec->match_value, take_write); if (!fte_tmp) continue; - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, fte_tmp); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp); up_write_ref_node(&fte_tmp->node, false); tree_put_node(&fte_tmp->node, false); kmem_cache_free(steering->ftes_cache, fte); @@ -1699,8 +1701,7 @@ skip_search: nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); up_write_ref_node(&g->node, false); - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, fte); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); return rule; @@ -1786,7 +1787,7 @@ search_again_locked: if (err) goto err_release_fg; - fte = alloc_fte(ft, spec->match_value, flow_act); + fte = alloc_fte(ft, spec, flow_act); if (IS_ERR(fte)) { err = PTR_ERR(fte); goto err_release_fg; @@ -1800,8 +1801,7 @@ search_again_locked: nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); up_write_ref_node(&g->node, false); - rule = add_rule_fg(g, spec->match_value, flow_act, dest, - dest_num, fte); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); tree_put_node(&g->node, false); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index a08c3d09a50f..c48c382f926f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -170,6 +170,7 @@ struct fs_fte { u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; u32 dests_size; u32 index; + struct mlx5_flow_context flow_context; struct mlx5_flow_act action; enum fs_fte_status status; struct mlx5_fc *counter; diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 2ddaa97f2179..9bf49ce218fa 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -88,10 +88,20 @@ struct mlx5_flow_group; struct mlx5_flow_namespace; struct mlx5_flow_handle; +enum { + FLOW_CONTEXT_HAS_TAG = BIT(0), +}; + +struct mlx5_flow_context { + u32 flags; + u32 flow_tag; +}; + struct mlx5_flow_spec { u8 match_criteria_enable; u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; u32 match_value[MLX5_ST_SZ_DW(fte_match_param)]; + struct mlx5_flow_context flow_context; }; enum { @@ -173,13 +183,11 @@ struct mlx5_fs_vlan { #define MLX5_FS_VLAN_DEPTH 2 enum { - FLOW_ACT_HAS_TAG = BIT(0), - FLOW_ACT_NO_APPEND = BIT(1), + FLOW_ACT_NO_APPEND = BIT(0), }; struct mlx5_flow_act { u32 action; - u32 flow_tag; u32 reformat_id; u32 modify_id; uintptr_t esp_id; @@ -190,7 +198,6 @@ struct mlx5_flow_act { #define MLX5_DECLARE_FLOW_ACT(name) \ struct mlx5_flow_act name = { .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\ - .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, \ .reformat_id = 0, \ .modify_id = 0, \ .flags = 0, } -- cgit v1.2.3 From 7445cfb1169cebf8f79763acf65f85d850850461 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 25 Jun 2019 17:48:00 +0000 Subject: net/mlx5: E-Switch, Tag packet with vport number in VF vports and uplink ingress ACLs When a dual-port VHCA sends a RoCE packet on its non-native port, and the packet arrives to its affiliated vport FDB, a mismatch might occur on the rules that match the packet source vport as it is not represented by single VHCA only in this case. So we change to match on metadata instead of source vport. To do that, a rule is created in all vports and uplink ingress ACLs, to save the source vport number and vhca id in the packet's metadata in order to match on it later. The metadata register used is the first of the 32-bit type C registers. It can be used for matching and header modify operations. The higher 16 bits of this register are for vhca id, and the lower 16 ones is for vport number. This change is not for dual-port RoCE only. If HW and FW allow, the vport metadata matching is enabled by default. Signed-off-by: Jianbo Liu Reviewed-by: Eli Britstein Reviewed-by: Roi Dayan Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 + drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 9 ++ .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 180 ++++++++++++++++----- include/linux/mlx5/eswitch.h | 17 ++ 4 files changed, 172 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index a42a23e505df..1235fd84ae3a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1168,6 +1168,8 @@ void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, vport->ingress.drop_rule = NULL; vport->ingress.allow_rule = NULL; + + esw_vport_del_ingress_acl_modify_metadata(esw, vport); } void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 99dc25630629..51e71b824abf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -68,6 +68,8 @@ struct vport_ingress { struct mlx5_flow_group *allow_spoofchk_only_grp; struct mlx5_flow_group *allow_untagged_only_grp; struct mlx5_flow_group *drop_grp; + int modify_metadata_id; + struct mlx5_flow_handle *modify_metadata_rule; struct mlx5_flow_handle *allow_rule; struct mlx5_flow_handle *drop_rule; struct mlx5_fc *drop_counter; @@ -196,6 +198,10 @@ struct mlx5_esw_functions { u16 num_vfs; }; +enum { + MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0), +}; + struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_nb nb; @@ -203,6 +209,7 @@ struct mlx5_eswitch { struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct workqueue_struct *work_queue; struct mlx5_vport *vports; + u32 flags; int total_vports; int enabled_vports; /* Synchronize between vport change events @@ -240,6 +247,8 @@ void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport); void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index c1c42c1370b8..4bcbc872cd08 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1555,32 +1555,16 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_spec *spec; int err = 0; /* For prio tag mode, there is only 1 FTEs: - * 1) Untagged packets - push prio tag VLAN, allow + * 1) Untagged packets - push prio tag VLAN and modify metadata if + * required, allow * Unmatched traffic is allowed by default */ - if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) - return -EOPNOTSUPP; - - esw_vport_cleanup_ingress_rules(esw, vport); - - err = esw_vport_enable_ingress_acl(esw, vport); - if (err) { - mlx5_core_warn(esw->dev, - "failed to enable prio tag ingress acl (%d) on vport[%d]\n", - err, vport->vport); - return err; - } - - esw_debug(esw->dev, - "vport[%d] configure ingress rules\n", vport->vport); - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { err = -ENOMEM; @@ -1596,6 +1580,12 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, flow_act.vlan[0].ethtype = ETH_P_8021Q; flow_act.vlan[0].vid = 0; flow_act.vlan[0].prio = 0; + + if (vport->ingress.modify_metadata_rule) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.modify_id = vport->ingress.modify_metadata_id; + } + vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, &flow_act, NULL, 0); @@ -1616,6 +1606,58 @@ out_no_mem: return err; } +static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec spec = {}; + int err = 0; + + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); + MLX5_SET(set_action_in, action, data, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport)); + + err = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, + 1, action, &vport->ingress.modify_metadata_id); + if (err) { + esw_warn(esw->dev, + "failed to alloc modify header for vport %d ingress acl (%d)\n", + vport->vport, err); + return err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW; + flow_act.modify_id = vport->ingress.modify_metadata_id; + vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl, + &spec, &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.modify_metadata_rule)) { + err = PTR_ERR(vport->ingress.modify_metadata_rule); + esw_warn(esw->dev, + "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n", + vport->vport, err); + vport->ingress.modify_metadata_rule = NULL; + goto out; + } + +out: + if (err) + mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id); + return err; +} + +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (vport->ingress.modify_metadata_rule) { + mlx5_del_flow_rules(vport->ingress.modify_metadata_rule); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id); + + vport->ingress.modify_metadata_rule = NULL; + } +} + static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { @@ -1623,6 +1665,9 @@ static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec; int err = 0; + if (!MLX5_CAP_GEN(esw->dev, prio_tag_required)) + return 0; + /* For prio tag mode, there is only 1 FTEs: * 1) prio tag packets - pop the prio tag VLAN, allow * Unmatched traffic is allowed by default @@ -1676,27 +1721,75 @@ out_no_mem: return err; } -static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports) +static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - struct mlx5_vport *vport = NULL; - int i, j; int err; - mlx5_esw_for_each_vf_vport(esw, i, vport, nvports) { + if (!mlx5_eswitch_vport_match_metadata_enabled(esw) && + !MLX5_CAP_GEN(esw->dev, prio_tag_required)) + return 0; + + esw_vport_cleanup_ingress_rules(esw, vport); + + err = esw_vport_enable_ingress_acl(esw, vport); + if (err) { + esw_warn(esw->dev, + "failed to enable ingress acl (%d) on vport[%d]\n", + err, vport->vport); + return err; + } + + esw_debug(esw->dev, + "vport[%d] configure ingress rules\n", vport->vport); + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + err = esw_vport_add_ingress_acl_modify_metadata(esw, vport); + if (err) + goto out; + } + + if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && + mlx5_eswitch_is_vf_vport(esw, vport->vport)) { err = esw_vport_ingress_prio_tag_config(esw, vport); if (err) - goto err_ingress; - err = esw_vport_egress_prio_tag_config(esw, vport); + goto out; + } + +out: + if (err) + esw_vport_disable_ingress_acl(esw, vport); + return err; +} + +static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + int i, j; + int err; + + mlx5_esw_for_all_vports(esw, i, vport) { + err = esw_vport_ingress_common_config(esw, vport); if (err) - goto err_egress; + goto err_ingress; + + if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) { + err = esw_vport_egress_prio_tag_config(esw, vport); + if (err) + goto err_egress; + } } + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + esw_info(esw->dev, "Use metadata reg_c as source vport to match\n"); + return 0; err_egress: esw_vport_disable_ingress_acl(esw, vport); err_ingress: - mlx5_esw_for_each_vf_vport_reverse(esw, j, vport, i - 1) { + for (j = MLX5_VPORT_PF; j < i; j++) { + vport = &esw->vports[j]; esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); } @@ -1704,15 +1797,17 @@ err_ingress: return err; } -static void esw_prio_tag_acls_cleanup(struct mlx5_eswitch *esw) +static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; int i; - mlx5_esw_for_each_vf_vport(esw, i, vport, esw->nvports) { + mlx5_esw_for_all_vports(esw, i, vport) { esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); } + + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; } static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports) @@ -1722,15 +1817,13 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports) memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); - if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) { - err = esw_prio_tag_acls_config(esw, nvports); - if (err) - return err; - } + err = esw_create_offloads_acl_tables(esw); + if (err) + return err; err = esw_create_offloads_fdb_tables(esw, nvports); if (err) - return err; + goto create_fdb_err; err = esw_create_offloads_table(esw, nvports); if (err) @@ -1748,6 +1841,9 @@ create_fg_err: create_ft_err: esw_destroy_offloads_fdb_tables(esw); +create_fdb_err: + esw_destroy_offloads_acl_tables(esw); + return err; } @@ -1756,8 +1852,7 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_destroy_vport_rx_group(esw); esw_destroy_offloads_table(esw); esw_destroy_offloads_fdb_tables(esw); - if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) - esw_prio_tag_acls_cleanup(esw); + esw_destroy_offloads_acl_tables(esw); } static void esw_functions_changed_event_handler(struct work_struct *work) @@ -2296,3 +2391,16 @@ bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num) return vport_num >= MLX5_VPORT_FIRST_VF && vport_num <= esw->dev->priv.sriov.max_vfs; } + +bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) +{ + return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA); +} +EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled); + +u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, + u16 vport_num) +{ + return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num; +} +EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 174eec0871d9..aece3ae1902d 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -67,11 +67,28 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, #ifdef CONFIG_MLX5_ESWITCH enum devlink_eswitch_encap_mode mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev); + +bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw); +u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, + u16 vport_num); #else /* CONFIG_MLX5_ESWITCH */ static inline enum devlink_eswitch_encap_mode mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) { return DEVLINK_ESWITCH_ENCAP_MODE_NONE; } + +static inline bool +mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) +{ + return false; +}; + +static inline u32 +mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, + int vport_num) +{ + return 0; +}; #endif /* CONFIG_MLX5_ESWITCH */ #endif -- cgit v1.2.3 From 8d212ff057f8b81ed6ed418874b54ded3bf97ad4 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 25 Jun 2019 17:48:02 +0000 Subject: net/mlx5e: Specifying known origin of packets matching the flow In vport metadata matching, source port number is replaced by metadata. While FW has no idea about what it is in the metadata, a syndrome will happen. Specify a known origin to avoid the syndrome. However, there is no functional change because ANY_VPORT (0) is filled in flow_source, the same default value as before, as a pre-step towards metadata matching for fast path. There are two other values can be filled in flow_source. When setting 0x1, packet matching this rule is from uplink, while 0x2 is for packet from other local vports. Signed-off-by: Jianbo Liu Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 3 +++ include/linux/mlx5/fs.h | 1 + 3 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index 9ec46edf22a6..ddf1b87f1bc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -187,6 +187,7 @@ TRACE_EVENT(mlx5_fs_set_fte, __field(u32, index) __field(u32, action) __field(u32, flow_tag) + __field(u32, flow_source) __field(u8, mask_enable) __field(int, new_fte) __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) @@ -205,6 +206,7 @@ TRACE_EVENT(mlx5_fs_set_fte, __entry->action = fte->action.action; __entry->mask_enable = __entry->fg->mask.match_criteria_enable; __entry->flow_tag = fte->flow_context.flow_tag; + __entry->flow_source = fte->flow_context.flow_source; memcpy(__entry->mask_outer, MLX5_ADDR_OF(fte_match_param, &__entry->fg->mask.match_criteria, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index fb1335a433ae..7ac1249eadc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -398,6 +398,9 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_context.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_source, + fte->flow_context.flow_source); + MLX5_SET(flow_context, in_flow_context, extended_destination, extended_dest); if (extended_dest) { diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 9bf49ce218fa..dc7e7aa53a13 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -95,6 +95,7 @@ enum { struct mlx5_flow_context { u32 flags; u32 flow_tag; + u32 flow_source; }; struct mlx5_flow_spec { -- cgit v1.2.3 From 3b8c4a08a471d56ecaaca939c972fdf5b8255629 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 19 Jun 2019 16:10:16 +0100 Subject: keys: Kill off request_key_async{,_with_auxdata} Kill off request_key_async{,_with_auxdata}() as they're not currently used. Signed-off-by: David Howells --- Documentation/security/keys/core.rst | 32 ------------------ Documentation/security/keys/request-key.rst | 23 ++----------- include/linux/key.h | 11 ------- security/keys/request_key.c | 50 ----------------------------- 4 files changed, 2 insertions(+), 114 deletions(-) (limited to 'include/linux') diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst index 003f1452a5b7..a0e245f9576f 100644 --- a/Documentation/security/keys/core.rst +++ b/Documentation/security/keys/core.rst @@ -1115,38 +1115,6 @@ payload contents" for more information. is a blob of length callout_len, if given (the length may be 0). - * A key can be requested asynchronously by calling one of:: - - struct key *request_key_async(const struct key_type *type, - const char *description, - const void *callout_info, - size_t callout_len); - - or:: - - struct key *request_key_async_with_auxdata(const struct key_type *type, - const char *description, - const char *callout_info, - size_t callout_len, - void *aux); - - which are asynchronous equivalents of request_key() and - request_key_with_auxdata() respectively. - - These two functions return with the key potentially still under - construction. To wait for construction completion, the following should be - called:: - - int wait_for_key_construction(struct key *key, bool intr); - - The function will wait for the key to finish being constructed and then - invokes key_validate() to return an appropriate value to indicate the state - of the key (0 indicates the key is usable). - - If intr is true, then the wait can be interrupted by a signal, in which - case error ERESTARTSYS will be returned. - - * To search for a key under RCU conditions, call:: struct key *request_key_rcu(const struct key_type *type, diff --git a/Documentation/security/keys/request-key.rst b/Documentation/security/keys/request-key.rst index 45049abdf290..5a210baa583a 100644 --- a/Documentation/security/keys/request-key.rst +++ b/Documentation/security/keys/request-key.rst @@ -21,21 +21,6 @@ or:: size_t callout_len, void *aux); -or:: - - struct key *request_key_async(const struct key_type *type, - const char *description, - const char *callout_info, - size_t callout_len); - -or:: - - struct key *request_key_async_with_auxdata(const struct key_type *type, - const char *description, - const char *callout_info, - size_t callout_len, - void *aux); - or:: struct key *request_key_rcu(const struct key_type *type, @@ -53,15 +38,11 @@ does not need to link the key to a keyring to prevent it from being immediately destroyed. The kernel interface returns a pointer directly to the key, and it's up to the caller to destroy the key. -The request_key*_with_auxdata() calls are like the in-kernel request_key*() -calls, except that they permit auxiliary data to be passed to the upcaller (the +The request_key_with_auxdata() calls is like the in-kernel request_key() call, +except that they permit auxiliary data to be passed to the upcaller (the default is NULL). This is only useful for those key types that define their own upcall mechanism rather than using /sbin/request-key. -The two async in-kernel calls may return keys that are still in the process of -being constructed. The two non-async ones will wait for construction to -complete first. - The request_key_rcu() call is like the in-kernel request_key() call, except that it doesn't check for keys that are under construction and doesn't attempt to construct missing keys. diff --git a/include/linux/key.h b/include/linux/key.h index 3604a554df99..4cd5669184f3 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -283,17 +283,6 @@ extern struct key *request_key_with_auxdata(struct key_type *type, size_t callout_len, void *aux); -extern struct key *request_key_async(struct key_type *type, - const char *description, - const void *callout_info, - size_t callout_len); - -extern struct key *request_key_async_with_auxdata(struct key_type *type, - const char *description, - const void *callout_info, - size_t callout_len, - void *aux); - extern int wait_for_key_construction(struct key *key, bool intr); extern int key_validate(const struct key *key); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index f289982cb5db..36c55ef47b9e 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -739,56 +739,6 @@ struct key *request_key_with_auxdata(struct key_type *type, } EXPORT_SYMBOL(request_key_with_auxdata); -/* - * request_key_async - Request a key (allow async construction) - * @type: Type of key. - * @description: The searchable description of the key. - * @callout_info: The data to pass to the instantiation upcall (or NULL). - * @callout_len: The length of callout_info. - * - * As for request_key_and_link() except that it does not add the returned key - * to a keyring if found, new keys are always allocated in the user's quota and - * no auxiliary data can be passed. - * - * The caller should call wait_for_key_construction() to wait for the - * completion of the returned key if it is still undergoing construction. - */ -struct key *request_key_async(struct key_type *type, - const char *description, - const void *callout_info, - size_t callout_len) -{ - return request_key_and_link(type, description, callout_info, - callout_len, NULL, NULL, - KEY_ALLOC_IN_QUOTA); -} -EXPORT_SYMBOL(request_key_async); - -/* - * request a key with auxiliary data for the upcaller (allow async construction) - * @type: Type of key. - * @description: The searchable description of the key. - * @callout_info: The data to pass to the instantiation upcall (or NULL). - * @callout_len: The length of callout_info. - * @aux: Auxiliary data for the upcall. - * - * As for request_key_and_link() except that it does not add the returned key - * to a keyring if found and new keys are always allocated in the user's quota. - * - * The caller should call wait_for_key_construction() to wait for the - * completion of the returned key if it is still undergoing construction. - */ -struct key *request_key_async_with_auxdata(struct key_type *type, - const char *description, - const void *callout_info, - size_t callout_len, - void *aux) -{ - return request_key_and_link(type, description, callout_info, - callout_len, aux, NULL, KEY_ALLOC_IN_QUOTA); -} -EXPORT_SYMBOL(request_key_async_with_auxdata); - /** * request_key_rcu - Request key from RCU-read-locked context * @type: The type of key we want. -- cgit v1.2.3 From f771fde82051976a6fc0fd570f8b86de4a92124b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 26 Jun 2019 21:02:31 +0100 Subject: keys: Simplify key description management Simplify key description management by cramming the word containing the length with the first few chars of the description also. This simplifies the code that generates the index-key used by assoc_array. It should speed up key searching a bit too. Signed-off-by: David Howells --- include/linux/key.h | 14 +++++++++- security/keys/internal.h | 6 ++++ security/keys/key.c | 2 ++ security/keys/keyring.c | 70 ++++++++++++++-------------------------------- security/keys/persistent.c | 1 + 5 files changed, 43 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index 4cd5669184f3..86ccc2d010f6 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -86,9 +86,20 @@ struct keyring_list; struct keyring_name; struct keyring_index_key { + union { + struct { +#ifdef __LITTLE_ENDIAN /* Put desc_len at the LSB of x */ + u8 desc_len; + char desc[sizeof(long) - 1]; /* First few chars of description */ +#else + char desc[sizeof(long) - 1]; /* First few chars of description */ + u8 desc_len; +#endif + }; + unsigned long x; + }; struct key_type *type; const char *description; - size_t desc_len; }; union key_payload { @@ -202,6 +213,7 @@ struct key { union { struct keyring_index_key index_key; struct { + unsigned long len_desc; struct key_type *type; /* type of key */ char *description; }; diff --git a/security/keys/internal.h b/security/keys/internal.h index 3d5c08db74d2..ee71c72fc5f0 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -90,6 +90,12 @@ extern struct mutex key_construction_mutex; extern wait_queue_head_t request_key_conswq; +static inline void key_set_index_key(struct keyring_index_key *index_key) +{ + size_t n = min_t(size_t, index_key->desc_len, sizeof(index_key->desc)); + memcpy(index_key->desc, index_key->description, n); +} + extern struct key_type *key_type_lookup(const char *type); extern void key_type_put(struct key_type *ktype); diff --git a/security/keys/key.c b/security/keys/key.c index e792d65c0af8..0a3828f15f57 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -285,6 +285,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL); if (!key->index_key.description) goto no_memory_3; + key_set_index_key(&key->index_key); refcount_set(&key->usage, 1); init_rwsem(&key->sem); @@ -868,6 +869,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, goto error_free_prep; } index_key.desc_len = strlen(index_key.description); + key_set_index_key(&index_key); ret = __key_link_lock(keyring, &index_key); if (ret < 0) { diff --git a/security/keys/keyring.c b/security/keys/keyring.c index afa6d4024c67..ebf52077598f 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -179,9 +179,9 @@ static unsigned long hash_key_type_and_desc(const struct keyring_index_key *inde int n, desc_len = index_key->desc_len; type = (unsigned long)index_key->type; - acc = mult_64x32_and_fold(type, desc_len + 13); acc = mult_64x32_and_fold(acc, 9207); + for (;;) { n = desc_len; if (n <= 0) @@ -215,23 +215,13 @@ static unsigned long hash_key_type_and_desc(const struct keyring_index_key *inde /* * Build the next index key chunk. * - * On 32-bit systems the index key is laid out as: - * - * 0 4 5 9... - * hash desclen typeptr desc[] - * - * On 64-bit systems: - * - * 0 8 9 17... - * hash desclen typeptr desc[] - * * We return it one word-sized chunk at a time. */ static unsigned long keyring_get_key_chunk(const void *data, int level) { const struct keyring_index_key *index_key = data; unsigned long chunk = 0; - long offset = 0; + const u8 *d; int desc_len = index_key->desc_len, n = sizeof(chunk); level /= ASSOC_ARRAY_KEY_CHUNK_SIZE; @@ -239,33 +229,23 @@ static unsigned long keyring_get_key_chunk(const void *data, int level) case 0: return hash_key_type_and_desc(index_key); case 1: - return ((unsigned long)index_key->type << 8) | desc_len; + return index_key->x; case 2: - if (desc_len == 0) - return (u8)((unsigned long)index_key->type >> - (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8)); - n--; - offset = 1; - /* fall through */ + return (unsigned long)index_key->type; default: - offset += sizeof(chunk) - 1; - offset += (level - 3) * sizeof(chunk); - if (offset >= desc_len) + level -= 3; + if (desc_len <= sizeof(index_key->desc)) return 0; - desc_len -= offset; + + d = index_key->description + sizeof(index_key->desc); + d += level * sizeof(long); + desc_len -= sizeof(index_key->desc); if (desc_len > n) desc_len = n; - offset += desc_len; do { chunk <<= 8; - chunk |= ((u8*)index_key->description)[--offset]; + chunk |= *d++; } while (--desc_len > 0); - - if (level == 2) { - chunk <<= 8; - chunk |= (u8)((unsigned long)index_key->type >> - (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8)); - } return chunk; } } @@ -304,39 +284,28 @@ static int keyring_diff_objects(const void *object, const void *data) seg_b = hash_key_type_and_desc(b); if ((seg_a ^ seg_b) != 0) goto differ; + level += ASSOC_ARRAY_KEY_CHUNK_SIZE / 8; /* The number of bits contributed by the hash is controlled by a * constant in the assoc_array headers. Everything else thereafter we * can deal with as being machine word-size dependent. */ - level += ASSOC_ARRAY_KEY_CHUNK_SIZE / 8; - seg_a = a->desc_len; - seg_b = b->desc_len; + seg_a = a->x; + seg_b = b->x; if ((seg_a ^ seg_b) != 0) goto differ; + level += sizeof(unsigned long); /* The next bit may not work on big endian */ - level++; seg_a = (unsigned long)a->type; seg_b = (unsigned long)b->type; if ((seg_a ^ seg_b) != 0) goto differ; - level += sizeof(unsigned long); - if (a->desc_len == 0) - goto same; - i = 0; - if (((unsigned long)a->description | (unsigned long)b->description) & - (sizeof(unsigned long) - 1)) { - do { - seg_a = *(unsigned long *)(a->description + i); - seg_b = *(unsigned long *)(b->description + i); - if ((seg_a ^ seg_b) != 0) - goto differ_plus_i; - i += sizeof(unsigned long); - } while (i < (a->desc_len & (sizeof(unsigned long) - 1))); - } + i = sizeof(a->desc); + if (a->desc_len <= i) + goto same; for (; i < a->desc_len; i++) { seg_a = *(unsigned char *)(a->description + i); @@ -662,6 +631,9 @@ static bool search_nested_keyrings(struct key *keyring, BUG_ON((ctx->flags & STATE_CHECKS) == 0 || (ctx->flags & STATE_CHECKS) == STATE_CHECKS); + if (ctx->index_key.description) + key_set_index_key(&ctx->index_key); + /* Check to see if this top-level keyring is what we are looking for * and whether it is valid or not. */ diff --git a/security/keys/persistent.c b/security/keys/persistent.c index d0cb5b32eff7..fc29ec59efa7 100644 --- a/security/keys/persistent.c +++ b/security/keys/persistent.c @@ -87,6 +87,7 @@ static long key_get_persistent(struct user_namespace *ns, kuid_t uid, index_key.type = &key_type_keyring; index_key.description = buf; index_key.desc_len = sprintf(buf, "_persistent.%u", from_kuid(ns, uid)); + key_set_index_key(&index_key); if (ns->persistent_keyring_register) { reg_ref = make_key_ref(ns->persistent_keyring_register, true); -- cgit v1.2.3 From 355ef8e15885020da88f5ba2d85ce42b1d01f537 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 26 Jun 2019 21:02:32 +0100 Subject: keys: Cache the hash value to avoid lots of recalculation Cache the hash of the key's type and description in the index key so that we're not recalculating it every time we look at a key during a search. The hash function does a bunch of multiplications, so evading those is probably worthwhile - especially as this is done for every key examined during a search. This also allows the methods used by assoc_array to get chunks of index-key to be simplified. Signed-off-by: David Howells --- include/linux/key.h | 3 +++ security/keys/internal.h | 8 +------- security/keys/key.c | 2 +- security/keys/keyring.c | 28 ++++++++++++++++++++-------- 4 files changed, 25 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index 86ccc2d010f6..fb2debcacea0 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -86,6 +86,8 @@ struct keyring_list; struct keyring_name; struct keyring_index_key { + /* [!] If this structure is altered, the union in struct key must change too! */ + unsigned long hash; /* Hash value */ union { struct { #ifdef __LITTLE_ENDIAN /* Put desc_len at the LSB of x */ @@ -213,6 +215,7 @@ struct key { union { struct keyring_index_key index_key; struct { + unsigned long hash; unsigned long len_desc; struct key_type *type; /* type of key */ char *description; diff --git a/security/keys/internal.h b/security/keys/internal.h index ee71c72fc5f0..4305414795ae 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -89,13 +89,7 @@ extern spinlock_t key_serial_lock; extern struct mutex key_construction_mutex; extern wait_queue_head_t request_key_conswq; - -static inline void key_set_index_key(struct keyring_index_key *index_key) -{ - size_t n = min_t(size_t, index_key->desc_len, sizeof(index_key->desc)); - memcpy(index_key->desc, index_key->description, n); -} - +extern void key_set_index_key(struct keyring_index_key *index_key); extern struct key_type *key_type_lookup(const char *type); extern void key_type_put(struct key_type *ktype); diff --git a/security/keys/key.c b/security/keys/key.c index 0a3828f15f57..9d52f2472a09 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -285,12 +285,12 @@ struct key *key_alloc(struct key_type *type, const char *desc, key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL); if (!key->index_key.description) goto no_memory_3; + key->index_key.type = type; key_set_index_key(&key->index_key); refcount_set(&key->usage, 1); init_rwsem(&key->sem); lockdep_set_class(&key->sem, &type->lock_class); - key->index_key.type = type; key->user = user; key->quotalen = quotalen; key->datalen = type->def_datalen; diff --git a/security/keys/keyring.c b/security/keys/keyring.c index ebf52077598f..a5ee3b4d2eb8 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -168,7 +168,7 @@ static u64 mult_64x32_and_fold(u64 x, u32 y) /* * Hash a key type and description. */ -static unsigned long hash_key_type_and_desc(const struct keyring_index_key *index_key) +static void hash_key_type_and_desc(struct keyring_index_key *index_key) { const unsigned level_shift = ASSOC_ARRAY_LEVEL_STEP; const unsigned long fan_mask = ASSOC_ARRAY_FAN_MASK; @@ -206,10 +206,22 @@ static unsigned long hash_key_type_and_desc(const struct keyring_index_key *inde * zero for keyrings and non-zero otherwise. */ if (index_key->type != &key_type_keyring && (hash & fan_mask) == 0) - return hash | (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1; - if (index_key->type == &key_type_keyring && (hash & fan_mask) != 0) - return (hash + (hash << level_shift)) & ~fan_mask; - return hash; + hash |= (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1; + else if (index_key->type == &key_type_keyring && (hash & fan_mask) != 0) + hash = (hash + (hash << level_shift)) & ~fan_mask; + index_key->hash = hash; +} + +/* + * Finalise an index key to include a part of the description actually in the + * index key and to add in the hash too. + */ +void key_set_index_key(struct keyring_index_key *index_key) +{ + size_t n = min_t(size_t, index_key->desc_len, sizeof(index_key->desc)); + memcpy(index_key->desc, index_key->description, n); + + hash_key_type_and_desc(index_key); } /* @@ -227,7 +239,7 @@ static unsigned long keyring_get_key_chunk(const void *data, int level) level /= ASSOC_ARRAY_KEY_CHUNK_SIZE; switch (level) { case 0: - return hash_key_type_and_desc(index_key); + return index_key->hash; case 1: return index_key->x; case 2: @@ -280,8 +292,8 @@ static int keyring_diff_objects(const void *object, const void *data) int level, i; level = 0; - seg_a = hash_key_type_and_desc(a); - seg_b = hash_key_type_and_desc(b); + seg_a = a->hash; + seg_b = b->hash; if ((seg_a ^ seg_b) != 0) goto differ; level += ASSOC_ARRAY_KEY_CHUNK_SIZE / 8; -- cgit v1.2.3 From dcf49dbc8077e278ddd1bc7298abc781496e8a08 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 26 Jun 2019 21:02:32 +0100 Subject: keys: Add a 'recurse' flag for keyring searches Add a 'recurse' flag for keyring searches so that the flag can be omitted and recursion disabled, thereby allowing just the nominated keyring to be searched and none of the children. Signed-off-by: David Howells --- Documentation/security/keys/core.rst | 10 ++++++---- certs/blacklist.c | 2 +- crypto/asymmetric_keys/asymmetric_type.c | 2 +- include/linux/key.h | 3 ++- lib/digsig.c | 2 +- net/rxrpc/security.c | 2 +- security/integrity/digsig_asymmetric.c | 4 ++-- security/keys/internal.h | 1 + security/keys/keyctl.c | 2 +- security/keys/keyring.c | 12 ++++++++++-- security/keys/proc.c | 3 ++- security/keys/process_keys.c | 3 ++- security/keys/request_key.c | 3 ++- security/keys/request_key_auth.c | 3 ++- 14 files changed, 34 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst index a0e245f9576f..ae930ae9d590 100644 --- a/Documentation/security/keys/core.rst +++ b/Documentation/security/keys/core.rst @@ -1162,11 +1162,13 @@ payload contents" for more information. key_ref_t keyring_search(key_ref_t keyring_ref, const struct key_type *type, - const char *description) + const char *description, + bool recurse) - This searches the keyring tree specified for a matching key. Error ENOKEY - is returned upon failure (use IS_ERR/PTR_ERR to determine). If successful, - the returned key will need to be released. + This searches the specified keyring only (recurse == false) or keyring tree + (recurse == true) specified for a matching key. Error ENOKEY is returned + upon failure (use IS_ERR/PTR_ERR to determine). If successful, the returned + key will need to be released. The possession attribute from the keyring reference is used to control access through the permissions mask and is propagated to the returned key diff --git a/certs/blacklist.c b/certs/blacklist.c index 3a507b9e2568..181cb7fa9540 100644 --- a/certs/blacklist.c +++ b/certs/blacklist.c @@ -128,7 +128,7 @@ int is_hash_blacklisted(const u8 *hash, size_t hash_len, const char *type) *p = 0; kref = keyring_search(make_key_ref(blacklist_keyring, true), - &key_type_blacklist, buffer); + &key_type_blacklist, buffer, false); if (!IS_ERR(kref)) { key_ref_put(kref); ret = -EKEYREJECTED; diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c index 69a0788a7de5..084027ef3121 100644 --- a/crypto/asymmetric_keys/asymmetric_type.c +++ b/crypto/asymmetric_keys/asymmetric_type.c @@ -87,7 +87,7 @@ struct key *find_asymmetric_key(struct key *keyring, pr_debug("Look up: \"%s\"\n", req); ref = keyring_search(make_key_ref(keyring, 1), - &key_type_asymmetric, req); + &key_type_asymmetric, req, true); if (IS_ERR(ref)) pr_debug("Request for key '%s' err %ld\n", req, PTR_ERR(ref)); kfree(req); diff --git a/include/linux/key.h b/include/linux/key.h index fb2debcacea0..ff102731b3db 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -341,7 +341,8 @@ extern int keyring_clear(struct key *keyring); extern key_ref_t keyring_search(key_ref_t keyring, struct key_type *type, - const char *description); + const char *description, + bool recurse); extern int keyring_add_key(struct key *keyring, struct key *key); diff --git a/lib/digsig.c b/lib/digsig.c index 3b0a579bdcdf..3782af401c68 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -221,7 +221,7 @@ int digsig_verify(struct key *keyring, const char *sig, int siglen, /* search in specific keyring */ key_ref_t kref; kref = keyring_search(make_key_ref(keyring, 1UL), - &key_type_user, name); + &key_type_user, name, true); if (IS_ERR(kref)) key = ERR_CAST(kref); else diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index c4479afe8ae7..2cfc7125bc41 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -148,7 +148,7 @@ found_service: /* look through the service's keyring */ kref = keyring_search(make_key_ref(rx->securities, 1UL), - &key_type_rxrpc_s, kdesc); + &key_type_rxrpc_s, kdesc, true); if (IS_ERR(kref)) { read_unlock(&local->services_lock); _leave(" = %ld [search]", PTR_ERR(kref)); diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c index 99080871eb9f..358f614811e8 100644 --- a/security/integrity/digsig_asymmetric.c +++ b/security/integrity/digsig_asymmetric.c @@ -39,7 +39,7 @@ static struct key *request_asymmetric_key(struct key *keyring, uint32_t keyid) key_ref_t kref; kref = keyring_search(make_key_ref(key, 1), - &key_type_asymmetric, name); + &key_type_asymmetric, name, true); if (!IS_ERR(kref)) { pr_err("Key '%s' is in ima_blacklist_keyring\n", name); return ERR_PTR(-EKEYREJECTED); @@ -51,7 +51,7 @@ static struct key *request_asymmetric_key(struct key *keyring, uint32_t keyid) key_ref_t kref; kref = keyring_search(make_key_ref(keyring, 1), - &key_type_asymmetric, name); + &key_type_asymmetric, name, true); if (IS_ERR(kref)) key = ERR_CAST(kref); else diff --git a/security/keys/internal.h b/security/keys/internal.h index 4305414795ae..aa361299a3ec 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -127,6 +127,7 @@ struct keyring_search_context { #define KEYRING_SEARCH_NO_CHECK_PERM 0x0008 /* Don't check permissions */ #define KEYRING_SEARCH_DETECT_TOO_DEEP 0x0010 /* Give an error on excessive depth */ #define KEYRING_SEARCH_SKIP_EXPIRED 0x0020 /* Ignore expired keys (intention to replace) */ +#define KEYRING_SEARCH_RECURSE 0x0040 /* Search child keyrings also */ int (*iterator)(const void *object, void *iterator_data); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 9f418e66f067..169409b611b0 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -762,7 +762,7 @@ long keyctl_keyring_search(key_serial_t ringid, } /* do the search */ - key_ref = keyring_search(keyring_ref, ktype, description); + key_ref = keyring_search(keyring_ref, ktype, description, true); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); diff --git a/security/keys/keyring.c b/security/keys/keyring.c index a5ee3b4d2eb8..20891cd198f0 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -685,6 +685,9 @@ descend_to_keyring: * Non-keyrings avoid the leftmost branch of the root entirely (root * slots 1-15). */ + if (!(ctx->flags & KEYRING_SEARCH_RECURSE)) + goto not_this_keyring; + ptr = READ_ONCE(keyring->keys.root); if (!ptr) goto not_this_keyring; @@ -885,13 +888,15 @@ key_ref_t keyring_search_rcu(key_ref_t keyring_ref, * @keyring: The root of the keyring tree to be searched. * @type: The type of keyring we want to find. * @description: The name of the keyring we want to find. + * @recurse: True to search the children of @keyring also * * As keyring_search_rcu() above, but using the current task's credentials and * type's default matching function and preferred search method. */ key_ref_t keyring_search(key_ref_t keyring, struct key_type *type, - const char *description) + const char *description, + bool recurse) { struct keyring_search_context ctx = { .index_key.type = type, @@ -906,6 +911,8 @@ key_ref_t keyring_search(key_ref_t keyring, key_ref_t key; int ret; + if (recurse) + ctx.flags |= KEYRING_SEARCH_RECURSE; if (type->match_preparse) { ret = type->match_preparse(&ctx.match_data); if (ret < 0) @@ -1176,7 +1183,8 @@ static int keyring_detect_cycle(struct key *A, struct key *B) .flags = (KEYRING_SEARCH_NO_STATE_CHECK | KEYRING_SEARCH_NO_UPDATE_TIME | KEYRING_SEARCH_NO_CHECK_PERM | - KEYRING_SEARCH_DETECT_TOO_DEEP), + KEYRING_SEARCH_DETECT_TOO_DEEP | + KEYRING_SEARCH_RECURSE), }; rcu_read_lock(); diff --git a/security/keys/proc.c b/security/keys/proc.c index f081dceae3b9..b4f5ba56b9cb 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -170,7 +170,8 @@ static int proc_keys_show(struct seq_file *m, void *v) .match_data.cmp = lookup_user_key_possessed, .match_data.raw_data = key, .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, - .flags = KEYRING_SEARCH_NO_STATE_CHECK, + .flags = (KEYRING_SEARCH_NO_STATE_CHECK | + KEYRING_SEARCH_RECURSE), }; key_ref = make_key_ref(key, 0); diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index f8ffb06d0297..b07f768d23dc 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -531,7 +531,8 @@ key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags, struct keyring_search_context ctx = { .match_data.cmp = lookup_user_key_possessed, .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, - .flags = KEYRING_SEARCH_NO_STATE_CHECK, + .flags = (KEYRING_SEARCH_NO_STATE_CHECK | + KEYRING_SEARCH_RECURSE), }; struct request_key_auth *rka; struct key *key; diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 36c55ef47b9e..1ffd3803ce29 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -569,7 +569,8 @@ struct key *request_key_and_link(struct key_type *type, .match_data.raw_data = description, .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, .flags = (KEYRING_SEARCH_DO_STATE_CHECK | - KEYRING_SEARCH_SKIP_EXPIRED), + KEYRING_SEARCH_SKIP_EXPIRED | + KEYRING_SEARCH_RECURSE), }; struct key *key; key_ref_t key_ref; diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 99ed7a8a273d..f613987e8a63 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -252,7 +252,8 @@ struct key *key_get_instantiation_authkey(key_serial_t target_id) .match_data.cmp = key_default_cmp, .match_data.raw_data = description, .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, - .flags = KEYRING_SEARCH_DO_STATE_CHECK, + .flags = (KEYRING_SEARCH_DO_STATE_CHECK | + KEYRING_SEARCH_RECURSE), }; struct key *authkey; key_ref_t authkey_ref; -- cgit v1.2.3 From b206f281d0ee14969878469816a69db22d5838e8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 26 Jun 2019 21:02:32 +0100 Subject: keys: Namespace keyring names Keyring names are held in a single global list that any process can pick from by means of keyctl_join_session_keyring (provided the keyring grants Search permission). This isn't very container friendly, however. Make the following changes: (1) Make default session, process and thread keyring names begin with a '.' instead of '_'. (2) Keyrings whose names begin with a '.' aren't added to the list. Such keyrings are system specials. (3) Replace the global list with per-user_namespace lists. A keyring adds its name to the list for the user_namespace that it is currently in. (4) When a user_namespace is deleted, it just removes itself from the keyring name list. The global keyring_name_lock is retained for accessing the name lists. This allows (4) to work. This can be tested by: # keyctl newring foo @s 995906392 # unshare -U $ keyctl show ... 995906392 --alswrv 65534 65534 \_ keyring: foo ... $ keyctl session foo Joined session keyring: 935622349 As can be seen, a new session keyring was created. The capability bit KEYCTL_CAPS1_NS_KEYRING_NAME is set if the kernel is employing this feature. Signed-off-by: David Howells cc: Eric W. Biederman --- include/linux/key.h | 2 + include/linux/user_namespace.h | 5 +++ include/uapi/linux/keyctl.h | 1 + kernel/user.c | 3 ++ kernel/user_namespace.c | 7 +-- security/keys/keyctl.c | 3 +- security/keys/keyring.c | 99 ++++++++++++++++++------------------------ 7 files changed, 60 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index ff102731b3db..ae1177302d70 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -361,6 +361,7 @@ extern void key_set_timeout(struct key *, unsigned); extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags, key_perm_t perm); +extern void key_free_user_ns(struct user_namespace *); /* * The permissions required on a key that we're looking up. @@ -434,6 +435,7 @@ extern void key_init(void); #define key_fsuid_changed(c) do { } while(0) #define key_fsgid_changed(c) do { } while(0) #define key_init() do { } while(0) +#define key_free_user_ns(ns) do { } while(0) #endif /* CONFIG_KEYS */ #endif /* __KERNEL__ */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index d6b74b91096b..90457015fa3f 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -64,6 +64,11 @@ struct user_namespace { struct ns_common ns; unsigned long flags; +#ifdef CONFIG_KEYS + /* List of joinable keyrings in this namespace */ + struct list_head keyring_name_list; +#endif + /* Register of per-UID persistent keyrings for this namespace */ #ifdef CONFIG_PERSISTENT_KEYRINGS struct key *persistent_keyring_register; diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index 551b5814f53e..35b405034674 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -128,5 +128,6 @@ struct keyctl_pkey_params { #define KEYCTL_CAPS0_INVALIDATE 0x20 /* KEYCTL_INVALIDATE supported */ #define KEYCTL_CAPS0_RESTRICT_KEYRING 0x40 /* KEYCTL_RESTRICT_KEYRING supported */ #define KEYCTL_CAPS0_MOVE 0x80 /* KEYCTL_MOVE supported */ +#define KEYCTL_CAPS1_NS_KEYRING_NAME 0x01 /* Keyring names are per-user_namespace */ #endif /* _LINUX_KEYCTL_H */ diff --git a/kernel/user.c b/kernel/user.c index 88b834f0eebc..50979fd1b7aa 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -62,6 +62,9 @@ struct user_namespace init_user_ns = { .ns.ops = &userns_operations, #endif .flags = USERNS_INIT_FLAGS, +#ifdef CONFIG_KEYS + .keyring_name_list = LIST_HEAD_INIT(init_user_ns.keyring_name_list), +#endif #ifdef CONFIG_PERSISTENT_KEYRINGS .persistent_keyring_register_sem = __RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem), diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 923414a246e9..bda6e890ad88 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -133,6 +133,9 @@ int create_user_ns(struct cred *new) ns->flags = parent_ns->flags; mutex_unlock(&userns_state_mutex); +#ifdef CONFIG_KEYS + INIT_LIST_HEAD(&ns->keyring_name_list); +#endif #ifdef CONFIG_PERSISTENT_KEYRINGS init_rwsem(&ns->persistent_keyring_register_sem); #endif @@ -196,9 +199,7 @@ static void free_user_ns(struct work_struct *work) kfree(ns->projid_map.reverse); } retire_userns_sysctls(ns); -#ifdef CONFIG_PERSISTENT_KEYRINGS - key_put(ns->persistent_keyring_register); -#endif + key_free_user_ns(ns); ns_free_inum(&ns->ns); kmem_cache_free(user_ns_cachep, ns); dec_user_namespaces(ucounts); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 169409b611b0..8a813220f269 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -30,7 +30,7 @@ #define KEY_MAX_DESC_SIZE 4096 -static const unsigned char keyrings_capabilities[1] = { +static const unsigned char keyrings_capabilities[2] = { [0] = (KEYCTL_CAPS0_CAPABILITIES | (IS_ENABLED(CONFIG_PERSISTENT_KEYRINGS) ? KEYCTL_CAPS0_PERSISTENT_KEYRINGS : 0) | (IS_ENABLED(CONFIG_KEY_DH_OPERATIONS) ? KEYCTL_CAPS0_DIFFIE_HELLMAN : 0) | @@ -40,6 +40,7 @@ static const unsigned char keyrings_capabilities[1] = { KEYCTL_CAPS0_RESTRICT_KEYRING | KEYCTL_CAPS0_MOVE ), + [1] = (KEYCTL_CAPS1_NS_KEYRING_NAME), }; static int key_get_type_from_user(char *type, diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 20891cd198f0..fe851292509e 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -28,11 +29,6 @@ */ #define KEYRING_SEARCH_MAX_DEPTH 6 -/* - * We keep all named keyrings in a hash to speed looking them up. - */ -#define KEYRING_NAME_HASH_SIZE (1 << 5) - /* * We mark pointers we pass to the associative array with bit 1 set if * they're keyrings and clear otherwise. @@ -55,17 +51,20 @@ static inline void *keyring_key_to_ptr(struct key *key) return key; } -static struct list_head keyring_name_hash[KEYRING_NAME_HASH_SIZE]; static DEFINE_RWLOCK(keyring_name_lock); -static inline unsigned keyring_hash(const char *desc) +/* + * Clean up the bits of user_namespace that belong to us. + */ +void key_free_user_ns(struct user_namespace *ns) { - unsigned bucket = 0; - - for (; *desc; desc++) - bucket += (unsigned char)*desc; + write_lock(&keyring_name_lock); + list_del_init(&ns->keyring_name_list); + write_unlock(&keyring_name_lock); - return bucket & (KEYRING_NAME_HASH_SIZE - 1); +#ifdef CONFIG_PERSISTENT_KEYRINGS + key_put(ns->persistent_keyring_register); +#endif } /* @@ -104,23 +103,17 @@ static DEFINE_MUTEX(keyring_serialise_link_lock); /* * Publish the name of a keyring so that it can be found by name (if it has - * one). + * one and it doesn't begin with a dot). */ static void keyring_publish_name(struct key *keyring) { - int bucket; - - if (keyring->description) { - bucket = keyring_hash(keyring->description); + struct user_namespace *ns = current_user_ns(); + if (keyring->description && + keyring->description[0] && + keyring->description[0] != '.') { write_lock(&keyring_name_lock); - - if (!keyring_name_hash[bucket].next) - INIT_LIST_HEAD(&keyring_name_hash[bucket]); - - list_add_tail(&keyring->name_link, - &keyring_name_hash[bucket]); - + list_add_tail(&keyring->name_link, &ns->keyring_name_list); write_unlock(&keyring_name_lock); } } @@ -1097,50 +1090,44 @@ found: */ struct key *find_keyring_by_name(const char *name, bool uid_keyring) { + struct user_namespace *ns = current_user_ns(); struct key *keyring; - int bucket; if (!name) return ERR_PTR(-EINVAL); - bucket = keyring_hash(name); - read_lock(&keyring_name_lock); - if (keyring_name_hash[bucket].next) { - /* search this hash bucket for a keyring with a matching name - * that's readable and that hasn't been revoked */ - list_for_each_entry(keyring, - &keyring_name_hash[bucket], - name_link - ) { - if (!kuid_has_mapping(current_user_ns(), keyring->user->uid)) - continue; - - if (test_bit(KEY_FLAG_REVOKED, &keyring->flags)) - continue; + /* Search this hash bucket for a keyring with a matching name that + * grants Search permission and that hasn't been revoked + */ + list_for_each_entry(keyring, &ns->keyring_name_list, name_link) { + if (!kuid_has_mapping(ns, keyring->user->uid)) + continue; - if (strcmp(keyring->description, name) != 0) - continue; + if (test_bit(KEY_FLAG_REVOKED, &keyring->flags)) + continue; - if (uid_keyring) { - if (!test_bit(KEY_FLAG_UID_KEYRING, - &keyring->flags)) - continue; - } else { - if (key_permission(make_key_ref(keyring, 0), - KEY_NEED_SEARCH) < 0) - continue; - } + if (strcmp(keyring->description, name) != 0) + continue; - /* we've got a match but we might end up racing with - * key_cleanup() if the keyring is currently 'dead' - * (ie. it has a zero usage count) */ - if (!refcount_inc_not_zero(&keyring->usage)) + if (uid_keyring) { + if (!test_bit(KEY_FLAG_UID_KEYRING, + &keyring->flags)) + continue; + } else { + if (key_permission(make_key_ref(keyring, 0), + KEY_NEED_SEARCH) < 0) continue; - keyring->last_used_at = ktime_get_real_seconds(); - goto out; } + + /* we've got a match but we might end up racing with + * key_cleanup() if the keyring is currently 'dead' + * (ie. it has a zero usage count) */ + if (!refcount_inc_not_zero(&keyring->usage)) + continue; + keyring->last_used_at = ktime_get_real_seconds(); + goto out; } keyring = ERR_PTR(-ENOKEY); -- cgit v1.2.3 From 0f44e4d976f96c6439da0d6717238efa4b91196e Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 26 Jun 2019 21:02:32 +0100 Subject: keys: Move the user and user-session keyrings to the user_namespace Move the user and user-session keyrings to the user_namespace struct rather than pinning them from the user_struct struct. This prevents these keyrings from propagating across user-namespaces boundaries with regard to the KEY_SPEC_* flags, thereby making them more useful in a containerised environment. The issue is that a single user_struct may be represent UIDs in several different namespaces. The way the patch does this is by attaching a 'register keyring' in each user_namespace and then sticking the user and user-session keyrings into that. It can then be searched to retrieve them. Signed-off-by: David Howells cc: Jann Horn --- include/linux/sched/user.h | 14 --- include/linux/user_namespace.h | 9 +- kernel/user.c | 7 +- kernel/user_namespace.c | 4 +- security/keys/internal.h | 3 +- security/keys/keyring.c | 1 + security/keys/persistent.c | 8 +- security/keys/process_keys.c | 259 ++++++++++++++++++++++++++--------------- security/keys/request_key.c | 20 ++-- 9 files changed, 196 insertions(+), 129 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 468d2565a9fe..917d88edb7b9 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -7,8 +7,6 @@ #include #include -struct key; - /* * Some day this will be a full-fledged user tracking system.. */ @@ -30,18 +28,6 @@ struct user_struct { unsigned long unix_inflight; /* How many files in flight in unix sockets */ atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */ -#ifdef CONFIG_KEYS - /* - * These pointers can only change from NULL to a non-NULL value once. - * Writes are protected by key_user_keyring_mutex. - * Unlocked readers should use READ_ONCE() unless they know that - * install_user_keyrings() has been called successfully (which sets - * these members to non-NULL values, preventing further modifications). - */ - struct key *uid_keyring; /* UID specific keyring */ - struct key *session_keyring; /* UID's default session keyring */ -#endif - /* Hash table maintenance information */ struct hlist_node uidhash_node; kuid_t uid; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 90457015fa3f..fb9f4f799554 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -65,14 +65,19 @@ struct user_namespace { unsigned long flags; #ifdef CONFIG_KEYS - /* List of joinable keyrings in this namespace */ + /* List of joinable keyrings in this namespace. Modification access of + * these pointers is controlled by keyring_sem. Once + * user_keyring_register is set, it won't be changed, so it can be + * accessed directly with READ_ONCE(). + */ struct list_head keyring_name_list; + struct key *user_keyring_register; + struct rw_semaphore keyring_sem; #endif /* Register of per-UID persistent keyrings for this namespace */ #ifdef CONFIG_PERSISTENT_KEYRINGS struct key *persistent_keyring_register; - struct rw_semaphore persistent_keyring_register_sem; #endif struct work_struct work; #ifdef CONFIG_SYSCTL diff --git a/kernel/user.c b/kernel/user.c index 50979fd1b7aa..f8519b62cf9a 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -64,10 +64,7 @@ struct user_namespace init_user_ns = { .flags = USERNS_INIT_FLAGS, #ifdef CONFIG_KEYS .keyring_name_list = LIST_HEAD_INIT(init_user_ns.keyring_name_list), -#endif -#ifdef CONFIG_PERSISTENT_KEYRINGS - .persistent_keyring_register_sem = - __RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem), + .keyring_sem = __RWSEM_INITIALIZER(init_user_ns.keyring_sem), #endif }; EXPORT_SYMBOL_GPL(init_user_ns); @@ -143,8 +140,6 @@ static void free_user(struct user_struct *up, unsigned long flags) { uid_hash_remove(up); spin_unlock_irqrestore(&uidhash_lock, flags); - key_put(up->uid_keyring); - key_put(up->session_keyring); kmem_cache_free(uid_cachep, up); } diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index bda6e890ad88..c87c2ecc7085 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -135,9 +135,7 @@ int create_user_ns(struct cred *new) #ifdef CONFIG_KEYS INIT_LIST_HEAD(&ns->keyring_name_list); -#endif -#ifdef CONFIG_PERSISTENT_KEYRINGS - init_rwsem(&ns->persistent_keyring_register_sem); + init_rwsem(&ns->keyring_sem); #endif ret = -ENOMEM; if (!setup_userns_sysctls(ns)) diff --git a/security/keys/internal.h b/security/keys/internal.h index aa361299a3ec..d3a9439e2386 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -148,7 +148,8 @@ extern key_ref_t search_process_keyrings_rcu(struct keyring_search_context *ctx) extern struct key *find_keyring_by_name(const char *name, bool uid_keyring); -extern int install_user_keyrings(void); +extern int look_up_user_keyrings(struct key **, struct key **); +extern struct key *get_user_session_keyring_rcu(const struct cred *); extern int install_thread_keyring_to_cred(struct cred *); extern int install_process_keyring_to_cred(struct cred *); extern int install_session_keyring_to_cred(struct cred *, struct key *); diff --git a/security/keys/keyring.c b/security/keys/keyring.c index fe851292509e..3663e5168583 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -62,6 +62,7 @@ void key_free_user_ns(struct user_namespace *ns) list_del_init(&ns->keyring_name_list); write_unlock(&keyring_name_lock); + key_put(ns->user_keyring_register); #ifdef CONFIG_PERSISTENT_KEYRINGS key_put(ns->persistent_keyring_register); #endif diff --git a/security/keys/persistent.c b/security/keys/persistent.c index fc29ec59efa7..90303fe4a394 100644 --- a/security/keys/persistent.c +++ b/security/keys/persistent.c @@ -91,9 +91,9 @@ static long key_get_persistent(struct user_namespace *ns, kuid_t uid, if (ns->persistent_keyring_register) { reg_ref = make_key_ref(ns->persistent_keyring_register, true); - down_read(&ns->persistent_keyring_register_sem); + down_read(&ns->keyring_sem); persistent_ref = find_key_to_update(reg_ref, &index_key); - up_read(&ns->persistent_keyring_register_sem); + up_read(&ns->keyring_sem); if (persistent_ref) goto found; @@ -102,9 +102,9 @@ static long key_get_persistent(struct user_namespace *ns, kuid_t uid, /* It wasn't in the register, so we'll need to create it. We might * also need to create the register. */ - down_write(&ns->persistent_keyring_register_sem); + down_write(&ns->keyring_sem); persistent_ref = key_create_persistent(ns, uid, &index_key); - up_write(&ns->persistent_keyring_register_sem); + up_write(&ns->keyring_sem); if (!IS_ERR(persistent_ref)) goto found; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index b07f768d23dc..f74d64215942 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -19,15 +19,13 @@ #include #include #include +#include #include #include "internal.h" /* Session keyring create vs join semaphore */ static DEFINE_MUTEX(key_session_mutex); -/* User keyring creation semaphore */ -static DEFINE_MUTEX(key_user_keyring_mutex); - /* The root user's tracking struct */ struct key_user root_key_user = { .usage = REFCOUNT_INIT(3), @@ -39,98 +37,185 @@ struct key_user root_key_user = { }; /* - * Install the user and user session keyrings for the current process's UID. + * Get or create a user register keyring. + */ +static struct key *get_user_register(struct user_namespace *user_ns) +{ + struct key *reg_keyring = READ_ONCE(user_ns->user_keyring_register); + + if (reg_keyring) + return reg_keyring; + + down_write(&user_ns->keyring_sem); + + /* Make sure there's a register keyring. It gets owned by the + * user_namespace's owner. + */ + reg_keyring = user_ns->user_keyring_register; + if (!reg_keyring) { + reg_keyring = keyring_alloc(".user_reg", + user_ns->owner, INVALID_GID, + &init_cred, + KEY_POS_WRITE | KEY_POS_SEARCH | + KEY_USR_VIEW | KEY_USR_READ, + 0, + NULL, NULL); + if (!IS_ERR(reg_keyring)) + smp_store_release(&user_ns->user_keyring_register, + reg_keyring); + } + + up_write(&user_ns->keyring_sem); + + /* We don't return a ref since the keyring is pinned by the user_ns */ + return reg_keyring; +} + +/* + * Look up the user and user session keyrings for the current process's UID, + * creating them if they don't exist. */ -int install_user_keyrings(void) +int look_up_user_keyrings(struct key **_user_keyring, + struct key **_user_session_keyring) { - struct user_struct *user; - const struct cred *cred; - struct key *uid_keyring, *session_keyring; + const struct cred *cred = current_cred(); + struct user_namespace *user_ns = current_user_ns(); + struct key *reg_keyring, *uid_keyring, *session_keyring; key_perm_t user_keyring_perm; + key_ref_t uid_keyring_r, session_keyring_r; + uid_t uid = from_kuid(user_ns, cred->user->uid); char buf[20]; int ret; - uid_t uid; user_keyring_perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL; - cred = current_cred(); - user = cred->user; - uid = from_kuid(cred->user_ns, user->uid); - kenter("%p{%u}", user, uid); + kenter("%u", uid); - if (READ_ONCE(user->uid_keyring) && READ_ONCE(user->session_keyring)) { - kleave(" = 0 [exist]"); - return 0; - } + reg_keyring = get_user_register(user_ns); + if (IS_ERR(reg_keyring)) + return PTR_ERR(reg_keyring); - mutex_lock(&key_user_keyring_mutex); + down_write(&user_ns->keyring_sem); ret = 0; - if (!user->uid_keyring) { - /* get the UID-specific keyring - * - there may be one in existence already as it may have been - * pinned by a session, but the user_struct pointing to it - * may have been destroyed by setuid */ - sprintf(buf, "_uid.%u", uid); - - uid_keyring = find_keyring_by_name(buf, true); + /* Get the user keyring. Note that there may be one in existence + * already as it may have been pinned by a session, but the user_struct + * pointing to it may have been destroyed by setuid. + */ + snprintf(buf, sizeof(buf), "_uid.%u", uid); + uid_keyring_r = keyring_search(make_key_ref(reg_keyring, true), + &key_type_keyring, buf, false); + kdebug("_uid %p", uid_keyring_r); + if (uid_keyring_r == ERR_PTR(-EAGAIN)) { + uid_keyring = keyring_alloc(buf, cred->user->uid, INVALID_GID, + cred, user_keyring_perm, + KEY_ALLOC_UID_KEYRING | + KEY_ALLOC_IN_QUOTA, + NULL, reg_keyring); if (IS_ERR(uid_keyring)) { - uid_keyring = keyring_alloc(buf, user->uid, INVALID_GID, - cred, user_keyring_perm, - KEY_ALLOC_UID_KEYRING | - KEY_ALLOC_IN_QUOTA, - NULL, NULL); - if (IS_ERR(uid_keyring)) { - ret = PTR_ERR(uid_keyring); - goto error; - } + ret = PTR_ERR(uid_keyring); + goto error; } + } else if (IS_ERR(uid_keyring_r)) { + ret = PTR_ERR(uid_keyring_r); + goto error; + } else { + uid_keyring = key_ref_to_ptr(uid_keyring_r); + } - /* get a default session keyring (which might also exist - * already) */ - sprintf(buf, "_uid_ses.%u", uid); - - session_keyring = find_keyring_by_name(buf, true); + /* Get a default session keyring (which might also exist already) */ + snprintf(buf, sizeof(buf), "_uid_ses.%u", uid); + session_keyring_r = keyring_search(make_key_ref(reg_keyring, true), + &key_type_keyring, buf, false); + kdebug("_uid_ses %p", session_keyring_r); + if (session_keyring_r == ERR_PTR(-EAGAIN)) { + session_keyring = keyring_alloc(buf, cred->user->uid, INVALID_GID, + cred, user_keyring_perm, + KEY_ALLOC_UID_KEYRING | + KEY_ALLOC_IN_QUOTA, + NULL, NULL); if (IS_ERR(session_keyring)) { - session_keyring = - keyring_alloc(buf, user->uid, INVALID_GID, - cred, user_keyring_perm, - KEY_ALLOC_UID_KEYRING | - KEY_ALLOC_IN_QUOTA, - NULL, NULL); - if (IS_ERR(session_keyring)) { - ret = PTR_ERR(session_keyring); - goto error_release; - } - - /* we install a link from the user session keyring to - * the user keyring */ - ret = key_link(session_keyring, uid_keyring); - if (ret < 0) - goto error_release_both; + ret = PTR_ERR(session_keyring); + goto error_release; } - /* install the keyrings */ - /* paired with READ_ONCE() */ - smp_store_release(&user->uid_keyring, uid_keyring); - /* paired with READ_ONCE() */ - smp_store_release(&user->session_keyring, session_keyring); + /* We install a link from the user session keyring to + * the user keyring. + */ + ret = key_link(session_keyring, uid_keyring); + if (ret < 0) + goto error_release_session; + + /* And only then link the user-session keyring to the + * register. + */ + ret = key_link(reg_keyring, session_keyring); + if (ret < 0) + goto error_release_session; + } else if (IS_ERR(session_keyring_r)) { + ret = PTR_ERR(session_keyring_r); + goto error_release; + } else { + session_keyring = key_ref_to_ptr(session_keyring_r); } - mutex_unlock(&key_user_keyring_mutex); + up_write(&user_ns->keyring_sem); + + if (_user_session_keyring) + *_user_session_keyring = session_keyring; + else + key_put(session_keyring); + if (_user_keyring) + *_user_keyring = uid_keyring; + else + key_put(uid_keyring); kleave(" = 0"); return 0; -error_release_both: +error_release_session: key_put(session_keyring); error_release: key_put(uid_keyring); error: - mutex_unlock(&key_user_keyring_mutex); + up_write(&user_ns->keyring_sem); kleave(" = %d", ret); return ret; } +/* + * Get the user session keyring if it exists, but don't create it if it + * doesn't. + */ +struct key *get_user_session_keyring_rcu(const struct cred *cred) +{ + struct key *reg_keyring = READ_ONCE(cred->user_ns->user_keyring_register); + key_ref_t session_keyring_r; + char buf[20]; + + struct keyring_search_context ctx = { + .index_key.type = &key_type_keyring, + .index_key.description = buf, + .cred = cred, + .match_data.cmp = key_default_cmp, + .match_data.raw_data = buf, + .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, + .flags = KEYRING_SEARCH_DO_STATE_CHECK, + }; + + if (!reg_keyring) + return NULL; + + ctx.index_key.desc_len = snprintf(buf, sizeof(buf), "_uid_ses.%u", + from_kuid(cred->user_ns, + cred->user->uid)); + + session_keyring_r = keyring_search_rcu(make_key_ref(reg_keyring, true), + &ctx); + if (IS_ERR(session_keyring_r)) + return NULL; + return key_ref_to_ptr(session_keyring_r); +} + /* * Install a thread keyring to the given credentials struct if it didn't have * one already. This is allowed to overrun the quota. @@ -340,6 +425,7 @@ void key_fsgid_changed(struct cred *new_cred) */ key_ref_t search_cred_keyrings_rcu(struct keyring_search_context *ctx) { + struct key *user_session; key_ref_t key_ref, ret, err; const struct cred *cred = ctx->cred; @@ -415,10 +501,11 @@ key_ref_t search_cred_keyrings_rcu(struct keyring_search_context *ctx) } } /* or search the user-session keyring */ - else if (READ_ONCE(cred->user->session_keyring)) { - key_ref = keyring_search_rcu( - make_key_ref(READ_ONCE(cred->user->session_keyring), 1), - ctx); + else if ((user_session = get_user_session_keyring_rcu(cred))) { + key_ref = keyring_search_rcu(make_key_ref(user_session, 1), + ctx); + key_put(user_session); + if (!IS_ERR(key_ref)) goto found; @@ -535,7 +622,7 @@ key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags, KEYRING_SEARCH_RECURSE), }; struct request_key_auth *rka; - struct key *key; + struct key *key, *user_session; key_ref_t key_ref, skey_ref; int ret; @@ -584,20 +671,20 @@ try_again: if (!ctx.cred->session_keyring) { /* always install a session keyring upon access if one * doesn't exist yet */ - ret = install_user_keyrings(); + ret = look_up_user_keyrings(NULL, &user_session); if (ret < 0) goto error; if (lflags & KEY_LOOKUP_CREATE) ret = join_session_keyring(NULL); else - ret = install_session_keyring( - ctx.cred->user->session_keyring); + ret = install_session_keyring(user_session); + key_put(user_session); if (ret < 0) goto error; goto reget_creds; - } else if (ctx.cred->session_keyring == - READ_ONCE(ctx.cred->user->session_keyring) && + } else if (test_bit(KEY_FLAG_UID_KEYRING, + &ctx.cred->session_keyring->flags) && lflags & KEY_LOOKUP_CREATE) { ret = join_session_keyring(NULL); if (ret < 0) @@ -611,26 +698,16 @@ try_again: break; case KEY_SPEC_USER_KEYRING: - if (!READ_ONCE(ctx.cred->user->uid_keyring)) { - ret = install_user_keyrings(); - if (ret < 0) - goto error; - } - - key = ctx.cred->user->uid_keyring; - __key_get(key); + ret = look_up_user_keyrings(&key, NULL); + if (ret < 0) + goto error; key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_SESSION_KEYRING: - if (!READ_ONCE(ctx.cred->user->session_keyring)) { - ret = install_user_keyrings(); - if (ret < 0) - goto error; - } - - key = ctx.cred->user->session_keyring; - __key_get(key); + ret = look_up_user_keyrings(NULL, &key); + if (ret < 0) + goto error; key_ref = make_key_ref(key, 1); break; @@ -879,7 +956,7 @@ void key_change_session_keyring(struct callback_head *twork) */ static int __init init_root_keyring(void) { - return install_user_keyrings(); + return look_up_user_keyrings(NULL, NULL); } late_initcall(init_root_keyring); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 1ffd3803ce29..9201ca96c4df 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -121,7 +121,7 @@ static int call_sbin_request_key(struct key *authkey, void *aux) struct request_key_auth *rka = get_request_key_auth(authkey); const struct cred *cred = current_cred(); key_serial_t prkey, sskey; - struct key *key = rka->target_key, *keyring, *session; + struct key *key = rka->target_key, *keyring, *session, *user_session; char *argv[9], *envp[3], uid_str[12], gid_str[12]; char key_str[12], keyring_str[3][12]; char desc[20]; @@ -129,9 +129,9 @@ static int call_sbin_request_key(struct key *authkey, void *aux) kenter("{%d},{%d},%s", key->serial, authkey->serial, rka->op); - ret = install_user_keyrings(); + ret = look_up_user_keyrings(NULL, &user_session); if (ret < 0) - goto error_alloc; + goto error_us; /* allocate a new session keyring */ sprintf(desc, "_req.%u", key->serial); @@ -169,7 +169,7 @@ static int call_sbin_request_key(struct key *authkey, void *aux) session = cred->session_keyring; if (!session) - session = cred->user->session_keyring; + session = user_session; sskey = session->serial; sprintf(keyring_str[2], "%d", sskey); @@ -211,6 +211,8 @@ error_link: key_put(keyring); error_alloc: + key_put(user_session); +error_us: complete_request_key(authkey, ret); kleave(" = %d", ret); return ret; @@ -317,13 +319,15 @@ static int construct_get_dest_keyring(struct key **_dest_keyring) /* fall through */ case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: - dest_keyring = - key_get(READ_ONCE(cred->user->session_keyring)); + ret = look_up_user_keyrings(NULL, &dest_keyring); + if (ret < 0) + return ret; break; case KEY_REQKEY_DEFL_USER_KEYRING: - dest_keyring = - key_get(READ_ONCE(cred->user->uid_keyring)); + ret = look_up_user_keyrings(&dest_keyring, NULL); + if (ret < 0) + return ret; break; case KEY_REQKEY_DEFL_GROUP_KEYRING: -- cgit v1.2.3 From 3b6e4de05e9ee2e2f94e4a3fe14d945e2418d9a8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 26 Jun 2019 21:02:32 +0100 Subject: keys: Include target namespace in match criteria Currently a key has a standard matching criteria of { type, description } and this is used to only allow keys with unique criteria in a keyring. This means, however, that you cannot have keys with the same type and description but a different target namespace in the same keyring. This is a potential problem for a containerised environment where, say, a container is made up of some parts of its mount space involving netfs superblocks from two different network namespaces. This is also a problem for shared system management keyrings such as the DNS records keyring or the NFS idmapper keyring that might contain keys from different network namespaces. Fix this by including a namespace component in a key's matching criteria. Keyring types are marked to indicate which, if any, namespace is relevant to keys of that type, and that namespace is set when the key is created from the current task's namespace set. The capability bit KEYCTL_CAPS1_NS_KEY_TAG is set if the kernel is employing this feature. Signed-off-by: David Howells --- include/linux/key.h | 10 ++++++++++ include/uapi/linux/keyctl.h | 1 + security/keys/gc.c | 2 +- security/keys/key.c | 1 + security/keys/keyctl.c | 3 ++- security/keys/keyring.c | 36 ++++++++++++++++++++++++++++++++++-- security/keys/persistent.c | 1 + 7 files changed, 50 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index ae1177302d70..abc68555bac3 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -82,9 +82,16 @@ struct cred; struct key_type; struct key_owner; +struct key_tag; struct keyring_list; struct keyring_name; +struct key_tag { + struct rcu_head rcu; + refcount_t usage; + bool removed; /* T when subject removed */ +}; + struct keyring_index_key { /* [!] If this structure is altered, the union in struct key must change too! */ unsigned long hash; /* Hash value */ @@ -101,6 +108,7 @@ struct keyring_index_key { unsigned long x; }; struct key_type *type; + struct key_tag *domain_tag; /* Domain of operation */ const char *description; }; @@ -218,6 +226,7 @@ struct key { unsigned long hash; unsigned long len_desc; struct key_type *type; /* type of key */ + struct key_tag *domain_tag; /* Domain of operation */ char *description; }; }; @@ -268,6 +277,7 @@ extern struct key *key_alloc(struct key_type *type, extern void key_revoke(struct key *key); extern void key_invalidate(struct key *key); extern void key_put(struct key *key); +extern bool key_put_tag(struct key_tag *tag); static inline struct key *__key_get(struct key *key) { diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index 35b405034674..ed3d5893830d 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -129,5 +129,6 @@ struct keyctl_pkey_params { #define KEYCTL_CAPS0_RESTRICT_KEYRING 0x40 /* KEYCTL_RESTRICT_KEYRING supported */ #define KEYCTL_CAPS0_MOVE 0x80 /* KEYCTL_MOVE supported */ #define KEYCTL_CAPS1_NS_KEYRING_NAME 0x01 /* Keyring names are per-user_namespace */ +#define KEYCTL_CAPS1_NS_KEY_TAG 0x02 /* Key indexing can include a namespace tag */ #endif /* _LINUX_KEYCTL_H */ diff --git a/security/keys/gc.c b/security/keys/gc.c index 634e96b380e8..83d279fb7793 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -154,7 +154,7 @@ static noinline void key_gc_unused_keys(struct list_head *keys) atomic_dec(&key->user->nikeys); key_user_put(key->user); - + key_put_tag(key->domain_tag); kfree(key->description); memzero_explicit(key, sizeof(*key)); diff --git a/security/keys/key.c b/security/keys/key.c index 9d52f2472a09..85fdc2ea6c14 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -317,6 +317,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, goto security_error; /* publish the key by giving it a serial number */ + refcount_inc(&key->domain_tag->usage); atomic_inc(&user->nkeys); key_alloc_serial(key); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 8a813220f269..4bb5781d3ddf 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -40,7 +40,8 @@ static const unsigned char keyrings_capabilities[2] = { KEYCTL_CAPS0_RESTRICT_KEYRING | KEYCTL_CAPS0_MOVE ), - [1] = (KEYCTL_CAPS1_NS_KEYRING_NAME), + [1] = (KEYCTL_CAPS1_NS_KEYRING_NAME | + KEYCTL_CAPS1_NS_KEY_TAG), }; static int key_get_type_from_user(char *type, diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 3663e5168583..0da8fa282d56 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -175,6 +175,9 @@ static void hash_key_type_and_desc(struct keyring_index_key *index_key) type = (unsigned long)index_key->type; acc = mult_64x32_and_fold(type, desc_len + 13); acc = mult_64x32_and_fold(acc, 9207); + piece = (unsigned long)index_key->domain_tag; + acc = mult_64x32_and_fold(acc, piece); + acc = mult_64x32_and_fold(acc, 9207); for (;;) { n = desc_len; @@ -208,16 +211,36 @@ static void hash_key_type_and_desc(struct keyring_index_key *index_key) /* * Finalise an index key to include a part of the description actually in the - * index key and to add in the hash too. + * index key, to set the domain tag and to calculate the hash. */ void key_set_index_key(struct keyring_index_key *index_key) { + static struct key_tag default_domain_tag = { .usage = REFCOUNT_INIT(1), }; size_t n = min_t(size_t, index_key->desc_len, sizeof(index_key->desc)); + memcpy(index_key->desc, index_key->description, n); + index_key->domain_tag = &default_domain_tag; hash_key_type_and_desc(index_key); } +/** + * key_put_tag - Release a ref on a tag. + * @tag: The tag to release. + * + * This releases a reference the given tag and returns true if that ref was the + * last one. + */ +bool key_put_tag(struct key_tag *tag) +{ + if (refcount_dec_and_test(&tag->usage)) { + kfree_rcu(tag, rcu); + return true; + } + + return false; +} + /* * Build the next index key chunk. * @@ -238,8 +261,10 @@ static unsigned long keyring_get_key_chunk(const void *data, int level) return index_key->x; case 2: return (unsigned long)index_key->type; + case 3: + return (unsigned long)index_key->domain_tag; default: - level -= 3; + level -= 4; if (desc_len <= sizeof(index_key->desc)) return 0; @@ -268,6 +293,7 @@ static bool keyring_compare_object(const void *object, const void *data) const struct key *key = keyring_ptr_to_key(object); return key->index_key.type == index_key->type && + key->index_key.domain_tag == index_key->domain_tag && key->index_key.desc_len == index_key->desc_len && memcmp(key->index_key.description, index_key->description, index_key->desc_len) == 0; @@ -309,6 +335,12 @@ static int keyring_diff_objects(const void *object, const void *data) goto differ; level += sizeof(unsigned long); + seg_a = (unsigned long)a->domain_tag; + seg_b = (unsigned long)b->domain_tag; + if ((seg_a ^ seg_b) != 0) + goto differ; + level += sizeof(unsigned long); + i = sizeof(a->desc); if (a->desc_len <= i) goto same; diff --git a/security/keys/persistent.c b/security/keys/persistent.c index 90303fe4a394..9944d855a28d 100644 --- a/security/keys/persistent.c +++ b/security/keys/persistent.c @@ -84,6 +84,7 @@ static long key_get_persistent(struct user_namespace *ns, kuid_t uid, long ret; /* Look in the register if it exists */ + memset(&index_key, 0, sizeof(index_key)); index_key.type = &key_type_keyring; index_key.description = buf; index_key.desc_len = sprintf(buf, "_persistent.%u", from_kuid(ns, uid)); -- cgit v1.2.3 From 218e6424e711ceee31eeba93212fed8ee92d6a11 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 26 Jun 2019 21:02:32 +0100 Subject: keys: Garbage collect keys for which the domain has been removed If a key operation domain (such as a network namespace) has been removed then attempt to garbage collect all the keys that use it. Signed-off-by: David Howells --- include/linux/key.h | 2 ++ security/keys/internal.h | 3 ++- security/keys/keyring.c | 15 +++++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index abc68555bac3..60c076c6e47f 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -278,6 +278,7 @@ extern void key_revoke(struct key *key); extern void key_invalidate(struct key *key); extern void key_put(struct key *key); extern bool key_put_tag(struct key_tag *tag); +extern void key_remove_domain(struct key_tag *domain_tag); static inline struct key *__key_get(struct key *key) { @@ -446,6 +447,7 @@ extern void key_init(void); #define key_fsgid_changed(c) do { } while(0) #define key_init() do { } while(0) #define key_free_user_ns(ns) do { } while(0) +#define key_remove_domain(d) do { } while(0) #endif /* CONFIG_KEYS */ #endif /* __KERNEL__ */ diff --git a/security/keys/internal.h b/security/keys/internal.h index d3a9439e2386..5a561f5f199e 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -209,7 +209,8 @@ static inline bool key_is_dead(const struct key *key, time64_t limit) return key->flags & ((1 << KEY_FLAG_DEAD) | (1 << KEY_FLAG_INVALIDATED)) || - (key->expiry > 0 && key->expiry <= limit); + (key->expiry > 0 && key->expiry <= limit) || + key->domain_tag->removed; } /* diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 0da8fa282d56..d3c86fda1510 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -241,6 +241,21 @@ bool key_put_tag(struct key_tag *tag) return false; } +/** + * key_remove_domain - Kill off a key domain and gc its keys + * @domain_tag: The domain tag to release. + * + * This marks a domain tag as being dead and releases a ref on it. If that + * wasn't the last reference, the garbage collector is poked to try and delete + * all keys that were in the domain. + */ +void key_remove_domain(struct key_tag *domain_tag) +{ + domain_tag->removed = true; + if (!key_put_tag(domain_tag)) + key_schedule_gc_links(); +} + /* * Build the next index key chunk. * -- cgit v1.2.3 From 9b242610514fe387ef957bce05e1fdd3efd60359 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 26 Jun 2019 21:02:33 +0100 Subject: keys: Network namespace domain tag Create key domain tags for network namespaces and make it possible to automatically tag keys that are used by networked services (e.g. AF_RXRPC, AFS, DNS) with the default network namespace if not set by the caller. This allows keys with the same description but in different namespaces to coexist within a keyring. Signed-off-by: David Howells cc: netdev@vger.kernel.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: linux-afs@lists.infradead.org --- include/linux/key-type.h | 3 +++ include/net/net_namespace.h | 3 +++ net/core/net_namespace.c | 20 ++++++++++++++++++++ net/dns_resolver/dns_key.c | 1 + net/rxrpc/key.c | 2 ++ security/keys/keyring.c | 7 ++++++- 6 files changed, 35 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/key-type.h b/include/linux/key-type.h index e49d1de0614e..2148a6bf58f1 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -74,6 +74,9 @@ struct key_type { */ size_t def_datalen; + unsigned int flags; +#define KEY_TYPE_NET_DOMAIN 0x00000001 /* Keys of this type have a net namespace domain */ + /* vet a description */ int (*vet_description)(const char *description); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 12689ddfc24c..a56bf7fc7c2b 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -71,6 +71,9 @@ struct net { */ struct llist_node cleanup_list; /* namespaces on death row */ +#ifdef CONFIG_KEYS + struct key_tag *key_domain; /* Key domain of operation tag */ +#endif struct user_namespace *user_ns; /* Owning user namespace */ struct ucounts *ucounts; spinlock_t nsid_lock; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 711b161505ac..88e603b85ed2 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -38,9 +38,16 @@ EXPORT_SYMBOL_GPL(net_namespace_list); DECLARE_RWSEM(net_rwsem); EXPORT_SYMBOL_GPL(net_rwsem); +#ifdef CONFIG_KEYS +static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) }; +#endif + struct net init_net = { .count = REFCOUNT_INIT(1), .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), +#ifdef CONFIG_KEYS + .key_domain = &init_net_key_domain, +#endif }; EXPORT_SYMBOL(init_net); @@ -386,10 +393,22 @@ static struct net *net_alloc(void) if (!net) goto out_free; +#ifdef CONFIG_KEYS + net->key_domain = kzalloc(sizeof(struct key_tag), GFP_KERNEL); + if (!net->key_domain) + goto out_free_2; + refcount_set(&net->key_domain->usage, 1); +#endif + rcu_assign_pointer(net->gen, ng); out: return net; +#ifdef CONFIG_KEYS +out_free_2: + kmem_cache_free(net_cachep, net); + net = NULL; +#endif out_free: kfree(ng); goto out; @@ -566,6 +585,7 @@ static void cleanup_net(struct work_struct *work) list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list); dec_net_namespaces(net->ucounts); + key_remove_domain(net->key_domain); put_user_ns(net->user_ns); net_drop_ns(net); } diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index a65d553e730d..3e1a90669006 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -314,6 +314,7 @@ static long dns_resolver_read(const struct key *key, struct key_type key_type_dns_resolver = { .name = "dns_resolver", + .flags = KEY_TYPE_NET_DOMAIN, .preparse = dns_resolver_preparse, .free_preparse = dns_resolver_free_preparse, .instantiate = generic_key_instantiate, diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index e7f6b8823eb6..2722189ec273 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -43,6 +43,7 @@ static long rxrpc_read(const struct key *, char __user *, size_t); */ struct key_type key_type_rxrpc = { .name = "rxrpc", + .flags = KEY_TYPE_NET_DOMAIN, .preparse = rxrpc_preparse, .free_preparse = rxrpc_free_preparse, .instantiate = generic_key_instantiate, @@ -58,6 +59,7 @@ EXPORT_SYMBOL(key_type_rxrpc); */ struct key_type key_type_rxrpc_s = { .name = "rxrpc_s", + .flags = KEY_TYPE_NET_DOMAIN, .vet_description = rxrpc_vet_description_s, .preparse = rxrpc_preparse_s, .free_preparse = rxrpc_free_preparse_s, diff --git a/security/keys/keyring.c b/security/keys/keyring.c index d3c86fda1510..bca070f6ab46 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -17,10 +17,12 @@ #include #include #include +#include #include #include #include #include +#include #include "internal.h" /* @@ -220,7 +222,10 @@ void key_set_index_key(struct keyring_index_key *index_key) memcpy(index_key->desc, index_key->description, n); - index_key->domain_tag = &default_domain_tag; + if (index_key->type->flags & KEY_TYPE_NET_DOMAIN) + index_key->domain_tag = current->nsproxy->net_ns->key_domain; + else + index_key->domain_tag = &default_domain_tag; hash_key_type_and_desc(index_key); } -- cgit v1.2.3 From 96125bf9985a75db00496dd2bc9249b777d2b19b Mon Sep 17 00:00:00 2001 From: Dave Taht Date: Sat, 22 Jun 2019 10:07:34 -0700 Subject: Allow 0.0.0.0/8 as a valid address range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The longstanding prohibition against using 0.0.0.0/8 dates back to two issues with the early internet. There was an interoperability problem with BSD 4.2 in 1984, fixed in BSD 4.3 in 1986. BSD 4.2 has long since been retired. Secondly, addresses of the form 0.x.y.z were initially defined only as a source address in an ICMP datagram, indicating "node number x.y.z on this IPv4 network", by nodes that know their address on their local network, but do not yet know their network prefix, in RFC0792 (page 19). This usage of 0.x.y.z was later repealed in RFC1122 (section 3.2.2.7), because the original ICMP-based mechanism for learning the network prefix was unworkable on many networks such as Ethernet (which have longer addresses that would not fit into the 24 "node number" bits). Modern networks use reverse ARP (RFC0903) or BOOTP (RFC0951) or DHCP (RFC2131) to find their full 32-bit address and CIDR netmask (and other parameters such as default gateways). 0.x.y.z has had 16,777,215 addresses in 0.0.0.0/8 space left unused and reserved for future use, since 1989. This patch allows for these 16m new IPv4 addresses to appear within a box or on the wire. Layer 2 switches don't care. 0.0.0.0/32 is still prohibited, of course. Signed-off-by: Dave Taht Signed-off-by: John Gilmore Acked-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- include/linux/in.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/in.h b/include/linux/in.h index 4d2fedfb753a..1873ef642605 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -63,7 +63,7 @@ static inline bool ipv4_is_all_snoopers(__be32 addr) static inline bool ipv4_is_zeronet(__be32 addr) { - return (addr & htonl(0xff000000)) == htonl(0x00000000); + return (addr == 0); } /* Special-Use IPv4 Addresses (RFC3330) */ -- cgit v1.2.3 From 2a6a7aacd4e557a4c7007f8858bcc9654b098fea Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 3 Jun 2019 10:24:32 +0300 Subject: mfd: regulator: clk: Split rohm-bd718x7.h Split the bd718x7.h to ROHM common and bd718x7 specific parts so that we do not need to add same things in every new ROHM PMIC header. Please note that this change requires changes also in bd718x7 sub-device drivers for regulators and clk. Signed-off-by: Matti Vaittinen Acked-by: Mark Brown Acked-by: Stephen Boyd Signed-off-by: Lee Jones --- drivers/clk/clk-bd718x7.c | 6 +++--- drivers/mfd/rohm-bd718x7.c | 23 ++++++++++++----------- drivers/regulator/bd718x7-regulator.c | 25 +++++++++++++------------ include/linux/mfd/rohm-bd718x7.h | 22 ++++++++-------------- include/linux/mfd/rohm-generic.h | 20 ++++++++++++++++++++ 5 files changed, 56 insertions(+), 40 deletions(-) create mode 100644 include/linux/mfd/rohm-generic.h (limited to 'include/linux') diff --git a/drivers/clk/clk-bd718x7.c b/drivers/clk/clk-bd718x7.c index 60422c72d142..461228ebf703 100644 --- a/drivers/clk/clk-bd718x7.c +++ b/drivers/clk/clk-bd718x7.c @@ -17,7 +17,7 @@ struct bd718xx_clk { u8 reg; u8 mask; struct platform_device *pdev; - struct bd718xx *mfd; + struct rohm_regmap_dev *mfd; }; static int bd71837_clk_set(struct clk_hw *hw, int status) @@ -68,7 +68,7 @@ static int bd71837_clk_probe(struct platform_device *pdev) int rval = -ENOMEM; const char *parent_clk; struct device *parent = pdev->dev.parent; - struct bd718xx *mfd = dev_get_drvdata(parent); + struct rohm_regmap_dev *mfd = dev_get_drvdata(parent); struct clk_init_data init = { .name = "bd718xx-32k-out", .ops = &bd71837_clk_ops, @@ -119,5 +119,5 @@ static struct platform_driver bd71837_clk = { module_platform_driver(bd71837_clk); MODULE_AUTHOR("Matti Vaittinen "); -MODULE_DESCRIPTION("BD71837 chip clk driver"); +MODULE_DESCRIPTION("BD71837/BD71847 chip clk driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/mfd/rohm-bd718x7.c b/drivers/mfd/rohm-bd718x7.c index a29d529a96f4..7beb444a57cb 100644 --- a/drivers/mfd/rohm-bd718x7.c +++ b/drivers/mfd/rohm-bd718x7.c @@ -98,18 +98,19 @@ static int bd718xx_i2c_probe(struct i2c_client *i2c, return -ENOMEM; bd718xx->chip_irq = i2c->irq; - bd718xx->chip_type = (unsigned int)(uintptr_t) + bd718xx->chip.chip_type = (unsigned int)(uintptr_t) of_device_get_match_data(&i2c->dev); - bd718xx->dev = &i2c->dev; + bd718xx->chip.dev = &i2c->dev; dev_set_drvdata(&i2c->dev, bd718xx); - bd718xx->regmap = devm_regmap_init_i2c(i2c, &bd718xx_regmap_config); - if (IS_ERR(bd718xx->regmap)) { + bd718xx->chip.regmap = devm_regmap_init_i2c(i2c, + &bd718xx_regmap_config); + if (IS_ERR(bd718xx->chip.regmap)) { dev_err(&i2c->dev, "regmap initialization failed\n"); - return PTR_ERR(bd718xx->regmap); + return PTR_ERR(bd718xx->chip.regmap); } - ret = devm_regmap_add_irq_chip(&i2c->dev, bd718xx->regmap, + ret = devm_regmap_add_irq_chip(&i2c->dev, bd718xx->chip.regmap, bd718xx->chip_irq, IRQF_ONESHOT, 0, &bd718xx_irq_chip, &bd718xx->irq_data); if (ret) { @@ -118,7 +119,7 @@ static int bd718xx_i2c_probe(struct i2c_client *i2c, } /* Configure short press to 10 milliseconds */ - ret = regmap_update_bits(bd718xx->regmap, + ret = regmap_update_bits(bd718xx->chip.regmap, BD718XX_REG_PWRONCONFIG0, BD718XX_PWRBTN_PRESS_DURATION_MASK, BD718XX_PWRBTN_SHORT_PRESS_10MS); @@ -129,7 +130,7 @@ static int bd718xx_i2c_probe(struct i2c_client *i2c, } /* Configure long press to 10 seconds */ - ret = regmap_update_bits(bd718xx->regmap, + ret = regmap_update_bits(bd718xx->chip.regmap, BD718XX_REG_PWRONCONFIG1, BD718XX_PWRBTN_PRESS_DURATION_MASK, BD718XX_PWRBTN_LONG_PRESS_10S); @@ -149,7 +150,7 @@ static int bd718xx_i2c_probe(struct i2c_client *i2c, button.irq = ret; - ret = devm_mfd_add_devices(bd718xx->dev, PLATFORM_DEVID_AUTO, + ret = devm_mfd_add_devices(bd718xx->chip.dev, PLATFORM_DEVID_AUTO, bd718xx_mfd_cells, ARRAY_SIZE(bd718xx_mfd_cells), NULL, 0, regmap_irq_get_domain(bd718xx->irq_data)); @@ -162,11 +163,11 @@ static int bd718xx_i2c_probe(struct i2c_client *i2c, static const struct of_device_id bd718xx_of_match[] = { { .compatible = "rohm,bd71837", - .data = (void *)BD718XX_TYPE_BD71837, + .data = (void *)ROHM_CHIP_TYPE_BD71837, }, { .compatible = "rohm,bd71847", - .data = (void *)BD718XX_TYPE_BD71847, + .data = (void *)ROHM_CHIP_TYPE_BD71847, }, { } }; diff --git a/drivers/regulator/bd718x7-regulator.c b/drivers/regulator/bd718x7-regulator.c index fde4264da6ff..ef2fc175a9ae 100644 --- a/drivers/regulator/bd718x7-regulator.c +++ b/drivers/regulator/bd718x7-regulator.c @@ -1152,12 +1152,12 @@ static int bd718xx_probe(struct platform_device *pdev) { struct bd718xx *mfd; struct regulator_config config = { 0 }; - struct bd718xx_pmic_inits pmic_regulators[] = { - [BD718XX_TYPE_BD71837] = { + struct bd718xx_pmic_inits pmic_regulators[ROHM_CHIP_TYPE_AMOUNT] = { + [ROHM_CHIP_TYPE_BD71837] = { .r_datas = bd71837_regulators, .r_amount = ARRAY_SIZE(bd71837_regulators), }, - [BD718XX_TYPE_BD71847] = { + [ROHM_CHIP_TYPE_BD71847] = { .r_datas = bd71847_regulators, .r_amount = ARRAY_SIZE(bd71847_regulators), }, @@ -1173,15 +1173,15 @@ static int bd718xx_probe(struct platform_device *pdev) goto err; } - if (mfd->chip_type >= BD718XX_TYPE_AMOUNT || - !pmic_regulators[mfd->chip_type].r_datas) { + if (mfd->chip.chip_type >= ROHM_CHIP_TYPE_AMOUNT || + !pmic_regulators[mfd->chip.chip_type].r_datas) { dev_err(&pdev->dev, "Unsupported chip type\n"); err = -EINVAL; goto err; } /* Register LOCK release */ - err = regmap_update_bits(mfd->regmap, BD718XX_REG_REGLOCK, + err = regmap_update_bits(mfd->chip.regmap, BD718XX_REG_REGLOCK, (REGLOCK_PWRSEQ | REGLOCK_VREG), 0); if (err) { dev_err(&pdev->dev, "Failed to unlock PMIC (%d)\n", err); @@ -1200,7 +1200,8 @@ static int bd718xx_probe(struct platform_device *pdev) * bit allowing HW defaults for power rails to be used */ if (!use_snvs) { - err = regmap_update_bits(mfd->regmap, BD718XX_REG_TRANS_COND1, + err = regmap_update_bits(mfd->chip.regmap, + BD718XX_REG_TRANS_COND1, BD718XX_ON_REQ_POWEROFF_MASK | BD718XX_SWRESET_POWEROFF_MASK | BD718XX_WDOG_POWEROFF_MASK | @@ -1215,17 +1216,17 @@ static int bd718xx_probe(struct platform_device *pdev) } } - for (i = 0; i < pmic_regulators[mfd->chip_type].r_amount; i++) { + for (i = 0; i < pmic_regulators[mfd->chip.chip_type].r_amount; i++) { const struct regulator_desc *desc; struct regulator_dev *rdev; const struct bd718xx_regulator_data *r; - r = &pmic_regulators[mfd->chip_type].r_datas[i]; + r = &pmic_regulators[mfd->chip.chip_type].r_datas[i]; desc = &r->desc; config.dev = pdev->dev.parent; - config.regmap = mfd->regmap; + config.regmap = mfd->chip.regmap; rdev = devm_regulator_register(&pdev->dev, desc, &config); if (IS_ERR(rdev)) { @@ -1254,7 +1255,7 @@ static int bd718xx_probe(struct platform_device *pdev) */ if (!use_snvs || !rdev->constraints->always_on || !rdev->constraints->boot_on) { - err = regmap_update_bits(mfd->regmap, r->init.reg, + err = regmap_update_bits(mfd->chip.regmap, r->init.reg, r->init.mask, r->init.val); if (err) { dev_err(&pdev->dev, @@ -1264,7 +1265,7 @@ static int bd718xx_probe(struct platform_device *pdev) } } for (j = 0; j < r->additional_init_amnt; j++) { - err = regmap_update_bits(mfd->regmap, + err = regmap_update_bits(mfd->chip.regmap, r->additional_inits[j].reg, r->additional_inits[j].mask, r->additional_inits[j].val); diff --git a/include/linux/mfd/rohm-bd718x7.h b/include/linux/mfd/rohm-bd718x7.h index fd194bfc836f..7f2dbde402a1 100644 --- a/include/linux/mfd/rohm-bd718x7.h +++ b/include/linux/mfd/rohm-bd718x7.h @@ -4,14 +4,9 @@ #ifndef __LINUX_MFD_BD718XX_H__ #define __LINUX_MFD_BD718XX_H__ +#include #include -enum { - BD718XX_TYPE_BD71837 = 0, - BD718XX_TYPE_BD71847, - BD718XX_TYPE_AMOUNT -}; - enum { BD718XX_BUCK1 = 0, BD718XX_BUCK2, @@ -321,18 +316,17 @@ enum { BD718XX_PWRBTN_LONG_PRESS_15S }; -struct bd718xx_clk; - struct bd718xx { - unsigned int chip_type; - struct device *dev; - struct regmap *regmap; - unsigned long int id; + /* + * Please keep this as the first member here as some + * drivers (clk) supporting more than one chip may only know this + * generic struct 'struct rohm_regmap_dev' and assume it is + * the first chunk of parent device's private data. + */ + struct rohm_regmap_dev chip; int chip_irq; struct regmap_irq_chip_data *irq_data; - - struct bd718xx_clk *clk; }; #endif /* __LINUX_MFD_BD718XX_H__ */ diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h new file mode 100644 index 000000000000..bff15ac26f2c --- /dev/null +++ b/include/linux/mfd/rohm-generic.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (C) 2018 ROHM Semiconductors */ + +#ifndef __LINUX_MFD_ROHM_H__ +#define __LINUX_MFD_ROHM_H__ + +enum { + ROHM_CHIP_TYPE_BD71837 = 0, + ROHM_CHIP_TYPE_BD71847, + ROHM_CHIP_TYPE_BD70528, + ROHM_CHIP_TYPE_AMOUNT +}; + +struct rohm_regmap_dev { + unsigned int chip_type; + struct device *dev; + struct regmap *regmap; +}; + +#endif -- cgit v1.2.3 From 21b7c58fc1943f3aa8c18a994ab9bed4ae5aa72d Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 3 Jun 2019 10:25:08 +0300 Subject: mfd: bd70528: Support ROHM bd70528 PMIC core ROHM BD70528MWV is an ultra-low quiescent current general purpose single-chip power management IC for battery-powered portable devices. Add MFD core which enables chip access for following subdevices: - regulators/LED drivers - battery-charger - gpios - 32.768kHz clk - RTC - watchdog Signed-off-by: Matti Vaittinen Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 17 ++ drivers/mfd/Makefile | 2 + drivers/mfd/rohm-bd70528.c | 316 ++++++++++++++++++++++++++++++ include/linux/mfd/rohm-bd70528.h | 408 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 743 insertions(+) create mode 100644 drivers/mfd/rohm-bd70528.c create mode 100644 include/linux/mfd/rohm-bd70528.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 294d9567cc71..11fc53d78c5f 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1890,6 +1890,23 @@ config MFD_ROHM_BD718XX NXP i.MX8. It contains 8 BUCK outputs and 7 LDOs, voltage monitoring and emergency shut down as well as 32,768KHz clock output. +config MFD_ROHM_BD70528 + tristate "ROHM BD70528 Power Management IC" + depends on I2C=y + depends on OF + select REGMAP_I2C + select REGMAP_IRQ + select MFD_CORE + help + Select this option to get support for the ROHM BD70528 Power + Management IC. BD71837 is general purpose single-chip power + management IC for battery-powered portable devices. It contains + 3 ultra-low current consumption buck converters, 3 LDOs and 2 LED + drivers. Also included are 4 GPIOs, a real-time clock (RTC), a 32kHz + crystal oscillator, high-accuracy VREF for use with an external ADC, + 10 bits SAR ADC for battery temperature monitor and 1S battery + charger. + config MFD_STM32_LPTIMER tristate "Support for STM32 Low-Power Timer" depends on (ARCH_STM32 && OF) || COMPILE_TEST diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 52b1a90ff515..643d65bcb6ea 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -247,5 +247,7 @@ obj-$(CONFIG_MFD_STM32_TIMERS) += stm32-timers.o obj-$(CONFIG_MFD_MXS_LRADC) += mxs-lradc.o obj-$(CONFIG_MFD_SC27XX_PMIC) += sprd-sc27xx-spi.o obj-$(CONFIG_RAVE_SP_CORE) += rave-sp.o +obj-$(CONFIG_MFD_ROHM_BD70528) += rohm-bd70528.o obj-$(CONFIG_MFD_ROHM_BD718XX) += rohm-bd718x7.o obj-$(CONFIG_MFD_STMFX) += stmfx.o + diff --git a/drivers/mfd/rohm-bd70528.c b/drivers/mfd/rohm-bd70528.c new file mode 100644 index 000000000000..55599d5c5c86 --- /dev/null +++ b/drivers/mfd/rohm-bd70528.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// +// Copyright (C) 2019 ROHM Semiconductors +// +// ROHM BD70528 PMIC driver + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BD70528_NUM_OF_GPIOS 4 + +static const struct resource rtc_irqs[] = { + DEFINE_RES_IRQ_NAMED(BD70528_INT_RTC_ALARM, "bd70528-rtc-alm"), + DEFINE_RES_IRQ_NAMED(BD70528_INT_ELPS_TIM, "bd70528-elapsed-timer"), +}; + +static const struct resource charger_irqs[] = { + DEFINE_RES_IRQ_NAMED(BD70528_INT_BAT_OV_RES, "bd70528-bat-ov-res"), + DEFINE_RES_IRQ_NAMED(BD70528_INT_BAT_OV_DET, "bd70528-bat-ov-det"), + DEFINE_RES_IRQ_NAMED(BD70528_INT_DBAT_DET, "bd70528-bat-dead"), + DEFINE_RES_IRQ_NAMED(BD70528_INT_BATTSD_COLD_RES, "bd70528-bat-warmed"), + DEFINE_RES_IRQ_NAMED(BD70528_INT_BATTSD_COLD_DET, "bd70528-bat-cold"), + DEFINE_RES_IRQ_NAMED(BD70528_INT_BATTSD_HOT_RES, "bd70528-bat-cooled"), + DEFINE_RES_IRQ_NAMED(BD70528_INT_BATTSD_HOT_DET, "bd70528-bat-hot"), + DEFINE_RES_IRQ_NAMED(BD70528_INT_CHG_TSD, "bd70528-chg-tshd"), + DEFINE_RES_IRQ_NAMED(BD70528_INT_BAT_RMV, "bd70528-bat-removed"), + DEFINE_RES_IRQ_NAMED(BD70528_INT_BAT_DET, "bd70528-bat-detected"), + DEFINE_RES_IRQ_NAMED(BD70528_INT_DCIN2_OV_RES, "bd70528-dcin2-ov-res"), + DEFINE_RES_IRQ_NAMED(BD70528_INT_DCIN2_OV_DET, "bd70528-dcin2-ov-det"), + DEFINE_RES_IRQ_NAMED(BD70528_INT_DCIN2_RMV, "bd70528-dcin2-removed"), + DEFINE_RES_IRQ_NAMED(BD70528_INT_DCIN2_DET, "bd70528-dcin2-detected"), + DEFINE_RES_IRQ_NAMED(BD70528_INT_DCIN1_RMV, "bd70528-dcin1-removed"), + DEFINE_RES_IRQ_NAMED(BD70528_INT_DCIN1_DET, "bd70528-dcin1-detected"), +}; + +static struct mfd_cell bd70528_mfd_cells[] = { + { .name = "bd70528-pmic", }, + { .name = "bd70528-gpio", }, + /* + * We use BD71837 driver to drive the clock block. Only differences to + * BD70528 clock gate are the register address and mask. + */ + { .name = "bd718xx-clk", }, + { .name = "bd70528-wdt", }, + { + .name = "bd70528-power", + .resources = charger_irqs, + .num_resources = ARRAY_SIZE(charger_irqs), + }, { + .name = "bd70528-rtc", + .resources = rtc_irqs, + .num_resources = ARRAY_SIZE(rtc_irqs), + }, +}; + +static const struct regmap_range volatile_ranges[] = { + { + .range_min = BD70528_REG_INT_MAIN, + .range_max = BD70528_REG_INT_OP_FAIL, + }, { + .range_min = BD70528_REG_RTC_COUNT_H, + .range_max = BD70528_REG_RTC_ALM_REPEAT, + }, { + /* + * WDT control reg is special. Magic values must be written to + * it in order to change the control. Should not be cached. + */ + .range_min = BD70528_REG_WDT_CTRL, + .range_max = BD70528_REG_WDT_CTRL, + }, { + /* + * BD70528 also contains a few other registers which require + * magic sequences to be written in order to update the value. + * At least SHIPMODE, HWRESET, WARMRESET,and STANDBY + */ + .range_min = BD70528_REG_SHIPMODE, + .range_max = BD70528_REG_STANDBY, + }, +}; + +static const struct regmap_access_table volatile_regs = { + .yes_ranges = &volatile_ranges[0], + .n_yes_ranges = ARRAY_SIZE(volatile_ranges), +}; + +static struct regmap_config bd70528_regmap = { + .reg_bits = 8, + .val_bits = 8, + .volatile_table = &volatile_regs, + .max_register = BD70528_MAX_REGISTER, + .cache_type = REGCACHE_RBTREE, +}; + +/* + * Mapping of main IRQ register bits to sub-IRQ register offsets so that we can + * access corect sub-IRQ registers based on bits that are set in main IRQ + * register. + */ + +/* bit [0] - Shutdown register */ +unsigned int bit0_offsets[] = {0}; /* Shutdown register */ +unsigned int bit1_offsets[] = {1}; /* Power failure register */ +unsigned int bit2_offsets[] = {2}; /* VR FAULT register */ +unsigned int bit3_offsets[] = {3}; /* PMU register interrupts */ +unsigned int bit4_offsets[] = {4, 5}; /* Charger 1 and Charger 2 registers */ +unsigned int bit5_offsets[] = {6}; /* RTC register */ +unsigned int bit6_offsets[] = {7}; /* GPIO register */ +unsigned int bit7_offsets[] = {8}; /* Invalid operation register */ + +static struct regmap_irq_sub_irq_map bd70528_sub_irq_offsets[] = { + REGMAP_IRQ_MAIN_REG_OFFSET(bit0_offsets), + REGMAP_IRQ_MAIN_REG_OFFSET(bit1_offsets), + REGMAP_IRQ_MAIN_REG_OFFSET(bit2_offsets), + REGMAP_IRQ_MAIN_REG_OFFSET(bit3_offsets), + REGMAP_IRQ_MAIN_REG_OFFSET(bit4_offsets), + REGMAP_IRQ_MAIN_REG_OFFSET(bit5_offsets), + REGMAP_IRQ_MAIN_REG_OFFSET(bit6_offsets), + REGMAP_IRQ_MAIN_REG_OFFSET(bit7_offsets), +}; + +static struct regmap_irq bd70528_irqs[] = { + REGMAP_IRQ_REG(BD70528_INT_LONGPUSH, 0, BD70528_INT_LONGPUSH_MASK), + REGMAP_IRQ_REG(BD70528_INT_WDT, 0, BD70528_INT_WDT_MASK), + REGMAP_IRQ_REG(BD70528_INT_HWRESET, 0, BD70528_INT_HWRESET_MASK), + REGMAP_IRQ_REG(BD70528_INT_RSTB_FAULT, 0, BD70528_INT_RSTB_FAULT_MASK), + REGMAP_IRQ_REG(BD70528_INT_VBAT_UVLO, 0, BD70528_INT_VBAT_UVLO_MASK), + REGMAP_IRQ_REG(BD70528_INT_TSD, 0, BD70528_INT_TSD_MASK), + REGMAP_IRQ_REG(BD70528_INT_RSTIN, 0, BD70528_INT_RSTIN_MASK), + REGMAP_IRQ_REG(BD70528_INT_BUCK1_FAULT, 1, + BD70528_INT_BUCK1_FAULT_MASK), + REGMAP_IRQ_REG(BD70528_INT_BUCK2_FAULT, 1, + BD70528_INT_BUCK2_FAULT_MASK), + REGMAP_IRQ_REG(BD70528_INT_BUCK3_FAULT, 1, + BD70528_INT_BUCK3_FAULT_MASK), + REGMAP_IRQ_REG(BD70528_INT_LDO1_FAULT, 1, BD70528_INT_LDO1_FAULT_MASK), + REGMAP_IRQ_REG(BD70528_INT_LDO2_FAULT, 1, BD70528_INT_LDO2_FAULT_MASK), + REGMAP_IRQ_REG(BD70528_INT_LDO3_FAULT, 1, BD70528_INT_LDO3_FAULT_MASK), + REGMAP_IRQ_REG(BD70528_INT_LED1_FAULT, 1, BD70528_INT_LED1_FAULT_MASK), + REGMAP_IRQ_REG(BD70528_INT_LED2_FAULT, 1, BD70528_INT_LED2_FAULT_MASK), + REGMAP_IRQ_REG(BD70528_INT_BUCK1_OCP, 2, BD70528_INT_BUCK1_OCP_MASK), + REGMAP_IRQ_REG(BD70528_INT_BUCK2_OCP, 2, BD70528_INT_BUCK2_OCP_MASK), + REGMAP_IRQ_REG(BD70528_INT_BUCK3_OCP, 2, BD70528_INT_BUCK3_OCP_MASK), + REGMAP_IRQ_REG(BD70528_INT_LED1_OCP, 2, BD70528_INT_LED1_OCP_MASK), + REGMAP_IRQ_REG(BD70528_INT_LED2_OCP, 2, BD70528_INT_LED2_OCP_MASK), + REGMAP_IRQ_REG(BD70528_INT_BUCK1_FULLON, 2, + BD70528_INT_BUCK1_FULLON_MASK), + REGMAP_IRQ_REG(BD70528_INT_BUCK2_FULLON, 2, + BD70528_INT_BUCK2_FULLON_MASK), + REGMAP_IRQ_REG(BD70528_INT_SHORTPUSH, 3, BD70528_INT_SHORTPUSH_MASK), + REGMAP_IRQ_REG(BD70528_INT_AUTO_WAKEUP, 3, + BD70528_INT_AUTO_WAKEUP_MASK), + REGMAP_IRQ_REG(BD70528_INT_STATE_CHANGE, 3, + BD70528_INT_STATE_CHANGE_MASK), + REGMAP_IRQ_REG(BD70528_INT_BAT_OV_RES, 4, BD70528_INT_BAT_OV_RES_MASK), + REGMAP_IRQ_REG(BD70528_INT_BAT_OV_DET, 4, BD70528_INT_BAT_OV_DET_MASK), + REGMAP_IRQ_REG(BD70528_INT_DBAT_DET, 4, BD70528_INT_DBAT_DET_MASK), + REGMAP_IRQ_REG(BD70528_INT_BATTSD_COLD_RES, 4, + BD70528_INT_BATTSD_COLD_RES_MASK), + REGMAP_IRQ_REG(BD70528_INT_BATTSD_COLD_DET, 4, + BD70528_INT_BATTSD_COLD_DET_MASK), + REGMAP_IRQ_REG(BD70528_INT_BATTSD_HOT_RES, 4, + BD70528_INT_BATTSD_HOT_RES_MASK), + REGMAP_IRQ_REG(BD70528_INT_BATTSD_HOT_DET, 4, + BD70528_INT_BATTSD_HOT_DET_MASK), + REGMAP_IRQ_REG(BD70528_INT_CHG_TSD, 4, BD70528_INT_CHG_TSD_MASK), + REGMAP_IRQ_REG(BD70528_INT_BAT_RMV, 5, BD70528_INT_BAT_RMV_MASK), + REGMAP_IRQ_REG(BD70528_INT_BAT_DET, 5, BD70528_INT_BAT_DET_MASK), + REGMAP_IRQ_REG(BD70528_INT_DCIN2_OV_RES, 5, + BD70528_INT_DCIN2_OV_RES_MASK), + REGMAP_IRQ_REG(BD70528_INT_DCIN2_OV_DET, 5, + BD70528_INT_DCIN2_OV_DET_MASK), + REGMAP_IRQ_REG(BD70528_INT_DCIN2_RMV, 5, BD70528_INT_DCIN2_RMV_MASK), + REGMAP_IRQ_REG(BD70528_INT_DCIN2_DET, 5, BD70528_INT_DCIN2_DET_MASK), + REGMAP_IRQ_REG(BD70528_INT_DCIN1_RMV, 5, BD70528_INT_DCIN1_RMV_MASK), + REGMAP_IRQ_REG(BD70528_INT_DCIN1_DET, 5, BD70528_INT_DCIN1_DET_MASK), + REGMAP_IRQ_REG(BD70528_INT_RTC_ALARM, 6, BD70528_INT_RTC_ALARM_MASK), + REGMAP_IRQ_REG(BD70528_INT_ELPS_TIM, 6, BD70528_INT_ELPS_TIM_MASK), + REGMAP_IRQ_REG(BD70528_INT_GPIO0, 7, BD70528_INT_GPIO0_MASK), + REGMAP_IRQ_REG(BD70528_INT_GPIO1, 7, BD70528_INT_GPIO1_MASK), + REGMAP_IRQ_REG(BD70528_INT_GPIO2, 7, BD70528_INT_GPIO2_MASK), + REGMAP_IRQ_REG(BD70528_INT_GPIO3, 7, BD70528_INT_GPIO3_MASK), + REGMAP_IRQ_REG(BD70528_INT_BUCK1_DVS_OPFAIL, 8, + BD70528_INT_BUCK1_DVS_OPFAIL_MASK), + REGMAP_IRQ_REG(BD70528_INT_BUCK2_DVS_OPFAIL, 8, + BD70528_INT_BUCK2_DVS_OPFAIL_MASK), + REGMAP_IRQ_REG(BD70528_INT_BUCK3_DVS_OPFAIL, 8, + BD70528_INT_BUCK3_DVS_OPFAIL_MASK), + REGMAP_IRQ_REG(BD70528_INT_LED1_VOLT_OPFAIL, 8, + BD70528_INT_LED1_VOLT_OPFAIL_MASK), + REGMAP_IRQ_REG(BD70528_INT_LED2_VOLT_OPFAIL, 8, + BD70528_INT_LED2_VOLT_OPFAIL_MASK), +}; + +static struct regmap_irq_chip bd70528_irq_chip = { + .name = "bd70528_irq", + .main_status = BD70528_REG_INT_MAIN, + .irqs = &bd70528_irqs[0], + .num_irqs = ARRAY_SIZE(bd70528_irqs), + .status_base = BD70528_REG_INT_SHDN, + .mask_base = BD70528_REG_INT_SHDN_MASK, + .ack_base = BD70528_REG_INT_SHDN, + .type_base = BD70528_REG_GPIO1_IN, + .init_ack_masked = true, + .num_regs = 9, + .num_main_regs = 1, + .num_type_reg = 4, + .sub_reg_offsets = &bd70528_sub_irq_offsets[0], + .num_main_status_bits = 8, + .irq_reg_stride = 1, +}; + +static int bd70528_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct bd70528_data *bd70528; + struct regmap_irq_chip_data *irq_data; + int ret, i; + + if (!i2c->irq) { + dev_err(&i2c->dev, "No IRQ configured\n"); + return -EINVAL; + } + + bd70528 = devm_kzalloc(&i2c->dev, sizeof(*bd70528), GFP_KERNEL); + if (!bd70528) + return -ENOMEM; + + mutex_init(&bd70528->rtc_timer_lock); + + dev_set_drvdata(&i2c->dev, &bd70528->chip); + + bd70528->chip.chip_type = ROHM_CHIP_TYPE_BD70528; + bd70528->chip.regmap = devm_regmap_init_i2c(i2c, &bd70528_regmap); + if (IS_ERR(bd70528->chip.regmap)) { + dev_err(&i2c->dev, "Failed to initialize Regmap\n"); + return PTR_ERR(bd70528->chip.regmap); + } + + /* + * Disallow type setting for all IRQs by default as most of them do not + * support setting type. + */ + for (i = 0; i < ARRAY_SIZE(bd70528_irqs); i++) + bd70528_irqs[i].type.types_supported = 0; + + /* Set IRQ typesetting information for GPIO pins 0 - 3 */ + for (i = 0; i < BD70528_NUM_OF_GPIOS; i++) { + struct regmap_irq_type *type; + + type = &bd70528_irqs[BD70528_INT_GPIO0 + i].type; + type->type_reg_offset = 2 * i; + type->type_rising_val = 0x20; + type->type_falling_val = 0x10; + type->type_level_high_val = 0x40; + type->type_level_low_val = 0x50; + type->types_supported = (IRQ_TYPE_EDGE_BOTH | + IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_LEVEL_LOW); + } + + ret = devm_regmap_add_irq_chip(&i2c->dev, bd70528->chip.regmap, + i2c->irq, IRQF_ONESHOT, 0, + &bd70528_irq_chip, &irq_data); + if (ret) { + dev_err(&i2c->dev, "Failed to add IRQ chip\n"); + return ret; + } + dev_dbg(&i2c->dev, "Registered %d IRQs for chip\n", + bd70528_irq_chip.num_irqs); + + /* + * BD70528 IRQ controller is not touching the main mask register. + * So enable the GPIO block interrupts at main level. We can just leave + * them enabled as the IRQ controller should disable IRQs from + * sub-registers when IRQ is disabled or freed. + */ + ret = regmap_update_bits(bd70528->chip.regmap, + BD70528_REG_INT_MAIN_MASK, + BD70528_INT_GPIO_MASK, 0); + + ret = devm_mfd_add_devices(&i2c->dev, PLATFORM_DEVID_AUTO, + bd70528_mfd_cells, + ARRAY_SIZE(bd70528_mfd_cells), NULL, 0, + regmap_irq_get_domain(irq_data)); + if (ret) + dev_err(&i2c->dev, "Failed to create subdevices\n"); + + return ret; +} + +static const struct of_device_id bd70528_of_match[] = { + { .compatible = "rohm,bd70528", }, + { }, +}; +MODULE_DEVICE_TABLE(of, bd70528_of_match); + +static struct i2c_driver bd70528_drv = { + .driver = { + .name = "rohm-bd70528", + .of_match_table = bd70528_of_match, + }, + .probe = &bd70528_i2c_probe, +}; + +module_i2c_driver(bd70528_drv); + +MODULE_AUTHOR("Matti Vaittinen "); +MODULE_DESCRIPTION("ROHM BD70528 Power Management IC driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/rohm-bd70528.h b/include/linux/mfd/rohm-bd70528.h new file mode 100644 index 000000000000..1013e60c5b25 --- /dev/null +++ b/include/linux/mfd/rohm-bd70528.h @@ -0,0 +1,408 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (C) 2018 ROHM Semiconductors */ + +#ifndef __LINUX_MFD_BD70528_H__ +#define __LINUX_MFD_BD70528_H__ + +#include +#include +#include +#include + +enum { + BD70528_BUCK1, + BD70528_BUCK2, + BD70528_BUCK3, + BD70528_LDO1, + BD70528_LDO2, + BD70528_LDO3, + BD70528_LED1, + BD70528_LED2, +}; + +struct bd70528_data { + struct rohm_regmap_dev chip; + struct mutex rtc_timer_lock; +}; + +#define BD70528_BUCK_VOLTS 17 +#define BD70528_BUCK_VOLTS 17 +#define BD70528_BUCK_VOLTS 17 +#define BD70528_LDO_VOLTS 0x20 + +#define BD70528_REG_BUCK1_EN 0x0F +#define BD70528_REG_BUCK1_VOLT 0x15 +#define BD70528_REG_BUCK2_EN 0x10 +#define BD70528_REG_BUCK2_VOLT 0x16 +#define BD70528_REG_BUCK3_EN 0x11 +#define BD70528_REG_BUCK3_VOLT 0x17 +#define BD70528_REG_LDO1_EN 0x1b +#define BD70528_REG_LDO1_VOLT 0x1e +#define BD70528_REG_LDO2_EN 0x1c +#define BD70528_REG_LDO2_VOLT 0x1f +#define BD70528_REG_LDO3_EN 0x1d +#define BD70528_REG_LDO3_VOLT 0x20 +#define BD70528_REG_LED_CTRL 0x2b +#define BD70528_REG_LED_VOLT 0x29 +#define BD70528_REG_LED_EN 0x2a + +/* main irq registers */ +#define BD70528_REG_INT_MAIN 0x7E +#define BD70528_REG_INT_MAIN_MASK 0x74 + +/* 'sub irq' registers */ +#define BD70528_REG_INT_SHDN 0x7F +#define BD70528_REG_INT_PWR_FLT 0x80 +#define BD70528_REG_INT_VR_FLT 0x81 +#define BD70528_REG_INT_MISC 0x82 +#define BD70528_REG_INT_BAT1 0x83 +#define BD70528_REG_INT_BAT2 0x84 +#define BD70528_REG_INT_RTC 0x85 +#define BD70528_REG_INT_GPIO 0x86 +#define BD70528_REG_INT_OP_FAIL 0x87 + +#define BD70528_REG_INT_SHDN_MASK 0x75 +#define BD70528_REG_INT_PWR_FLT_MASK 0x76 +#define BD70528_REG_INT_VR_FLT_MASK 0x77 +#define BD70528_REG_INT_MISC_MASK 0x78 +#define BD70528_REG_INT_BAT1_MASK 0x79 +#define BD70528_REG_INT_BAT2_MASK 0x7a +#define BD70528_REG_INT_RTC_MASK 0x7b +#define BD70528_REG_INT_GPIO_MASK 0x7c +#define BD70528_REG_INT_OP_FAIL_MASK 0x7d + +/* Reset related 'magic' registers */ +#define BD70528_REG_SHIPMODE 0x03 +#define BD70528_REG_HWRESET 0x04 +#define BD70528_REG_WARMRESET 0x05 +#define BD70528_REG_STANDBY 0x06 + +/* GPIO registers */ +#define BD70528_REG_GPIO_STATE 0x8F + +#define BD70528_REG_GPIO1_IN 0x4d +#define BD70528_REG_GPIO2_IN 0x4f +#define BD70528_REG_GPIO3_IN 0x51 +#define BD70528_REG_GPIO4_IN 0x53 +#define BD70528_REG_GPIO1_OUT 0x4e +#define BD70528_REG_GPIO2_OUT 0x50 +#define BD70528_REG_GPIO3_OUT 0x52 +#define BD70528_REG_GPIO4_OUT 0x54 + +/* clk control */ + +#define BD70528_REG_CLK_OUT 0x2c + +/* RTC */ + +#define BD70528_REG_RTC_COUNT_H 0x2d +#define BD70528_REG_RTC_COUNT_L 0x2e +#define BD70528_REG_RTC_SEC 0x2f +#define BD70528_REG_RTC_MINUTE 0x30 +#define BD70528_REG_RTC_HOUR 0x31 +#define BD70528_REG_RTC_WEEK 0x32 +#define BD70528_REG_RTC_DAY 0x33 +#define BD70528_REG_RTC_MONTH 0x34 +#define BD70528_REG_RTC_YEAR 0x35 + +#define BD70528_REG_RTC_ALM_SEC 0x36 +#define BD70528_REG_RTC_ALM_START BD70528_REG_RTC_ALM_SEC +#define BD70528_REG_RTC_ALM_MINUTE 0x37 +#define BD70528_REG_RTC_ALM_HOUR 0x38 +#define BD70528_REG_RTC_ALM_WEEK 0x39 +#define BD70528_REG_RTC_ALM_DAY 0x3a +#define BD70528_REG_RTC_ALM_MONTH 0x3b +#define BD70528_REG_RTC_ALM_YEAR 0x3c +#define BD70528_REG_RTC_ALM_MASK 0x3d +#define BD70528_REG_RTC_ALM_REPEAT 0x3e +#define BD70528_REG_RTC_START BD70528_REG_RTC_SEC + +#define BD70528_REG_RTC_WAKE_SEC 0x43 +#define BD70528_REG_RTC_WAKE_START BD70528_REG_RTC_WAKE_SEC +#define BD70528_REG_RTC_WAKE_MIN 0x44 +#define BD70528_REG_RTC_WAKE_HOUR 0x45 +#define BD70528_REG_RTC_WAKE_CTRL 0x46 + +#define BD70528_REG_ELAPSED_TIMER_EN 0x42 +#define BD70528_REG_WAKE_EN 0x46 + +/* WDT registers */ +#define BD70528_REG_WDT_CTRL 0x4A +#define BD70528_REG_WDT_HOUR 0x49 +#define BD70528_REG_WDT_MINUTE 0x48 +#define BD70528_REG_WDT_SEC 0x47 + +/* Charger / Battery */ +#define BD70528_REG_CHG_CURR_STAT 0x59 +#define BD70528_REG_CHG_BAT_STAT 0x57 +#define BD70528_REG_CHG_BAT_TEMP 0x58 +#define BD70528_REG_CHG_IN_STAT 0x56 +#define BD70528_REG_CHG_DCIN_ILIM 0x5d +#define BD70528_REG_CHG_CHG_CURR_WARM 0x61 +#define BD70528_REG_CHG_CHG_CURR_COLD 0x62 + +/* Masks for main IRQ register bits */ +enum { + BD70528_INT_SHDN, +#define BD70528_INT_SHDN_MASK BIT(BD70528_INT_SHDN) + BD70528_INT_PWR_FLT, +#define BD70528_INT_PWR_FLT_MASK BIT(BD70528_INT_PWR_FLT) + BD70528_INT_VR_FLT, +#define BD70528_INT_VR_FLT_MASK BIT(BD70528_INT_VR_FLT) + BD70528_INT_MISC, +#define BD70528_INT_MISC_MASK BIT(BD70528_INT_MISC) + BD70528_INT_BAT1, +#define BD70528_INT_BAT1_MASK BIT(BD70528_INT_BAT1) + BD70528_INT_RTC, +#define BD70528_INT_RTC_MASK BIT(BD70528_INT_RTC) + BD70528_INT_GPIO, +#define BD70528_INT_GPIO_MASK BIT(BD70528_INT_GPIO) + BD70528_INT_OP_FAIL, +#define BD70528_INT_OP_FAIL_MASK BIT(BD70528_INT_OP_FAIL) +}; + +/* IRQs */ +enum { + /* Shutdown register IRQs */ + BD70528_INT_LONGPUSH, + BD70528_INT_WDT, + BD70528_INT_HWRESET, + BD70528_INT_RSTB_FAULT, + BD70528_INT_VBAT_UVLO, + BD70528_INT_TSD, + BD70528_INT_RSTIN, + /* Power failure register IRQs */ + BD70528_INT_BUCK1_FAULT, + BD70528_INT_BUCK2_FAULT, + BD70528_INT_BUCK3_FAULT, + BD70528_INT_LDO1_FAULT, + BD70528_INT_LDO2_FAULT, + BD70528_INT_LDO3_FAULT, + BD70528_INT_LED1_FAULT, + BD70528_INT_LED2_FAULT, + /* VR FAULT register IRQs */ + BD70528_INT_BUCK1_OCP, + BD70528_INT_BUCK2_OCP, + BD70528_INT_BUCK3_OCP, + BD70528_INT_LED1_OCP, + BD70528_INT_LED2_OCP, + BD70528_INT_BUCK1_FULLON, + BD70528_INT_BUCK2_FULLON, + /* PMU register interrupts */ + BD70528_INT_SHORTPUSH, + BD70528_INT_AUTO_WAKEUP, + BD70528_INT_STATE_CHANGE, + /* Charger 1 register IRQs */ + BD70528_INT_BAT_OV_RES, + BD70528_INT_BAT_OV_DET, + BD70528_INT_DBAT_DET, + BD70528_INT_BATTSD_COLD_RES, + BD70528_INT_BATTSD_COLD_DET, + BD70528_INT_BATTSD_HOT_RES, + BD70528_INT_BATTSD_HOT_DET, + BD70528_INT_CHG_TSD, + /* Charger 2 register IRQs */ + BD70528_INT_BAT_RMV, + BD70528_INT_BAT_DET, + BD70528_INT_DCIN2_OV_RES, + BD70528_INT_DCIN2_OV_DET, + BD70528_INT_DCIN2_RMV, + BD70528_INT_DCIN2_DET, + BD70528_INT_DCIN1_RMV, + BD70528_INT_DCIN1_DET, + /* RTC register IRQs */ + BD70528_INT_RTC_ALARM, + BD70528_INT_ELPS_TIM, + /* GPIO register IRQs */ + BD70528_INT_GPIO0, + BD70528_INT_GPIO1, + BD70528_INT_GPIO2, + BD70528_INT_GPIO3, + /* Invalid operation register IRQs */ + BD70528_INT_BUCK1_DVS_OPFAIL, + BD70528_INT_BUCK2_DVS_OPFAIL, + BD70528_INT_BUCK3_DVS_OPFAIL, + BD70528_INT_LED1_VOLT_OPFAIL, + BD70528_INT_LED2_VOLT_OPFAIL, +}; + +/* Masks */ +#define BD70528_INT_LONGPUSH_MASK 0x1 +#define BD70528_INT_WDT_MASK 0x2 +#define BD70528_INT_HWRESET_MASK 0x4 +#define BD70528_INT_RSTB_FAULT_MASK 0x8 +#define BD70528_INT_VBAT_UVLO_MASK 0x10 +#define BD70528_INT_TSD_MASK 0x20 +#define BD70528_INT_RSTIN_MASK 0x40 + +#define BD70528_INT_BUCK1_FAULT_MASK 0x1 +#define BD70528_INT_BUCK2_FAULT_MASK 0x2 +#define BD70528_INT_BUCK3_FAULT_MASK 0x4 +#define BD70528_INT_LDO1_FAULT_MASK 0x8 +#define BD70528_INT_LDO2_FAULT_MASK 0x10 +#define BD70528_INT_LDO3_FAULT_MASK 0x20 +#define BD70528_INT_LED1_FAULT_MASK 0x40 +#define BD70528_INT_LED2_FAULT_MASK 0x80 + +#define BD70528_INT_BUCK1_OCP_MASK 0x1 +#define BD70528_INT_BUCK2_OCP_MASK 0x2 +#define BD70528_INT_BUCK3_OCP_MASK 0x4 +#define BD70528_INT_LED1_OCP_MASK 0x8 +#define BD70528_INT_LED2_OCP_MASK 0x10 +#define BD70528_INT_BUCK1_FULLON_MASK 0x20 +#define BD70528_INT_BUCK2_FULLON_MASK 0x40 + +#define BD70528_INT_SHORTPUSH_MASK 0x1 +#define BD70528_INT_AUTO_WAKEUP_MASK 0x2 +#define BD70528_INT_STATE_CHANGE_MASK 0x10 + +#define BD70528_INT_BAT_OV_RES_MASK 0x1 +#define BD70528_INT_BAT_OV_DET_MASK 0x2 +#define BD70528_INT_DBAT_DET_MASK 0x4 +#define BD70528_INT_BATTSD_COLD_RES_MASK 0x8 +#define BD70528_INT_BATTSD_COLD_DET_MASK 0x10 +#define BD70528_INT_BATTSD_HOT_RES_MASK 0x20 +#define BD70528_INT_BATTSD_HOT_DET_MASK 0x40 +#define BD70528_INT_CHG_TSD_MASK 0x80 + +#define BD70528_INT_BAT_RMV_MASK 0x1 +#define BD70528_INT_BAT_DET_MASK 0x2 +#define BD70528_INT_DCIN2_OV_RES_MASK 0x4 +#define BD70528_INT_DCIN2_OV_DET_MASK 0x8 +#define BD70528_INT_DCIN2_RMV_MASK 0x10 +#define BD70528_INT_DCIN2_DET_MASK 0x20 +#define BD70528_INT_DCIN1_RMV_MASK 0x40 +#define BD70528_INT_DCIN1_DET_MASK 0x80 + +#define BD70528_INT_RTC_ALARM_MASK 0x1 +#define BD70528_INT_ELPS_TIM_MASK 0x2 + +#define BD70528_INT_GPIO0_MASK 0x1 +#define BD70528_INT_GPIO1_MASK 0x2 +#define BD70528_INT_GPIO2_MASK 0x4 +#define BD70528_INT_GPIO3_MASK 0x8 + +#define BD70528_INT_BUCK1_DVS_OPFAIL_MASK 0x1 +#define BD70528_INT_BUCK2_DVS_OPFAIL_MASK 0x2 +#define BD70528_INT_BUCK3_DVS_OPFAIL_MASK 0x4 +#define BD70528_INT_LED1_VOLT_OPFAIL_MASK 0x10 +#define BD70528_INT_LED2_VOLT_OPFAIL_MASK 0x20 + +#define BD70528_DEBOUNCE_MASK 0x3 + +#define BD70528_DEBOUNCE_DISABLE 0 +#define BD70528_DEBOUNCE_15MS 1 +#define BD70528_DEBOUNCE_30MS 2 +#define BD70528_DEBOUNCE_50MS 3 + +#define BD70528_GPIO_DRIVE_MASK 0x2 +#define BD70528_GPIO_PUSH_PULL 0x0 +#define BD70528_GPIO_OPEN_DRAIN 0x2 + +#define BD70528_GPIO_OUT_EN_MASK 0x80 +#define BD70528_GPIO_OUT_ENABLE 0x80 +#define BD70528_GPIO_OUT_DISABLE 0x0 + +#define BD70528_GPIO_OUT_HI 0x1 +#define BD70528_GPIO_OUT_LO 0x0 +#define BD70528_GPIO_OUT_MASK 0x1 + +#define BD70528_GPIO_IN_STATE_BASE 1 + +#define BD70528_CLK_OUT_EN_MASK 0x1 + +/* RTC masks to mask out reserved bits */ + +#define BD70528_MASK_RTC_SEC 0x7f +#define BD70528_MASK_RTC_MINUTE 0x7f +#define BD70528_MASK_RTC_HOUR_24H 0x80 +#define BD70528_MASK_RTC_HOUR_PM 0x20 +#define BD70528_MASK_RTC_HOUR 0x1f +#define BD70528_MASK_RTC_DAY 0x3f +#define BD70528_MASK_RTC_WEEK 0x07 +#define BD70528_MASK_RTC_MONTH 0x1f +#define BD70528_MASK_RTC_YEAR 0xff +#define BD70528_MASK_RTC_COUNT_L 0x7f + +#define BD70528_MASK_ELAPSED_TIMER_EN 0x1 +/* Mask second, min and hour fields + * HW would support ALM irq for over 24h + * (by setting day, month and year too) + * but as we wish to keep this same as for + * wake-up we limit ALM to 24H and only + * unmask sec, min and hour + */ +#define BD70528_MASK_ALM_EN 0x7 +#define BD70528_MASK_WAKE_EN 0x1 + +/* WDT masks */ +#define BD70528_MASK_WDT_EN 0x1 +#define BD70528_MASK_WDT_HOUR 0x1 +#define BD70528_MASK_WDT_MINUTE 0x7f +#define BD70528_MASK_WDT_SEC 0x7f + +#define BD70528_WDT_STATE_BIT 0x1 +#define BD70528_ELAPSED_STATE_BIT 0x2 +#define BD70528_WAKE_STATE_BIT 0x4 + +/* Charger masks */ +#define BD70528_MASK_CHG_STAT 0x7f +#define BD70528_MASK_CHG_BAT_TIMER 0x20 +#define BD70528_MASK_CHG_BAT_OVERVOLT 0x10 +#define BD70528_MASK_CHG_BAT_DETECT 0x1 +#define BD70528_MASK_CHG_DCIN1_UVLO 0x1 +#define BD70528_MASK_CHG_DCIN_ILIM 0x3f +#define BD70528_MASK_CHG_CHG_CURR 0x1f +#define BD70528_MASK_CHG_TRICKLE_CURR 0x10 + +/* + * Note, external battery register is the lonely rider at + * address 0xc5. See how to stuff that in the regmap + */ +#define BD70528_MAX_REGISTER 0x94 + +/* Buck control masks */ +#define BD70528_MASK_RUN_EN 0x4 +#define BD70528_MASK_STBY_EN 0x2 +#define BD70528_MASK_IDLE_EN 0x1 +#define BD70528_MASK_LED1_EN 0x1 +#define BD70528_MASK_LED2_EN 0x10 + +#define BD70528_MASK_BUCK_VOLT 0xf +#define BD70528_MASK_LDO_VOLT 0x1f +#define BD70528_MASK_LED1_VOLT 0x1 +#define BD70528_MASK_LED2_VOLT 0x10 + +/* Misc irq masks */ +#define BD70528_INT_MASK_SHORT_PUSH 1 +#define BD70528_INT_MASK_AUTO_WAKE 2 +#define BD70528_INT_MASK_POWER_STATE 4 + +#define BD70528_MASK_BUCK_RAMP 0x10 +#define BD70528_SIFT_BUCK_RAMP 4 + +#if IS_ENABLED(CONFIG_BD70528_WATCHDOG) + +int bd70528_wdt_set(struct rohm_regmap_dev *data, int enable, int *old_state); +void bd70528_wdt_lock(struct rohm_regmap_dev *data); +void bd70528_wdt_unlock(struct rohm_regmap_dev *data); + +#else /* CONFIG_BD70528_WATCHDOG */ + +static inline int bd70528_wdt_set(struct rohm_regmap_dev *data, int enable, + int *old_state) +{ + return 0; +} + +static inline void bd70528_wdt_lock(struct rohm_regmap_dev *data) +{ +} + +static inline void bd70528_wdt_unlock(struct rohm_regmap_dev *data) +{ +} + +#endif /* CONFIG_BD70528_WATCHDOG */ + +#endif /* __LINUX_MFD_BD70528_H__ */ -- cgit v1.2.3 From 6bbe6f5732faeabb4bb583726ec2d7f9739532bd Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 18 Jun 2019 18:05:28 -0300 Subject: docs: thermal: convert to ReST Rename the thermal documentation files to ReST, add an index for them and adjust in order to produce a nice html output via the Sphinx build system. At its new index.rst, let's add a :orphan: while this is not linked to the main index.rst file, in order to avoid build warnings. Signed-off-by: Mauro Carvalho Chehab Acked-by: Zhang Rui Signed-off-by: Zhang Rui --- Documentation/thermal/cpu-cooling-api.rst | 107 +++ Documentation/thermal/cpu-cooling-api.txt | 92 --- Documentation/thermal/exynos_thermal | 77 -- Documentation/thermal/exynos_thermal.rst | 90 +++ Documentation/thermal/exynos_thermal_emulation | 53 -- Documentation/thermal/exynos_thermal_emulation.rst | 61 ++ Documentation/thermal/index.rst | 18 + Documentation/thermal/intel_powerclamp.rst | 320 +++++++++ Documentation/thermal/intel_powerclamp.txt | 317 -------- Documentation/thermal/nouveau_thermal | 82 --- Documentation/thermal/nouveau_thermal.rst | 96 +++ Documentation/thermal/power_allocator.rst | 271 +++++++ Documentation/thermal/power_allocator.txt | 247 ------- Documentation/thermal/sysfs-api.rst | 798 +++++++++++++++++++++ Documentation/thermal/sysfs-api.txt | 636 ---------------- Documentation/thermal/x86_pkg_temperature_thermal | 47 -- .../thermal/x86_pkg_temperature_thermal.rst | 55 ++ MAINTAINERS | 2 +- include/linux/thermal.h | 4 +- 19 files changed, 1819 insertions(+), 1554 deletions(-) create mode 100644 Documentation/thermal/cpu-cooling-api.rst delete mode 100644 Documentation/thermal/cpu-cooling-api.txt delete mode 100644 Documentation/thermal/exynos_thermal create mode 100644 Documentation/thermal/exynos_thermal.rst delete mode 100644 Documentation/thermal/exynos_thermal_emulation create mode 100644 Documentation/thermal/exynos_thermal_emulation.rst create mode 100644 Documentation/thermal/index.rst create mode 100644 Documentation/thermal/intel_powerclamp.rst delete mode 100644 Documentation/thermal/intel_powerclamp.txt delete mode 100644 Documentation/thermal/nouveau_thermal create mode 100644 Documentation/thermal/nouveau_thermal.rst create mode 100644 Documentation/thermal/power_allocator.rst delete mode 100644 Documentation/thermal/power_allocator.txt create mode 100644 Documentation/thermal/sysfs-api.rst delete mode 100644 Documentation/thermal/sysfs-api.txt delete mode 100644 Documentation/thermal/x86_pkg_temperature_thermal create mode 100644 Documentation/thermal/x86_pkg_temperature_thermal.rst (limited to 'include/linux') diff --git a/Documentation/thermal/cpu-cooling-api.rst b/Documentation/thermal/cpu-cooling-api.rst new file mode 100644 index 000000000000..645d914c45a6 --- /dev/null +++ b/Documentation/thermal/cpu-cooling-api.rst @@ -0,0 +1,107 @@ +======================= +CPU cooling APIs How To +======================= + +Written by Amit Daniel Kachhap + +Updated: 6 Jan 2015 + +Copyright (c) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com) + +0. Introduction +=============== + +The generic cpu cooling(freq clipping) provides registration/unregistration APIs +to the caller. The binding of the cooling devices to the trip point is left for +the user. The registration APIs returns the cooling device pointer. + +1. cpu cooling APIs +=================== + +1.1 cpufreq registration/unregistration APIs +-------------------------------------------- + + :: + + struct thermal_cooling_device + *cpufreq_cooling_register(struct cpumask *clip_cpus) + + This interface function registers the cpufreq cooling device with the name + "thermal-cpufreq-%x". This api can support multiple instances of cpufreq + cooling devices. + + clip_cpus: + cpumask of cpus where the frequency constraints will happen. + + :: + + struct thermal_cooling_device + *of_cpufreq_cooling_register(struct cpufreq_policy *policy) + + This interface function registers the cpufreq cooling device with + the name "thermal-cpufreq-%x" linking it with a device tree node, in + order to bind it via the thermal DT code. This api can support multiple + instances of cpufreq cooling devices. + + policy: + CPUFreq policy. + + + :: + + void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) + + This interface function unregisters the "thermal-cpufreq-%x" cooling device. + + cdev: Cooling device pointer which has to be unregistered. + +2. Power models +=============== + +The power API registration functions provide a simple power model for +CPUs. The current power is calculated as dynamic power (static power isn't +supported currently). This power model requires that the operating-points of +the CPUs are registered using the kernel's opp library and the +`cpufreq_frequency_table` is assigned to the `struct device` of the +cpu. If you are using CONFIG_CPUFREQ_DT then the +`cpufreq_frequency_table` should already be assigned to the cpu +device. + +The dynamic power consumption of a processor depends on many factors. +For a given processor implementation the primary factors are: + +- The time the processor spends running, consuming dynamic power, as + compared to the time in idle states where dynamic consumption is + negligible. Herein we refer to this as 'utilisation'. +- The voltage and frequency levels as a result of DVFS. The DVFS + level is a dominant factor governing power consumption. +- In running time the 'execution' behaviour (instruction types, memory + access patterns and so forth) causes, in most cases, a second order + variation. In pathological cases this variation can be significant, + but typically it is of a much lesser impact than the factors above. + +A high level dynamic power consumption model may then be represented as:: + + Pdyn = f(run) * Voltage^2 * Frequency * Utilisation + +f(run) here represents the described execution behaviour and its +result has a units of Watts/Hz/Volt^2 (this often expressed in +mW/MHz/uVolt^2) + +The detailed behaviour for f(run) could be modelled on-line. However, +in practice, such an on-line model has dependencies on a number of +implementation specific processor support and characterisation +factors. Therefore, in initial implementation that contribution is +represented as a constant coefficient. This is a simplification +consistent with the relative contribution to overall power variation. + +In this simplified representation our model becomes:: + + Pdyn = Capacitance * Voltage^2 * Frequency * Utilisation + +Where `capacitance` is a constant that represents an indicative +running time dynamic power coefficient in fundamental units of +mW/MHz/uVolt^2. Typical values for mobile CPUs might lie in range +from 100 to 500. For reference, the approximate values for the SoC in +ARM's Juno Development Platform are 530 for the Cortex-A57 cluster and +140 for the Cortex-A53 cluster. diff --git a/Documentation/thermal/cpu-cooling-api.txt b/Documentation/thermal/cpu-cooling-api.txt deleted file mode 100644 index 7df567eaea1a..000000000000 --- a/Documentation/thermal/cpu-cooling-api.txt +++ /dev/null @@ -1,92 +0,0 @@ -CPU cooling APIs How To -=================================== - -Written by Amit Daniel Kachhap - -Updated: 6 Jan 2015 - -Copyright (c) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com) - -0. Introduction - -The generic cpu cooling(freq clipping) provides registration/unregistration APIs -to the caller. The binding of the cooling devices to the trip point is left for -the user. The registration APIs returns the cooling device pointer. - -1. cpu cooling APIs - -1.1 cpufreq registration/unregistration APIs -1.1.1 struct thermal_cooling_device *cpufreq_cooling_register( - struct cpumask *clip_cpus) - - This interface function registers the cpufreq cooling device with the name - "thermal-cpufreq-%x". This api can support multiple instances of cpufreq - cooling devices. - - clip_cpus: cpumask of cpus where the frequency constraints will happen. - -1.1.2 struct thermal_cooling_device *of_cpufreq_cooling_register( - struct cpufreq_policy *policy) - - This interface function registers the cpufreq cooling device with - the name "thermal-cpufreq-%x" linking it with a device tree node, in - order to bind it via the thermal DT code. This api can support multiple - instances of cpufreq cooling devices. - - policy: CPUFreq policy. - -1.1.3 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) - - This interface function unregisters the "thermal-cpufreq-%x" cooling device. - - cdev: Cooling device pointer which has to be unregistered. - -2. Power models - -The power API registration functions provide a simple power model for -CPUs. The current power is calculated as dynamic power (static power isn't -supported currently). This power model requires that the operating-points of -the CPUs are registered using the kernel's opp library and the -`cpufreq_frequency_table` is assigned to the `struct device` of the -cpu. If you are using CONFIG_CPUFREQ_DT then the -`cpufreq_frequency_table` should already be assigned to the cpu -device. - -The dynamic power consumption of a processor depends on many factors. -For a given processor implementation the primary factors are: - -- The time the processor spends running, consuming dynamic power, as - compared to the time in idle states where dynamic consumption is - negligible. Herein we refer to this as 'utilisation'. -- The voltage and frequency levels as a result of DVFS. The DVFS - level is a dominant factor governing power consumption. -- In running time the 'execution' behaviour (instruction types, memory - access patterns and so forth) causes, in most cases, a second order - variation. In pathological cases this variation can be significant, - but typically it is of a much lesser impact than the factors above. - -A high level dynamic power consumption model may then be represented as: - -Pdyn = f(run) * Voltage^2 * Frequency * Utilisation - -f(run) here represents the described execution behaviour and its -result has a units of Watts/Hz/Volt^2 (this often expressed in -mW/MHz/uVolt^2) - -The detailed behaviour for f(run) could be modelled on-line. However, -in practice, such an on-line model has dependencies on a number of -implementation specific processor support and characterisation -factors. Therefore, in initial implementation that contribution is -represented as a constant coefficient. This is a simplification -consistent with the relative contribution to overall power variation. - -In this simplified representation our model becomes: - -Pdyn = Capacitance * Voltage^2 * Frequency * Utilisation - -Where `capacitance` is a constant that represents an indicative -running time dynamic power coefficient in fundamental units of -mW/MHz/uVolt^2. Typical values for mobile CPUs might lie in range -from 100 to 500. For reference, the approximate values for the SoC in -ARM's Juno Development Platform are 530 for the Cortex-A57 cluster and -140 for the Cortex-A53 cluster. diff --git a/Documentation/thermal/exynos_thermal b/Documentation/thermal/exynos_thermal deleted file mode 100644 index 9010c4416967..000000000000 --- a/Documentation/thermal/exynos_thermal +++ /dev/null @@ -1,77 +0,0 @@ -Kernel driver exynos_tmu -================= - -Supported chips: -* ARM SAMSUNG EXYNOS4, EXYNOS5 series of SoC - Datasheet: Not publicly available - -Authors: Donggeun Kim -Authors: Amit Daniel - -TMU controller Description: ---------------------------- - -This driver allows to read temperature inside SAMSUNG EXYNOS4/5 series of SoC. - -The chip only exposes the measured 8-bit temperature code value -through a register. -Temperature can be taken from the temperature code. -There are three equations converting from temperature to temperature code. - -The three equations are: - 1. Two point trimming - Tc = (T - 25) * (TI2 - TI1) / (85 - 25) + TI1 - - 2. One point trimming - Tc = T + TI1 - 25 - - 3. No trimming - Tc = T + 50 - - Tc: Temperature code, T: Temperature, - TI1: Trimming info for 25 degree Celsius (stored at TRIMINFO register) - Temperature code measured at 25 degree Celsius which is unchanged - TI2: Trimming info for 85 degree Celsius (stored at TRIMINFO register) - Temperature code measured at 85 degree Celsius which is unchanged - -TMU(Thermal Management Unit) in EXYNOS4/5 generates interrupt -when temperature exceeds pre-defined levels. -The maximum number of configurable threshold is five. -The threshold levels are defined as follows: - Level_0: current temperature > trigger_level_0 + threshold - Level_1: current temperature > trigger_level_1 + threshold - Level_2: current temperature > trigger_level_2 + threshold - Level_3: current temperature > trigger_level_3 + threshold - - The threshold and each trigger_level are set - through the corresponding registers. - -When an interrupt occurs, this driver notify kernel thermal framework -with the function exynos_report_trigger. -Although an interrupt condition for level_0 can be set, -it can be used to synchronize the cooling action. - -TMU driver description: ------------------------ - -The exynos thermal driver is structured as, - - Kernel Core thermal framework - (thermal_core.c, step_wise.c, cpu_cooling.c) - ^ - | - | -TMU configuration data -------> TMU Driver <------> Exynos Core thermal wrapper -(exynos_tmu_data.c) (exynos_tmu.c) (exynos_thermal_common.c) -(exynos_tmu_data.h) (exynos_tmu.h) (exynos_thermal_common.h) - -a) TMU configuration data: This consist of TMU register offsets/bitfields - described through structure exynos_tmu_registers. Also several - other platform data (struct exynos_tmu_platform_data) members - are used to configure the TMU. -b) TMU driver: This component initialises the TMU controller and sets different - thresholds. It invokes core thermal implementation with the call - exynos_report_trigger. -c) Exynos Core thermal wrapper: This provides 3 wrapper function to use the - Kernel core thermal framework. They are exynos_unregister_thermal, - exynos_register_thermal and exynos_report_trigger. diff --git a/Documentation/thermal/exynos_thermal.rst b/Documentation/thermal/exynos_thermal.rst new file mode 100644 index 000000000000..5bd556566c70 --- /dev/null +++ b/Documentation/thermal/exynos_thermal.rst @@ -0,0 +1,90 @@ +======================== +Kernel driver exynos_tmu +======================== + +Supported chips: + +* ARM SAMSUNG EXYNOS4, EXYNOS5 series of SoC + + Datasheet: Not publicly available + +Authors: Donggeun Kim +Authors: Amit Daniel + +TMU controller Description: +--------------------------- + +This driver allows to read temperature inside SAMSUNG EXYNOS4/5 series of SoC. + +The chip only exposes the measured 8-bit temperature code value +through a register. +Temperature can be taken from the temperature code. +There are three equations converting from temperature to temperature code. + +The three equations are: + 1. Two point trimming:: + + Tc = (T - 25) * (TI2 - TI1) / (85 - 25) + TI1 + + 2. One point trimming:: + + Tc = T + TI1 - 25 + + 3. No trimming:: + + Tc = T + 50 + + Tc: + Temperature code, T: Temperature, + TI1: + Trimming info for 25 degree Celsius (stored at TRIMINFO register) + Temperature code measured at 25 degree Celsius which is unchanged + TI2: + Trimming info for 85 degree Celsius (stored at TRIMINFO register) + Temperature code measured at 85 degree Celsius which is unchanged + +TMU(Thermal Management Unit) in EXYNOS4/5 generates interrupt +when temperature exceeds pre-defined levels. +The maximum number of configurable threshold is five. +The threshold levels are defined as follows:: + + Level_0: current temperature > trigger_level_0 + threshold + Level_1: current temperature > trigger_level_1 + threshold + Level_2: current temperature > trigger_level_2 + threshold + Level_3: current temperature > trigger_level_3 + threshold + +The threshold and each trigger_level are set +through the corresponding registers. + +When an interrupt occurs, this driver notify kernel thermal framework +with the function exynos_report_trigger. +Although an interrupt condition for level_0 can be set, +it can be used to synchronize the cooling action. + +TMU driver description: +----------------------- + +The exynos thermal driver is structured as:: + + Kernel Core thermal framework + (thermal_core.c, step_wise.c, cpu_cooling.c) + ^ + | + | + TMU configuration data -----> TMU Driver <----> Exynos Core thermal wrapper + (exynos_tmu_data.c) (exynos_tmu.c) (exynos_thermal_common.c) + (exynos_tmu_data.h) (exynos_tmu.h) (exynos_thermal_common.h) + +a) TMU configuration data: + This consist of TMU register offsets/bitfields + described through structure exynos_tmu_registers. Also several + other platform data (struct exynos_tmu_platform_data) members + are used to configure the TMU. +b) TMU driver: + This component initialises the TMU controller and sets different + thresholds. It invokes core thermal implementation with the call + exynos_report_trigger. +c) Exynos Core thermal wrapper: + This provides 3 wrapper function to use the + Kernel core thermal framework. They are exynos_unregister_thermal, + exynos_register_thermal and exynos_report_trigger. diff --git a/Documentation/thermal/exynos_thermal_emulation b/Documentation/thermal/exynos_thermal_emulation deleted file mode 100644 index b15efec6ca28..000000000000 --- a/Documentation/thermal/exynos_thermal_emulation +++ /dev/null @@ -1,53 +0,0 @@ -EXYNOS EMULATION MODE -======================== - -Copyright (C) 2012 Samsung Electronics - -Written by Jonghwa Lee - -Description ------------ - -Exynos 4x12 (4212, 4412) and 5 series provide emulation mode for thermal management unit. -Thermal emulation mode supports software debug for TMU's operation. User can set temperature -manually with software code and TMU will read current temperature from user value not from -sensor's value. - -Enabling CONFIG_THERMAL_EMULATION option will make this support available. -When it's enabled, sysfs node will be created as -/sys/devices/virtual/thermal/thermal_zone'zone id'/emul_temp. - -The sysfs node, 'emul_node', will contain value 0 for the initial state. When you input any -temperature you want to update to sysfs node, it automatically enable emulation mode and -current temperature will be changed into it. -(Exynos also supports user changeable delay time which would be used to delay of - changing temperature. However, this node only uses same delay of real sensing time, 938us.) - -Exynos emulation mode requires synchronous of value changing and enabling. It means when you -want to update the any value of delay or next temperature, then you have to enable emulation -mode at the same time. (Or you have to keep the mode enabling.) If you don't, it fails to -change the value to updated one and just use last succeessful value repeatedly. That's why -this node gives users the right to change termerpature only. Just one interface makes it more -simply to use. - -Disabling emulation mode only requires writing value 0 to sysfs node. - - -TEMP 120 | - | - 100 | - | - 80 | - | +----------- - 60 | | | - | +-------------| | - 40 | | | | - | | | | - 20 | | | +---------- - | | | | | - 0 |______________|_____________|__________|__________|_________ - A A A A TIME - |<----->| |<----->| |<----->| | - | 938us | | | | | | -emulation : 0 50 | 70 | 20 | 0 -current temp : sensor 50 70 20 sensor diff --git a/Documentation/thermal/exynos_thermal_emulation.rst b/Documentation/thermal/exynos_thermal_emulation.rst new file mode 100644 index 000000000000..c21d10838bc5 --- /dev/null +++ b/Documentation/thermal/exynos_thermal_emulation.rst @@ -0,0 +1,61 @@ +===================== +Exynos Emulation Mode +===================== + +Copyright (C) 2012 Samsung Electronics + +Written by Jonghwa Lee + +Description +----------- + +Exynos 4x12 (4212, 4412) and 5 series provide emulation mode for thermal +management unit. Thermal emulation mode supports software debug for +TMU's operation. User can set temperature manually with software code +and TMU will read current temperature from user value not from sensor's +value. + +Enabling CONFIG_THERMAL_EMULATION option will make this support +available. When it's enabled, sysfs node will be created as +/sys/devices/virtual/thermal/thermal_zone'zone id'/emul_temp. + +The sysfs node, 'emul_node', will contain value 0 for the initial state. +When you input any temperature you want to update to sysfs node, it +automatically enable emulation mode and current temperature will be +changed into it. + +(Exynos also supports user changeable delay time which would be used to +delay of changing temperature. However, this node only uses same delay +of real sensing time, 938us.) + +Exynos emulation mode requires synchronous of value changing and +enabling. It means when you want to update the any value of delay or +next temperature, then you have to enable emulation mode at the same +time. (Or you have to keep the mode enabling.) If you don't, it fails to +change the value to updated one and just use last succeessful value +repeatedly. That's why this node gives users the right to change +termerpature only. Just one interface makes it more simply to use. + +Disabling emulation mode only requires writing value 0 to sysfs node. + +:: + + + TEMP 120 | + | + 100 | + | + 80 | + | +----------- + 60 | | | + | +-------------| | + 40 | | | | + | | | | + 20 | | | +---------- + | | | | | + 0 |______________|_____________|__________|__________|_________ + A A A A TIME + |<----->| |<----->| |<----->| | + | 938us | | | | | | + emulation : 0 50 | 70 | 20 | 0 + current temp: sensor 50 70 20 sensor diff --git a/Documentation/thermal/index.rst b/Documentation/thermal/index.rst new file mode 100644 index 000000000000..8c1c00146cad --- /dev/null +++ b/Documentation/thermal/index.rst @@ -0,0 +1,18 @@ +:orphan: + +======= +Thermal +======= + +.. toctree:: + :maxdepth: 1 + + cpu-cooling-api + sysfs-api + power_allocator + + exynos_thermal + exynos_thermal_emulation + intel_powerclamp + nouveau_thermal + x86_pkg_temperature_thermal diff --git a/Documentation/thermal/intel_powerclamp.rst b/Documentation/thermal/intel_powerclamp.rst new file mode 100644 index 000000000000..3f6dfb0b3ea6 --- /dev/null +++ b/Documentation/thermal/intel_powerclamp.rst @@ -0,0 +1,320 @@ +======================= +Intel Powerclamp Driver +======================= + +By: + - Arjan van de Ven + - Jacob Pan + +.. Contents: + + (*) Introduction + - Goals and Objectives + + (*) Theory of Operation + - Idle Injection + - Calibration + + (*) Performance Analysis + - Effectiveness and Limitations + - Power vs Performance + - Scalability + - Calibration + - Comparison with Alternative Techniques + + (*) Usage and Interfaces + - Generic Thermal Layer (sysfs) + - Kernel APIs (TBD) + +INTRODUCTION +============ + +Consider the situation where a system’s power consumption must be +reduced at runtime, due to power budget, thermal constraint, or noise +level, and where active cooling is not preferred. Software managed +passive power reduction must be performed to prevent the hardware +actions that are designed for catastrophic scenarios. + +Currently, P-states, T-states (clock modulation), and CPU offlining +are used for CPU throttling. + +On Intel CPUs, C-states provide effective power reduction, but so far +they’re only used opportunistically, based on workload. With the +development of intel_powerclamp driver, the method of synchronizing +idle injection across all online CPU threads was introduced. The goal +is to achieve forced and controllable C-state residency. + +Test/Analysis has been made in the areas of power, performance, +scalability, and user experience. In many cases, clear advantage is +shown over taking the CPU offline or modulating the CPU clock. + + +THEORY OF OPERATION +=================== + +Idle Injection +-------------- + +On modern Intel processors (Nehalem or later), package level C-state +residency is available in MSRs, thus also available to the kernel. + +These MSRs are:: + + #define MSR_PKG_C2_RESIDENCY 0x60D + #define MSR_PKG_C3_RESIDENCY 0x3F8 + #define MSR_PKG_C6_RESIDENCY 0x3F9 + #define MSR_PKG_C7_RESIDENCY 0x3FA + +If the kernel can also inject idle time to the system, then a +closed-loop control system can be established that manages package +level C-state. The intel_powerclamp driver is conceived as such a +control system, where the target set point is a user-selected idle +ratio (based on power reduction), and the error is the difference +between the actual package level C-state residency ratio and the target idle +ratio. + +Injection is controlled by high priority kernel threads, spawned for +each online CPU. + +These kernel threads, with SCHED_FIFO class, are created to perform +clamping actions of controlled duty ratio and duration. Each per-CPU +thread synchronizes its idle time and duration, based on the rounding +of jiffies, so accumulated errors can be prevented to avoid a jittery +effect. Threads are also bound to the CPU such that they cannot be +migrated, unless the CPU is taken offline. In this case, threads +belong to the offlined CPUs will be terminated immediately. + +Running as SCHED_FIFO and relatively high priority, also allows such +scheme to work for both preemptable and non-preemptable kernels. +Alignment of idle time around jiffies ensures scalability for HZ +values. This effect can be better visualized using a Perf timechart. +The following diagram shows the behavior of kernel thread +kidle_inject/cpu. During idle injection, it runs monitor/mwait idle +for a given "duration", then relinquishes the CPU to other tasks, +until the next time interval. + +The NOHZ schedule tick is disabled during idle time, but interrupts +are not masked. Tests show that the extra wakeups from scheduler tick +have a dramatic impact on the effectiveness of the powerclamp driver +on large scale systems (Westmere system with 80 processors). + +:: + + CPU0 + ____________ ____________ + kidle_inject/0 | sleep | mwait | sleep | + _________| |________| |_______ + duration + CPU1 + ____________ ____________ + kidle_inject/1 | sleep | mwait | sleep | + _________| |________| |_______ + ^ + | + | + roundup(jiffies, interval) + +Only one CPU is allowed to collect statistics and update global +control parameters. This CPU is referred to as the controlling CPU in +this document. The controlling CPU is elected at runtime, with a +policy that favors BSP, taking into account the possibility of a CPU +hot-plug. + +In terms of dynamics of the idle control system, package level idle +time is considered largely as a non-causal system where its behavior +cannot be based on the past or current input. Therefore, the +intel_powerclamp driver attempts to enforce the desired idle time +instantly as given input (target idle ratio). After injection, +powerclamp monitors the actual idle for a given time window and adjust +the next injection accordingly to avoid over/under correction. + +When used in a causal control system, such as a temperature control, +it is up to the user of this driver to implement algorithms where +past samples and outputs are included in the feedback. For example, a +PID-based thermal controller can use the powerclamp driver to +maintain a desired target temperature, based on integral and +derivative gains of the past samples. + + + +Calibration +----------- +During scalability testing, it is observed that synchronized actions +among CPUs become challenging as the number of cores grows. This is +also true for the ability of a system to enter package level C-states. + +To make sure the intel_powerclamp driver scales well, online +calibration is implemented. The goals for doing such a calibration +are: + +a) determine the effective range of idle injection ratio +b) determine the amount of compensation needed at each target ratio + +Compensation to each target ratio consists of two parts: + + a) steady state error compensation + This is to offset the error occurring when the system can + enter idle without extra wakeups (such as external interrupts). + + b) dynamic error compensation + When an excessive amount of wakeups occurs during idle, an + additional idle ratio can be added to quiet interrupts, by + slowing down CPU activities. + +A debugfs file is provided for the user to examine compensation +progress and results, such as on a Westmere system:: + + [jacob@nex01 ~]$ cat + /sys/kernel/debug/intel_powerclamp/powerclamp_calib + controlling cpu: 0 + pct confidence steady dynamic (compensation) + 0 0 0 0 + 1 1 0 0 + 2 1 1 0 + 3 3 1 0 + 4 3 1 0 + 5 3 1 0 + 6 3 1 0 + 7 3 1 0 + 8 3 1 0 + ... + 30 3 2 0 + 31 3 2 0 + 32 3 1 0 + 33 3 2 0 + 34 3 1 0 + 35 3 2 0 + 36 3 1 0 + 37 3 2 0 + 38 3 1 0 + 39 3 2 0 + 40 3 3 0 + 41 3 1 0 + 42 3 2 0 + 43 3 1 0 + 44 3 1 0 + 45 3 2 0 + 46 3 3 0 + 47 3 0 0 + 48 3 2 0 + 49 3 3 0 + +Calibration occurs during runtime. No offline method is available. +Steady state compensation is used only when confidence levels of all +adjacent ratios have reached satisfactory level. A confidence level +is accumulated based on clean data collected at runtime. Data +collected during a period without extra interrupts is considered +clean. + +To compensate for excessive amounts of wakeup during idle, additional +idle time is injected when such a condition is detected. Currently, +we have a simple algorithm to double the injection ratio. A possible +enhancement might be to throttle the offending IRQ, such as delaying +EOI for level triggered interrupts. But it is a challenge to be +non-intrusive to the scheduler or the IRQ core code. + + +CPU Online/Offline +------------------ +Per-CPU kernel threads are started/stopped upon receiving +notifications of CPU hotplug activities. The intel_powerclamp driver +keeps track of clamping kernel threads, even after they are migrated +to other CPUs, after a CPU offline event. + + +Performance Analysis +==================== +This section describes the general performance data collected on +multiple systems, including Westmere (80P) and Ivy Bridge (4P, 8P). + +Effectiveness and Limitations +----------------------------- +The maximum range that idle injection is allowed is capped at 50 +percent. As mentioned earlier, since interrupts are allowed during +forced idle time, excessive interrupts could result in less +effectiveness. The extreme case would be doing a ping -f to generated +flooded network interrupts without much CPU acknowledgement. In this +case, little can be done from the idle injection threads. In most +normal cases, such as scp a large file, applications can be throttled +by the powerclamp driver, since slowing down the CPU also slows down +network protocol processing, which in turn reduces interrupts. + +When control parameters change at runtime by the controlling CPU, it +may take an additional period for the rest of the CPUs to catch up +with the changes. During this time, idle injection is out of sync, +thus not able to enter package C- states at the expected ratio. But +this effect is minor, in that in most cases change to the target +ratio is updated much less frequently than the idle injection +frequency. + +Scalability +----------- +Tests also show a minor, but measurable, difference between the 4P/8P +Ivy Bridge system and the 80P Westmere server under 50% idle ratio. +More compensation is needed on Westmere for the same amount of +target idle ratio. The compensation also increases as the idle ratio +gets larger. The above reason constitutes the need for the +calibration code. + +On the IVB 8P system, compared to an offline CPU, powerclamp can +achieve up to 40% better performance per watt. (measured by a spin +counter summed over per CPU counting threads spawned for all running +CPUs). + +Usage and Interfaces +==================== +The powerclamp driver is registered to the generic thermal layer as a +cooling device. Currently, it’s not bound to any thermal zones:: + + jacob@chromoly:/sys/class/thermal/cooling_device14$ grep . * + cur_state:0 + max_state:50 + type:intel_powerclamp + +cur_state allows user to set the desired idle percentage. Writing 0 to +cur_state will stop idle injection. Writing a value between 1 and +max_state will start the idle injection. Reading cur_state returns the +actual and current idle percentage. This may not be the same value +set by the user in that current idle percentage depends on workload +and includes natural idle. When idle injection is disabled, reading +cur_state returns value -1 instead of 0 which is to avoid confusing +100% busy state with the disabled state. + +Example usage: +- To inject 25% idle time:: + + $ sudo sh -c "echo 25 > /sys/class/thermal/cooling_device80/cur_state + +If the system is not busy and has more than 25% idle time already, +then the powerclamp driver will not start idle injection. Using Top +will not show idle injection kernel threads. + +If the system is busy (spin test below) and has less than 25% natural +idle time, powerclamp kernel threads will do idle injection. Forced +idle time is accounted as normal idle in that common code path is +taken as the idle task. + +In this example, 24.1% idle is shown. This helps the system admin or +user determine the cause of slowdown, when a powerclamp driver is in action:: + + + Tasks: 197 total, 1 running, 196 sleeping, 0 stopped, 0 zombie + Cpu(s): 71.2%us, 4.7%sy, 0.0%ni, 24.1%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st + Mem: 3943228k total, 1689632k used, 2253596k free, 74960k buffers + Swap: 4087804k total, 0k used, 4087804k free, 945336k cached + + PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND + 3352 jacob 20 0 262m 644 428 S 286 0.0 0:17.16 spin + 3341 root -51 0 0 0 0 D 25 0.0 0:01.62 kidle_inject/0 + 3344 root -51 0 0 0 0 D 25 0.0 0:01.60 kidle_inject/3 + 3342 root -51 0 0 0 0 D 25 0.0 0:01.61 kidle_inject/1 + 3343 root -51 0 0 0 0 D 25 0.0 0:01.60 kidle_inject/2 + 2935 jacob 20 0 696m 125m 35m S 5 3.3 0:31.11 firefox + 1546 root 20 0 158m 20m 6640 S 3 0.5 0:26.97 Xorg + 2100 jacob 20 0 1223m 88m 30m S 3 2.3 0:23.68 compiz + +Tests have shown that by using the powerclamp driver as a cooling +device, a PID based userspace thermal controller can manage to +control CPU temperature effectively, when no other thermal influence +is added. For example, a UltraBook user can compile the kernel under +certain temperature (below most active trip points). diff --git a/Documentation/thermal/intel_powerclamp.txt b/Documentation/thermal/intel_powerclamp.txt deleted file mode 100644 index b5df21168fbc..000000000000 --- a/Documentation/thermal/intel_powerclamp.txt +++ /dev/null @@ -1,317 +0,0 @@ - ======================= - INTEL POWERCLAMP DRIVER - ======================= -By: Arjan van de Ven - Jacob Pan - -Contents: - (*) Introduction - - Goals and Objectives - - (*) Theory of Operation - - Idle Injection - - Calibration - - (*) Performance Analysis - - Effectiveness and Limitations - - Power vs Performance - - Scalability - - Calibration - - Comparison with Alternative Techniques - - (*) Usage and Interfaces - - Generic Thermal Layer (sysfs) - - Kernel APIs (TBD) - -============ -INTRODUCTION -============ - -Consider the situation where a system’s power consumption must be -reduced at runtime, due to power budget, thermal constraint, or noise -level, and where active cooling is not preferred. Software managed -passive power reduction must be performed to prevent the hardware -actions that are designed for catastrophic scenarios. - -Currently, P-states, T-states (clock modulation), and CPU offlining -are used for CPU throttling. - -On Intel CPUs, C-states provide effective power reduction, but so far -they’re only used opportunistically, based on workload. With the -development of intel_powerclamp driver, the method of synchronizing -idle injection across all online CPU threads was introduced. The goal -is to achieve forced and controllable C-state residency. - -Test/Analysis has been made in the areas of power, performance, -scalability, and user experience. In many cases, clear advantage is -shown over taking the CPU offline or modulating the CPU clock. - - -=================== -THEORY OF OPERATION -=================== - -Idle Injection --------------- - -On modern Intel processors (Nehalem or later), package level C-state -residency is available in MSRs, thus also available to the kernel. - -These MSRs are: - #define MSR_PKG_C2_RESIDENCY 0x60D - #define MSR_PKG_C3_RESIDENCY 0x3F8 - #define MSR_PKG_C6_RESIDENCY 0x3F9 - #define MSR_PKG_C7_RESIDENCY 0x3FA - -If the kernel can also inject idle time to the system, then a -closed-loop control system can be established that manages package -level C-state. The intel_powerclamp driver is conceived as such a -control system, where the target set point is a user-selected idle -ratio (based on power reduction), and the error is the difference -between the actual package level C-state residency ratio and the target idle -ratio. - -Injection is controlled by high priority kernel threads, spawned for -each online CPU. - -These kernel threads, with SCHED_FIFO class, are created to perform -clamping actions of controlled duty ratio and duration. Each per-CPU -thread synchronizes its idle time and duration, based on the rounding -of jiffies, so accumulated errors can be prevented to avoid a jittery -effect. Threads are also bound to the CPU such that they cannot be -migrated, unless the CPU is taken offline. In this case, threads -belong to the offlined CPUs will be terminated immediately. - -Running as SCHED_FIFO and relatively high priority, also allows such -scheme to work for both preemptable and non-preemptable kernels. -Alignment of idle time around jiffies ensures scalability for HZ -values. This effect can be better visualized using a Perf timechart. -The following diagram shows the behavior of kernel thread -kidle_inject/cpu. During idle injection, it runs monitor/mwait idle -for a given "duration", then relinquishes the CPU to other tasks, -until the next time interval. - -The NOHZ schedule tick is disabled during idle time, but interrupts -are not masked. Tests show that the extra wakeups from scheduler tick -have a dramatic impact on the effectiveness of the powerclamp driver -on large scale systems (Westmere system with 80 processors). - -CPU0 - ____________ ____________ -kidle_inject/0 | sleep | mwait | sleep | - _________| |________| |_______ - duration -CPU1 - ____________ ____________ -kidle_inject/1 | sleep | mwait | sleep | - _________| |________| |_______ - ^ - | - | - roundup(jiffies, interval) - -Only one CPU is allowed to collect statistics and update global -control parameters. This CPU is referred to as the controlling CPU in -this document. The controlling CPU is elected at runtime, with a -policy that favors BSP, taking into account the possibility of a CPU -hot-plug. - -In terms of dynamics of the idle control system, package level idle -time is considered largely as a non-causal system where its behavior -cannot be based on the past or current input. Therefore, the -intel_powerclamp driver attempts to enforce the desired idle time -instantly as given input (target idle ratio). After injection, -powerclamp monitors the actual idle for a given time window and adjust -the next injection accordingly to avoid over/under correction. - -When used in a causal control system, such as a temperature control, -it is up to the user of this driver to implement algorithms where -past samples and outputs are included in the feedback. For example, a -PID-based thermal controller can use the powerclamp driver to -maintain a desired target temperature, based on integral and -derivative gains of the past samples. - - - -Calibration ------------ -During scalability testing, it is observed that synchronized actions -among CPUs become challenging as the number of cores grows. This is -also true for the ability of a system to enter package level C-states. - -To make sure the intel_powerclamp driver scales well, online -calibration is implemented. The goals for doing such a calibration -are: - -a) determine the effective range of idle injection ratio -b) determine the amount of compensation needed at each target ratio - -Compensation to each target ratio consists of two parts: - - a) steady state error compensation - This is to offset the error occurring when the system can - enter idle without extra wakeups (such as external interrupts). - - b) dynamic error compensation - When an excessive amount of wakeups occurs during idle, an - additional idle ratio can be added to quiet interrupts, by - slowing down CPU activities. - -A debugfs file is provided for the user to examine compensation -progress and results, such as on a Westmere system. -[jacob@nex01 ~]$ cat -/sys/kernel/debug/intel_powerclamp/powerclamp_calib -controlling cpu: 0 -pct confidence steady dynamic (compensation) -0 0 0 0 -1 1 0 0 -2 1 1 0 -3 3 1 0 -4 3 1 0 -5 3 1 0 -6 3 1 0 -7 3 1 0 -8 3 1 0 -... -30 3 2 0 -31 3 2 0 -32 3 1 0 -33 3 2 0 -34 3 1 0 -35 3 2 0 -36 3 1 0 -37 3 2 0 -38 3 1 0 -39 3 2 0 -40 3 3 0 -41 3 1 0 -42 3 2 0 -43 3 1 0 -44 3 1 0 -45 3 2 0 -46 3 3 0 -47 3 0 0 -48 3 2 0 -49 3 3 0 - -Calibration occurs during runtime. No offline method is available. -Steady state compensation is used only when confidence levels of all -adjacent ratios have reached satisfactory level. A confidence level -is accumulated based on clean data collected at runtime. Data -collected during a period without extra interrupts is considered -clean. - -To compensate for excessive amounts of wakeup during idle, additional -idle time is injected when such a condition is detected. Currently, -we have a simple algorithm to double the injection ratio. A possible -enhancement might be to throttle the offending IRQ, such as delaying -EOI for level triggered interrupts. But it is a challenge to be -non-intrusive to the scheduler or the IRQ core code. - - -CPU Online/Offline ------------------- -Per-CPU kernel threads are started/stopped upon receiving -notifications of CPU hotplug activities. The intel_powerclamp driver -keeps track of clamping kernel threads, even after they are migrated -to other CPUs, after a CPU offline event. - - -===================== -Performance Analysis -===================== -This section describes the general performance data collected on -multiple systems, including Westmere (80P) and Ivy Bridge (4P, 8P). - -Effectiveness and Limitations ------------------------------ -The maximum range that idle injection is allowed is capped at 50 -percent. As mentioned earlier, since interrupts are allowed during -forced idle time, excessive interrupts could result in less -effectiveness. The extreme case would be doing a ping -f to generated -flooded network interrupts without much CPU acknowledgement. In this -case, little can be done from the idle injection threads. In most -normal cases, such as scp a large file, applications can be throttled -by the powerclamp driver, since slowing down the CPU also slows down -network protocol processing, which in turn reduces interrupts. - -When control parameters change at runtime by the controlling CPU, it -may take an additional period for the rest of the CPUs to catch up -with the changes. During this time, idle injection is out of sync, -thus not able to enter package C- states at the expected ratio. But -this effect is minor, in that in most cases change to the target -ratio is updated much less frequently than the idle injection -frequency. - -Scalability ------------ -Tests also show a minor, but measurable, difference between the 4P/8P -Ivy Bridge system and the 80P Westmere server under 50% idle ratio. -More compensation is needed on Westmere for the same amount of -target idle ratio. The compensation also increases as the idle ratio -gets larger. The above reason constitutes the need for the -calibration code. - -On the IVB 8P system, compared to an offline CPU, powerclamp can -achieve up to 40% better performance per watt. (measured by a spin -counter summed over per CPU counting threads spawned for all running -CPUs). - -==================== -Usage and Interfaces -==================== -The powerclamp driver is registered to the generic thermal layer as a -cooling device. Currently, it’s not bound to any thermal zones. - -jacob@chromoly:/sys/class/thermal/cooling_device14$ grep . * -cur_state:0 -max_state:50 -type:intel_powerclamp - -cur_state allows user to set the desired idle percentage. Writing 0 to -cur_state will stop idle injection. Writing a value between 1 and -max_state will start the idle injection. Reading cur_state returns the -actual and current idle percentage. This may not be the same value -set by the user in that current idle percentage depends on workload -and includes natural idle. When idle injection is disabled, reading -cur_state returns value -1 instead of 0 which is to avoid confusing -100% busy state with the disabled state. - -Example usage: -- To inject 25% idle time -$ sudo sh -c "echo 25 > /sys/class/thermal/cooling_device80/cur_state -" - -If the system is not busy and has more than 25% idle time already, -then the powerclamp driver will not start idle injection. Using Top -will not show idle injection kernel threads. - -If the system is busy (spin test below) and has less than 25% natural -idle time, powerclamp kernel threads will do idle injection. Forced -idle time is accounted as normal idle in that common code path is -taken as the idle task. - -In this example, 24.1% idle is shown. This helps the system admin or -user determine the cause of slowdown, when a powerclamp driver is in action. - - -Tasks: 197 total, 1 running, 196 sleeping, 0 stopped, 0 zombie -Cpu(s): 71.2%us, 4.7%sy, 0.0%ni, 24.1%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st -Mem: 3943228k total, 1689632k used, 2253596k free, 74960k buffers -Swap: 4087804k total, 0k used, 4087804k free, 945336k cached - - PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND - 3352 jacob 20 0 262m 644 428 S 286 0.0 0:17.16 spin - 3341 root -51 0 0 0 0 D 25 0.0 0:01.62 kidle_inject/0 - 3344 root -51 0 0 0 0 D 25 0.0 0:01.60 kidle_inject/3 - 3342 root -51 0 0 0 0 D 25 0.0 0:01.61 kidle_inject/1 - 3343 root -51 0 0 0 0 D 25 0.0 0:01.60 kidle_inject/2 - 2935 jacob 20 0 696m 125m 35m S 5 3.3 0:31.11 firefox - 1546 root 20 0 158m 20m 6640 S 3 0.5 0:26.97 Xorg - 2100 jacob 20 0 1223m 88m 30m S 3 2.3 0:23.68 compiz - -Tests have shown that by using the powerclamp driver as a cooling -device, a PID based userspace thermal controller can manage to -control CPU temperature effectively, when no other thermal influence -is added. For example, a UltraBook user can compile the kernel under -certain temperature (below most active trip points). diff --git a/Documentation/thermal/nouveau_thermal b/Documentation/thermal/nouveau_thermal deleted file mode 100644 index 6e17a11efcb0..000000000000 --- a/Documentation/thermal/nouveau_thermal +++ /dev/null @@ -1,82 +0,0 @@ -Kernel driver nouveau -=================== - -Supported chips: -* NV43+ - -Authors: Martin Peres (mupuf) - -Description ---------- - -This driver allows to read the GPU core temperature, drive the GPU fan and -set temperature alarms. - -Currently, due to the absence of in-kernel API to access HWMON drivers, Nouveau -cannot access any of the i2c external monitoring chips it may find. If you -have one of those, temperature and/or fan management through Nouveau's HWMON -interface is likely not to work. This document may then not cover your situation -entirely. - -Temperature management --------------------- - -Temperature is exposed under as a read-only HWMON attribute temp1_input. - -In order to protect the GPU from overheating, Nouveau supports 4 configurable -temperature thresholds: - - * Fan_boost: Fan speed is set to 100% when reaching this temperature; - * Downclock: The GPU will be downclocked to reduce its power dissipation; - * Critical: The GPU is put on hold to further lower power dissipation; - * Shutdown: Shut the computer down to protect your GPU. - -WARNING: Some of these thresholds may not be used by Nouveau depending -on your chipset. - -The default value for these thresholds comes from the GPU's vbios. These -thresholds can be configured thanks to the following HWMON attributes: - - * Fan_boost: temp1_auto_point1_temp and temp1_auto_point1_temp_hyst; - * Downclock: temp1_max and temp1_max_hyst; - * Critical: temp1_crit and temp1_crit_hyst; - * Shutdown: temp1_emergency and temp1_emergency_hyst. - -NOTE: Remember that the values are stored as milli degrees Celsius. Don't forget -to multiply! - -Fan management ------------- - -Not all cards have a drivable fan. If you do, then the following HWMON -attributes should be available: - - * pwm1_enable: Current fan management mode (NONE, MANUAL or AUTO); - * pwm1: Current PWM value (power percentage); - * pwm1_min: The minimum PWM speed allowed; - * pwm1_max: The maximum PWM speed allowed (bypassed when hitting Fan_boost); - -You may also have the following attribute: - - * fan1_input: Speed in RPM of your fan. - -Your fan can be driven in different modes: - - * 0: The fan is left untouched; - * 1: The fan can be driven in manual (use pwm1 to change the speed); - * 2; The fan is driven automatically depending on the temperature. - -NOTE: Be sure to use the manual mode if you want to drive the fan speed manually - -NOTE2: When operating in manual mode outside the vbios-defined -[PWM_min, PWM_max] range, the reported fan speed (RPM) may not be accurate -depending on your hardware. - -Bug reports ---------- - -Thermal management on Nouveau is new and may not work on all cards. If you have -inquiries, please ping mupuf on IRC (#nouveau, freenode). - -Bug reports should be filled on Freedesktop's bug tracker. Please follow -http://nouveau.freedesktop.org/wiki/Bugs diff --git a/Documentation/thermal/nouveau_thermal.rst b/Documentation/thermal/nouveau_thermal.rst new file mode 100644 index 000000000000..37255fd6735d --- /dev/null +++ b/Documentation/thermal/nouveau_thermal.rst @@ -0,0 +1,96 @@ +===================== +Kernel driver nouveau +===================== + +Supported chips: + +* NV43+ + +Authors: Martin Peres (mupuf) + +Description +----------- + +This driver allows to read the GPU core temperature, drive the GPU fan and +set temperature alarms. + +Currently, due to the absence of in-kernel API to access HWMON drivers, Nouveau +cannot access any of the i2c external monitoring chips it may find. If you +have one of those, temperature and/or fan management through Nouveau's HWMON +interface is likely not to work. This document may then not cover your situation +entirely. + +Temperature management +---------------------- + +Temperature is exposed under as a read-only HWMON attribute temp1_input. + +In order to protect the GPU from overheating, Nouveau supports 4 configurable +temperature thresholds: + + * Fan_boost: + Fan speed is set to 100% when reaching this temperature; + * Downclock: + The GPU will be downclocked to reduce its power dissipation; + * Critical: + The GPU is put on hold to further lower power dissipation; + * Shutdown: + Shut the computer down to protect your GPU. + +WARNING: + Some of these thresholds may not be used by Nouveau depending + on your chipset. + +The default value for these thresholds comes from the GPU's vbios. These +thresholds can be configured thanks to the following HWMON attributes: + + * Fan_boost: temp1_auto_point1_temp and temp1_auto_point1_temp_hyst; + * Downclock: temp1_max and temp1_max_hyst; + * Critical: temp1_crit and temp1_crit_hyst; + * Shutdown: temp1_emergency and temp1_emergency_hyst. + +NOTE: Remember that the values are stored as milli degrees Celsius. Don't forget +to multiply! + +Fan management +-------------- + +Not all cards have a drivable fan. If you do, then the following HWMON +attributes should be available: + + * pwm1_enable: + Current fan management mode (NONE, MANUAL or AUTO); + * pwm1: + Current PWM value (power percentage); + * pwm1_min: + The minimum PWM speed allowed; + * pwm1_max: + The maximum PWM speed allowed (bypassed when hitting Fan_boost); + +You may also have the following attribute: + + * fan1_input: + Speed in RPM of your fan. + +Your fan can be driven in different modes: + + * 0: The fan is left untouched; + * 1: The fan can be driven in manual (use pwm1 to change the speed); + * 2; The fan is driven automatically depending on the temperature. + +NOTE: + Be sure to use the manual mode if you want to drive the fan speed manually + +NOTE2: + When operating in manual mode outside the vbios-defined + [PWM_min, PWM_max] range, the reported fan speed (RPM) may not be accurate + depending on your hardware. + +Bug reports +----------- + +Thermal management on Nouveau is new and may not work on all cards. If you have +inquiries, please ping mupuf on IRC (#nouveau, freenode). + +Bug reports should be filled on Freedesktop's bug tracker. Please follow +http://nouveau.freedesktop.org/wiki/Bugs diff --git a/Documentation/thermal/power_allocator.rst b/Documentation/thermal/power_allocator.rst new file mode 100644 index 000000000000..67b6a3297238 --- /dev/null +++ b/Documentation/thermal/power_allocator.rst @@ -0,0 +1,271 @@ +================================= +Power allocator governor tunables +================================= + +Trip points +----------- + +The governor works optimally with the following two passive trip points: + +1. "switch on" trip point: temperature above which the governor + control loop starts operating. This is the first passive trip + point of the thermal zone. + +2. "desired temperature" trip point: it should be higher than the + "switch on" trip point. This the target temperature the governor + is controlling for. This is the last passive trip point of the + thermal zone. + +PID Controller +-------------- + +The power allocator governor implements a +Proportional-Integral-Derivative controller (PID controller) with +temperature as the control input and power as the controlled output: + + P_max = k_p * e + k_i * err_integral + k_d * diff_err + sustainable_power + +where + - e = desired_temperature - current_temperature + - err_integral is the sum of previous errors + - diff_err = e - previous_error + +It is similar to the one depicted below:: + + k_d + | + current_temp | + | v + | +----------+ +---+ + | +----->| diff_err |-->| X |------+ + | | +----------+ +---+ | + | | | tdp actor + | | k_i | | get_requested_power() + | | | | | | | + | | | | | | | ... + v | v v v v v + +---+ | +-------+ +---+ +---+ +---+ +----------+ + | S |-----+----->| sum e |----->| X |--->| S |-->| S |-->|power | + +---+ | +-------+ +---+ +---+ +---+ |allocation| + ^ | ^ +----------+ + | | | | | + | | +---+ | | | + | +------->| X |-------------------+ v v + | +---+ granted performance + desired_temperature ^ + | + | + k_po/k_pu + +Sustainable power +----------------- + +An estimate of the sustainable dissipatable power (in mW) should be +provided while registering the thermal zone. This estimates the +sustained power that can be dissipated at the desired control +temperature. This is the maximum sustained power for allocation at +the desired maximum temperature. The actual sustained power can vary +for a number of reasons. The closed loop controller will take care of +variations such as environmental conditions, and some factors related +to the speed-grade of the silicon. `sustainable_power` is therefore +simply an estimate, and may be tuned to affect the aggressiveness of +the thermal ramp. For reference, the sustainable power of a 4" phone +is typically 2000mW, while on a 10" tablet is around 4500mW (may vary +depending on screen size). + +If you are using device tree, do add it as a property of the +thermal-zone. For example:: + + thermal-zones { + soc_thermal { + polling-delay = <1000>; + polling-delay-passive = <100>; + sustainable-power = <2500>; + ... + +Instead, if the thermal zone is registered from the platform code, pass a +`thermal_zone_params` that has a `sustainable_power`. If no +`thermal_zone_params` were being passed, then something like below +will suffice:: + + static const struct thermal_zone_params tz_params = { + .sustainable_power = 3500, + }; + +and then pass `tz_params` as the 5th parameter to +`thermal_zone_device_register()` + +k_po and k_pu +------------- + +The implementation of the PID controller in the power allocator +thermal governor allows the configuration of two proportional term +constants: `k_po` and `k_pu`. `k_po` is the proportional term +constant during temperature overshoot periods (current temperature is +above "desired temperature" trip point). Conversely, `k_pu` is the +proportional term constant during temperature undershoot periods +(current temperature below "desired temperature" trip point). + +These controls are intended as the primary mechanism for configuring +the permitted thermal "ramp" of the system. For instance, a lower +`k_pu` value will provide a slower ramp, at the cost of capping +available capacity at a low temperature. On the other hand, a high +value of `k_pu` will result in the governor granting very high power +while temperature is low, and may lead to temperature overshooting. + +The default value for `k_pu` is:: + + 2 * sustainable_power / (desired_temperature - switch_on_temp) + +This means that at `switch_on_temp` the output of the controller's +proportional term will be 2 * `sustainable_power`. The default value +for `k_po` is:: + + sustainable_power / (desired_temperature - switch_on_temp) + +Focusing on the proportional and feed forward values of the PID +controller equation we have:: + + P_max = k_p * e + sustainable_power + +The proportional term is proportional to the difference between the +desired temperature and the current one. When the current temperature +is the desired one, then the proportional component is zero and +`P_max` = `sustainable_power`. That is, the system should operate in +thermal equilibrium under constant load. `sustainable_power` is only +an estimate, which is the reason for closed-loop control such as this. + +Expanding `k_pu` we get:: + + P_max = 2 * sustainable_power * (T_set - T) / (T_set - T_on) + + sustainable_power + +where: + + - T_set is the desired temperature + - T is the current temperature + - T_on is the switch on temperature + +When the current temperature is the switch_on temperature, the above +formula becomes:: + + P_max = 2 * sustainable_power * (T_set - T_on) / (T_set - T_on) + + sustainable_power = 2 * sustainable_power + sustainable_power = + 3 * sustainable_power + +Therefore, the proportional term alone linearly decreases power from +3 * `sustainable_power` to `sustainable_power` as the temperature +rises from the switch on temperature to the desired temperature. + +k_i and integral_cutoff +----------------------- + +`k_i` configures the PID loop's integral term constant. This term +allows the PID controller to compensate for long term drift and for +the quantized nature of the output control: cooling devices can't set +the exact power that the governor requests. When the temperature +error is below `integral_cutoff`, errors are accumulated in the +integral term. This term is then multiplied by `k_i` and the result +added to the output of the controller. Typically `k_i` is set low (1 +or 2) and `integral_cutoff` is 0. + +k_d +--- + +`k_d` configures the PID loop's derivative term constant. It's +recommended to leave it as the default: 0. + +Cooling device power API +======================== + +Cooling devices controlled by this governor must supply the additional +"power" API in their `cooling_device_ops`. It consists on three ops: + +1. :: + + int get_requested_power(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, u32 *power); + + +@cdev: + The `struct thermal_cooling_device` pointer +@tz: + thermal zone in which we are currently operating +@power: + pointer in which to store the calculated power + +`get_requested_power()` calculates the power requested by the device +in milliwatts and stores it in @power . It should return 0 on +success, -E* on failure. This is currently used by the power +allocator governor to calculate how much power to give to each cooling +device. + +2. :: + + int state2power(struct thermal_cooling_device *cdev, struct + thermal_zone_device *tz, unsigned long state, + u32 *power); + +@cdev: + The `struct thermal_cooling_device` pointer +@tz: + thermal zone in which we are currently operating +@state: + A cooling device state +@power: + pointer in which to store the equivalent power + +Convert cooling device state @state into power consumption in +milliwatts and store it in @power. It should return 0 on success, -E* +on failure. This is currently used by thermal core to calculate the +maximum power that an actor can consume. + +3. :: + + int power2state(struct thermal_cooling_device *cdev, u32 power, + unsigned long *state); + +@cdev: + The `struct thermal_cooling_device` pointer +@power: + power in milliwatts +@state: + pointer in which to store the resulting state + +Calculate a cooling device state that would make the device consume at +most @power mW and store it in @state. It should return 0 on success, +-E* on failure. This is currently used by the thermal core to convert +a given power set by the power allocator governor to a state that the +cooling device can set. It is a function because this conversion may +depend on external factors that may change so this function should the +best conversion given "current circumstances". + +Cooling device weights +---------------------- + +Weights are a mechanism to bias the allocation among cooling +devices. They express the relative power efficiency of different +cooling devices. Higher weight can be used to express higher power +efficiency. Weighting is relative such that if each cooling device +has a weight of one they are considered equal. This is particularly +useful in heterogeneous systems where two cooling devices may perform +the same kind of compute, but with different efficiency. For example, +a system with two different types of processors. + +If the thermal zone is registered using +`thermal_zone_device_register()` (i.e., platform code), then weights +are passed as part of the thermal zone's `thermal_bind_parameters`. +If the platform is registered using device tree, then they are passed +as the `contribution` property of each map in the `cooling-maps` node. + +Limitations of the power allocator governor +=========================================== + +The power allocator governor's PID controller works best if there is a +periodic tick. If you have a driver that calls +`thermal_zone_device_update()` (or anything that ends up calling the +governor's `throttle()` function) repetitively, the governor response +won't be very good. Note that this is not particular to this +governor, step-wise will also misbehave if you call its throttle() +faster than the normal thermal framework tick (due to interrupts for +example) as it will overreact. diff --git a/Documentation/thermal/power_allocator.txt b/Documentation/thermal/power_allocator.txt deleted file mode 100644 index 9fb0ff06dca9..000000000000 --- a/Documentation/thermal/power_allocator.txt +++ /dev/null @@ -1,247 +0,0 @@ -Power allocator governor tunables -================================= - -Trip points ------------ - -The governor works optimally with the following two passive trip points: - -1. "switch on" trip point: temperature above which the governor - control loop starts operating. This is the first passive trip - point of the thermal zone. - -2. "desired temperature" trip point: it should be higher than the - "switch on" trip point. This the target temperature the governor - is controlling for. This is the last passive trip point of the - thermal zone. - -PID Controller --------------- - -The power allocator governor implements a -Proportional-Integral-Derivative controller (PID controller) with -temperature as the control input and power as the controlled output: - - P_max = k_p * e + k_i * err_integral + k_d * diff_err + sustainable_power - -where - e = desired_temperature - current_temperature - err_integral is the sum of previous errors - diff_err = e - previous_error - -It is similar to the one depicted below: - - k_d - | -current_temp | - | v - | +----------+ +---+ - | +----->| diff_err |-->| X |------+ - | | +----------+ +---+ | - | | | tdp actor - | | k_i | | get_requested_power() - | | | | | | | - | | | | | | | ... - v | v v v v v - +---+ | +-------+ +---+ +---+ +---+ +----------+ - | S |-------+----->| sum e |----->| X |--->| S |-->| S |-->|power | - +---+ | +-------+ +---+ +---+ +---+ |allocation| - ^ | ^ +----------+ - | | | | | - | | +---+ | | | - | +------->| X |-------------------+ v v - | +---+ granted performance -desired_temperature ^ - | - | - k_po/k_pu - -Sustainable power ------------------ - -An estimate of the sustainable dissipatable power (in mW) should be -provided while registering the thermal zone. This estimates the -sustained power that can be dissipated at the desired control -temperature. This is the maximum sustained power for allocation at -the desired maximum temperature. The actual sustained power can vary -for a number of reasons. The closed loop controller will take care of -variations such as environmental conditions, and some factors related -to the speed-grade of the silicon. `sustainable_power` is therefore -simply an estimate, and may be tuned to affect the aggressiveness of -the thermal ramp. For reference, the sustainable power of a 4" phone -is typically 2000mW, while on a 10" tablet is around 4500mW (may vary -depending on screen size). - -If you are using device tree, do add it as a property of the -thermal-zone. For example: - - thermal-zones { - soc_thermal { - polling-delay = <1000>; - polling-delay-passive = <100>; - sustainable-power = <2500>; - ... - -Instead, if the thermal zone is registered from the platform code, pass a -`thermal_zone_params` that has a `sustainable_power`. If no -`thermal_zone_params` were being passed, then something like below -will suffice: - - static const struct thermal_zone_params tz_params = { - .sustainable_power = 3500, - }; - -and then pass `tz_params` as the 5th parameter to -`thermal_zone_device_register()` - -k_po and k_pu -------------- - -The implementation of the PID controller in the power allocator -thermal governor allows the configuration of two proportional term -constants: `k_po` and `k_pu`. `k_po` is the proportional term -constant during temperature overshoot periods (current temperature is -above "desired temperature" trip point). Conversely, `k_pu` is the -proportional term constant during temperature undershoot periods -(current temperature below "desired temperature" trip point). - -These controls are intended as the primary mechanism for configuring -the permitted thermal "ramp" of the system. For instance, a lower -`k_pu` value will provide a slower ramp, at the cost of capping -available capacity at a low temperature. On the other hand, a high -value of `k_pu` will result in the governor granting very high power -while temperature is low, and may lead to temperature overshooting. - -The default value for `k_pu` is: - - 2 * sustainable_power / (desired_temperature - switch_on_temp) - -This means that at `switch_on_temp` the output of the controller's -proportional term will be 2 * `sustainable_power`. The default value -for `k_po` is: - - sustainable_power / (desired_temperature - switch_on_temp) - -Focusing on the proportional and feed forward values of the PID -controller equation we have: - - P_max = k_p * e + sustainable_power - -The proportional term is proportional to the difference between the -desired temperature and the current one. When the current temperature -is the desired one, then the proportional component is zero and -`P_max` = `sustainable_power`. That is, the system should operate in -thermal equilibrium under constant load. `sustainable_power` is only -an estimate, which is the reason for closed-loop control such as this. - -Expanding `k_pu` we get: - P_max = 2 * sustainable_power * (T_set - T) / (T_set - T_on) + - sustainable_power - -where - T_set is the desired temperature - T is the current temperature - T_on is the switch on temperature - -When the current temperature is the switch_on temperature, the above -formula becomes: - - P_max = 2 * sustainable_power * (T_set - T_on) / (T_set - T_on) + - sustainable_power = 2 * sustainable_power + sustainable_power = - 3 * sustainable_power - -Therefore, the proportional term alone linearly decreases power from -3 * `sustainable_power` to `sustainable_power` as the temperature -rises from the switch on temperature to the desired temperature. - -k_i and integral_cutoff ------------------------ - -`k_i` configures the PID loop's integral term constant. This term -allows the PID controller to compensate for long term drift and for -the quantized nature of the output control: cooling devices can't set -the exact power that the governor requests. When the temperature -error is below `integral_cutoff`, errors are accumulated in the -integral term. This term is then multiplied by `k_i` and the result -added to the output of the controller. Typically `k_i` is set low (1 -or 2) and `integral_cutoff` is 0. - -k_d ---- - -`k_d` configures the PID loop's derivative term constant. It's -recommended to leave it as the default: 0. - -Cooling device power API -======================== - -Cooling devices controlled by this governor must supply the additional -"power" API in their `cooling_device_ops`. It consists on three ops: - -1. int get_requested_power(struct thermal_cooling_device *cdev, - struct thermal_zone_device *tz, u32 *power); -@cdev: The `struct thermal_cooling_device` pointer -@tz: thermal zone in which we are currently operating -@power: pointer in which to store the calculated power - -`get_requested_power()` calculates the power requested by the device -in milliwatts and stores it in @power . It should return 0 on -success, -E* on failure. This is currently used by the power -allocator governor to calculate how much power to give to each cooling -device. - -2. int state2power(struct thermal_cooling_device *cdev, struct - thermal_zone_device *tz, unsigned long state, u32 *power); -@cdev: The `struct thermal_cooling_device` pointer -@tz: thermal zone in which we are currently operating -@state: A cooling device state -@power: pointer in which to store the equivalent power - -Convert cooling device state @state into power consumption in -milliwatts and store it in @power. It should return 0 on success, -E* -on failure. This is currently used by thermal core to calculate the -maximum power that an actor can consume. - -3. int power2state(struct thermal_cooling_device *cdev, u32 power, - unsigned long *state); -@cdev: The `struct thermal_cooling_device` pointer -@power: power in milliwatts -@state: pointer in which to store the resulting state - -Calculate a cooling device state that would make the device consume at -most @power mW and store it in @state. It should return 0 on success, --E* on failure. This is currently used by the thermal core to convert -a given power set by the power allocator governor to a state that the -cooling device can set. It is a function because this conversion may -depend on external factors that may change so this function should the -best conversion given "current circumstances". - -Cooling device weights ----------------------- - -Weights are a mechanism to bias the allocation among cooling -devices. They express the relative power efficiency of different -cooling devices. Higher weight can be used to express higher power -efficiency. Weighting is relative such that if each cooling device -has a weight of one they are considered equal. This is particularly -useful in heterogeneous systems where two cooling devices may perform -the same kind of compute, but with different efficiency. For example, -a system with two different types of processors. - -If the thermal zone is registered using -`thermal_zone_device_register()` (i.e., platform code), then weights -are passed as part of the thermal zone's `thermal_bind_parameters`. -If the platform is registered using device tree, then they are passed -as the `contribution` property of each map in the `cooling-maps` node. - -Limitations of the power allocator governor -=========================================== - -The power allocator governor's PID controller works best if there is a -periodic tick. If you have a driver that calls -`thermal_zone_device_update()` (or anything that ends up calling the -governor's `throttle()` function) repetitively, the governor response -won't be very good. Note that this is not particular to this -governor, step-wise will also misbehave if you call its throttle() -faster than the normal thermal framework tick (due to interrupts for -example) as it will overreact. diff --git a/Documentation/thermal/sysfs-api.rst b/Documentation/thermal/sysfs-api.rst new file mode 100644 index 000000000000..e4930761d3e5 --- /dev/null +++ b/Documentation/thermal/sysfs-api.rst @@ -0,0 +1,798 @@ +=================================== +Generic Thermal Sysfs driver How To +=================================== + +Written by Sujith Thomas , Zhang Rui + +Updated: 2 January 2008 + +Copyright (c) 2008 Intel Corporation + + +0. Introduction +=============== + +The generic thermal sysfs provides a set of interfaces for thermal zone +devices (sensors) and thermal cooling devices (fan, processor...) to register +with the thermal management solution and to be a part of it. + +This how-to focuses on enabling new thermal zone and cooling devices to +participate in thermal management. +This solution is platform independent and any type of thermal zone devices +and cooling devices should be able to make use of the infrastructure. + +The main task of the thermal sysfs driver is to expose thermal zone attributes +as well as cooling device attributes to the user space. +An intelligent thermal management application can make decisions based on +inputs from thermal zone attributes (the current temperature and trip point +temperature) and throttle appropriate devices. + +- `[0-*]` denotes any positive number starting from 0 +- `[1-*]` denotes any positive number starting from 1 + +1. thermal sysfs driver interface functions +=========================================== + +1.1 thermal zone device interface +--------------------------------- + + :: + + struct thermal_zone_device + *thermal_zone_device_register(char *type, + int trips, int mask, void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp, + int passive_delay, int polling_delay)) + + This interface function adds a new thermal zone device (sensor) to + /sys/class/thermal folder as `thermal_zone[0-*]`. It tries to bind all the + thermal cooling devices registered at the same time. + + type: + the thermal zone type. + trips: + the total number of trip points this thermal zone supports. + mask: + Bit string: If 'n'th bit is set, then trip point 'n' is writeable. + devdata: + device private data + ops: + thermal zone device call-backs. + + .bind: + bind the thermal zone device with a thermal cooling device. + .unbind: + unbind the thermal zone device with a thermal cooling device. + .get_temp: + get the current temperature of the thermal zone. + .set_trips: + set the trip points window. Whenever the current temperature + is updated, the trip points immediately below and above the + current temperature are found. + .get_mode: + get the current mode (enabled/disabled) of the thermal zone. + + - "enabled" means the kernel thermal management is + enabled. + - "disabled" will prevent kernel thermal driver action + upon trip points so that user applications can take + charge of thermal management. + .set_mode: + set the mode (enabled/disabled) of the thermal zone. + .get_trip_type: + get the type of certain trip point. + .get_trip_temp: + get the temperature above which the certain trip point + will be fired. + .set_emul_temp: + set the emulation temperature which helps in debugging + different threshold temperature points. + tzp: + thermal zone platform parameters. + passive_delay: + number of milliseconds to wait between polls when + performing passive cooling. + polling_delay: + number of milliseconds to wait between polls when checking + whether trip points have been crossed (0 for interrupt driven systems). + + :: + + void thermal_zone_device_unregister(struct thermal_zone_device *tz) + + This interface function removes the thermal zone device. + It deletes the corresponding entry from /sys/class/thermal folder and + unbinds all the thermal cooling devices it uses. + + :: + + struct thermal_zone_device + *thermal_zone_of_sensor_register(struct device *dev, int sensor_id, + void *data, + const struct thermal_zone_of_device_ops *ops) + + This interface adds a new sensor to a DT thermal zone. + This function will search the list of thermal zones described in + device tree and look for the zone that refer to the sensor device + pointed by dev->of_node as temperature providers. For the zone + pointing to the sensor node, the sensor will be added to the DT + thermal zone device. + + The parameters for this interface are: + + dev: + Device node of sensor containing valid node pointer in + dev->of_node. + sensor_id: + a sensor identifier, in case the sensor IP has more + than one sensors + data: + a private pointer (owned by the caller) that will be + passed back, when a temperature reading is needed. + ops: + `struct thermal_zone_of_device_ops *`. + + ============== ======================================= + get_temp a pointer to a function that reads the + sensor temperature. This is mandatory + callback provided by sensor driver. + set_trips a pointer to a function that sets a + temperature window. When this window is + left the driver must inform the thermal + core via thermal_zone_device_update. + get_trend a pointer to a function that reads the + sensor temperature trend. + set_emul_temp a pointer to a function that sets + sensor emulated temperature. + ============== ======================================= + + The thermal zone temperature is provided by the get_temp() function + pointer of thermal_zone_of_device_ops. When called, it will + have the private pointer @data back. + + It returns error pointer if fails otherwise valid thermal zone device + handle. Caller should check the return handle with IS_ERR() for finding + whether success or not. + + :: + + void thermal_zone_of_sensor_unregister(struct device *dev, + struct thermal_zone_device *tzd) + + This interface unregisters a sensor from a DT thermal zone which was + successfully added by interface thermal_zone_of_sensor_register(). + This function removes the sensor callbacks and private data from the + thermal zone device registered with thermal_zone_of_sensor_register() + interface. It will also silent the zone by remove the .get_temp() and + get_trend() thermal zone device callbacks. + + :: + + struct thermal_zone_device + *devm_thermal_zone_of_sensor_register(struct device *dev, + int sensor_id, + void *data, + const struct thermal_zone_of_device_ops *ops) + + This interface is resource managed version of + thermal_zone_of_sensor_register(). + + All details of thermal_zone_of_sensor_register() described in + section 1.1.3 is applicable here. + + The benefit of using this interface to register sensor is that it + is not require to explicitly call thermal_zone_of_sensor_unregister() + in error path or during driver unbinding as this is done by driver + resource manager. + + :: + + void devm_thermal_zone_of_sensor_unregister(struct device *dev, + struct thermal_zone_device *tzd) + + This interface is resource managed version of + thermal_zone_of_sensor_unregister(). + All details of thermal_zone_of_sensor_unregister() described in + section 1.1.4 is applicable here. + Normally this function will not need to be called and the resource + management code will ensure that the resource is freed. + + :: + + int thermal_zone_get_slope(struct thermal_zone_device *tz) + + This interface is used to read the slope attribute value + for the thermal zone device, which might be useful for platform + drivers for temperature calculations. + + :: + + int thermal_zone_get_offset(struct thermal_zone_device *tz) + + This interface is used to read the offset attribute value + for the thermal zone device, which might be useful for platform + drivers for temperature calculations. + +1.2 thermal cooling device interface +------------------------------------ + + + :: + + struct thermal_cooling_device + *thermal_cooling_device_register(char *name, + void *devdata, struct thermal_cooling_device_ops *) + + This interface function adds a new thermal cooling device (fan/processor/...) + to /sys/class/thermal/ folder as `cooling_device[0-*]`. It tries to bind itself + to all the thermal zone devices registered at the same time. + + name: + the cooling device name. + devdata: + device private data. + ops: + thermal cooling devices call-backs. + + .get_max_state: + get the Maximum throttle state of the cooling device. + .get_cur_state: + get the Currently requested throttle state of the + cooling device. + .set_cur_state: + set the Current throttle state of the cooling device. + + :: + + void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev) + + This interface function removes the thermal cooling device. + It deletes the corresponding entry from /sys/class/thermal folder and + unbinds itself from all the thermal zone devices using it. + +1.3 interface for binding a thermal zone device with a thermal cooling device +----------------------------------------------------------------------------- + + :: + + int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, + int trip, struct thermal_cooling_device *cdev, + unsigned long upper, unsigned long lower, unsigned int weight); + + This interface function binds a thermal cooling device to a particular trip + point of a thermal zone device. + + This function is usually called in the thermal zone device .bind callback. + + tz: + the thermal zone device + cdev: + thermal cooling device + trip: + indicates which trip point in this thermal zone the cooling device + is associated with. + upper: + the Maximum cooling state for this trip point. + THERMAL_NO_LIMIT means no upper limit, + and the cooling device can be in max_state. + lower: + the Minimum cooling state can be used for this trip point. + THERMAL_NO_LIMIT means no lower limit, + and the cooling device can be in cooling state 0. + weight: + the influence of this cooling device in this thermal + zone. See 1.4.1 below for more information. + + :: + + int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz, + int trip, struct thermal_cooling_device *cdev); + + This interface function unbinds a thermal cooling device from a particular + trip point of a thermal zone device. This function is usually called in + the thermal zone device .unbind callback. + + tz: + the thermal zone device + cdev: + thermal cooling device + trip: + indicates which trip point in this thermal zone the cooling device + is associated with. + +1.4 Thermal Zone Parameters +--------------------------- + + :: + + struct thermal_bind_params + + This structure defines the following parameters that are used to bind + a zone with a cooling device for a particular trip point. + + .cdev: + The cooling device pointer + .weight: + The 'influence' of a particular cooling device on this + zone. This is relative to the rest of the cooling + devices. For example, if all cooling devices have a + weight of 1, then they all contribute the same. You can + use percentages if you want, but it's not mandatory. A + weight of 0 means that this cooling device doesn't + contribute to the cooling of this zone unless all cooling + devices have a weight of 0. If all weights are 0, then + they all contribute the same. + .trip_mask: + This is a bit mask that gives the binding relation between + this thermal zone and cdev, for a particular trip point. + If nth bit is set, then the cdev and thermal zone are bound + for trip point n. + .binding_limits: + This is an array of cooling state limits. Must have + exactly 2 * thermal_zone.number_of_trip_points. It is an + array consisting of tuples of + state limits. Each trip will be associated with one state + limit tuple when binding. A NULL pointer means + on all trips. + These limits are used when binding a cdev to a trip point. + .match: + This call back returns success(0) if the 'tz and cdev' need to + be bound, as per platform data. + + :: + + struct thermal_zone_params + + This structure defines the platform level parameters for a thermal zone. + This data, for each thermal zone should come from the platform layer. + This is an optional feature where some platforms can choose not to + provide this data. + + .governor_name: + Name of the thermal governor used for this zone + .no_hwmon: + a boolean to indicate if the thermal to hwmon sysfs interface + is required. when no_hwmon == false, a hwmon sysfs interface + will be created. when no_hwmon == true, nothing will be done. + In case the thermal_zone_params is NULL, the hwmon interface + will be created (for backward compatibility). + .num_tbps: + Number of thermal_bind_params entries for this zone + .tbp: + thermal_bind_params entries + +2. sysfs attributes structure +============================= + +== ================ +RO read only value +WO write only value +RW read/write value +== ================ + +Thermal sysfs attributes will be represented under /sys/class/thermal. +Hwmon sysfs I/F extension is also available under /sys/class/hwmon +if hwmon is compiled in or built as a module. + +Thermal zone device sys I/F, created once it's registered:: + + /sys/class/thermal/thermal_zone[0-*]: + |---type: Type of the thermal zone + |---temp: Current temperature + |---mode: Working mode of the thermal zone + |---policy: Thermal governor used for this zone + |---available_policies: Available thermal governors for this zone + |---trip_point_[0-*]_temp: Trip point temperature + |---trip_point_[0-*]_type: Trip point type + |---trip_point_[0-*]_hyst: Hysteresis value for this trip point + |---emul_temp: Emulated temperature set node + |---sustainable_power: Sustainable dissipatable power + |---k_po: Proportional term during temperature overshoot + |---k_pu: Proportional term during temperature undershoot + |---k_i: PID's integral term in the power allocator gov + |---k_d: PID's derivative term in the power allocator + |---integral_cutoff: Offset above which errors are accumulated + |---slope: Slope constant applied as linear extrapolation + |---offset: Offset constant applied as linear extrapolation + +Thermal cooling device sys I/F, created once it's registered:: + + /sys/class/thermal/cooling_device[0-*]: + |---type: Type of the cooling device(processor/fan/...) + |---max_state: Maximum cooling state of the cooling device + |---cur_state: Current cooling state of the cooling device + |---stats: Directory containing cooling device's statistics + |---stats/reset: Writing any value resets the statistics + |---stats/time_in_state_ms: Time (msec) spent in various cooling states + |---stats/total_trans: Total number of times cooling state is changed + |---stats/trans_table: Cooing state transition table + + +Then next two dynamic attributes are created/removed in pairs. They represent +the relationship between a thermal zone and its associated cooling device. +They are created/removed for each successful execution of +thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device. + +:: + + /sys/class/thermal/thermal_zone[0-*]: + |---cdev[0-*]: [0-*]th cooling device in current thermal zone + |---cdev[0-*]_trip_point: Trip point that cdev[0-*] is associated with + |---cdev[0-*]_weight: Influence of the cooling device in + this thermal zone + +Besides the thermal zone device sysfs I/F and cooling device sysfs I/F, +the generic thermal driver also creates a hwmon sysfs I/F for each _type_ +of thermal zone device. E.g. the generic thermal driver registers one hwmon +class device and build the associated hwmon sysfs I/F for all the registered +ACPI thermal zones. + +:: + + /sys/class/hwmon/hwmon[0-*]: + |---name: The type of the thermal zone devices + |---temp[1-*]_input: The current temperature of thermal zone [1-*] + |---temp[1-*]_critical: The critical trip point of thermal zone [1-*] + +Please read Documentation/hwmon/sysfs-interface.rst for additional information. + +Thermal zone attributes +----------------------- + +type + Strings which represent the thermal zone type. + This is given by thermal zone driver as part of registration. + E.g: "acpitz" indicates it's an ACPI thermal device. + In order to keep it consistent with hwmon sys attribute; this should + be a short, lowercase string, not containing spaces nor dashes. + RO, Required + +temp + Current temperature as reported by thermal zone (sensor). + Unit: millidegree Celsius + RO, Required + +mode + One of the predefined values in [enabled, disabled]. + This file gives information about the algorithm that is currently + managing the thermal zone. It can be either default kernel based + algorithm or user space application. + + enabled + enable Kernel Thermal management. + disabled + Preventing kernel thermal zone driver actions upon + trip points so that user application can take full + charge of the thermal management. + + RW, Optional + +policy + One of the various thermal governors used for a particular zone. + + RW, Required + +available_policies + Available thermal governors which can be used for a particular zone. + + RO, Required + +`trip_point_[0-*]_temp` + The temperature above which trip point will be fired. + + Unit: millidegree Celsius + + RO, Optional + +`trip_point_[0-*]_type` + Strings which indicate the type of the trip point. + + E.g. it can be one of critical, hot, passive, `active[0-*]` for ACPI + thermal zone. + + RO, Optional + +`trip_point_[0-*]_hyst` + The hysteresis value for a trip point, represented as an integer + Unit: Celsius + RW, Optional + +`cdev[0-*]` + Sysfs link to the thermal cooling device node where the sys I/F + for cooling device throttling control represents. + + RO, Optional + +`cdev[0-*]_trip_point` + The trip point in this thermal zone which `cdev[0-*]` is associated + with; -1 means the cooling device is not associated with any trip + point. + + RO, Optional + +`cdev[0-*]_weight` + The influence of `cdev[0-*]` in this thermal zone. This value + is relative to the rest of cooling devices in the thermal + zone. For example, if a cooling device has a weight double + than that of other, it's twice as effective in cooling the + thermal zone. + + RW, Optional + +passive + Attribute is only present for zones in which the passive cooling + policy is not supported by native thermal driver. Default is zero + and can be set to a temperature (in millidegrees) to enable a + passive trip point for the zone. Activation is done by polling with + an interval of 1 second. + + Unit: millidegrees Celsius + + Valid values: 0 (disabled) or greater than 1000 + + RW, Optional + +emul_temp + Interface to set the emulated temperature method in thermal zone + (sensor). After setting this temperature, the thermal zone may pass + this temperature to platform emulation function if registered or + cache it locally. This is useful in debugging different temperature + threshold and its associated cooling action. This is write only node + and writing 0 on this node should disable emulation. + Unit: millidegree Celsius + + WO, Optional + + WARNING: + Be careful while enabling this option on production systems, + because userland can easily disable the thermal policy by simply + flooding this sysfs node with low temperature values. + +sustainable_power + An estimate of the sustained power that can be dissipated by + the thermal zone. Used by the power allocator governor. For + more information see Documentation/thermal/power_allocator.rst + + Unit: milliwatts + + RW, Optional + +k_po + The proportional term of the power allocator governor's PID + controller during temperature overshoot. Temperature overshoot + is when the current temperature is above the "desired + temperature" trip point. For more information see + Documentation/thermal/power_allocator.rst + + RW, Optional + +k_pu + The proportional term of the power allocator governor's PID + controller during temperature undershoot. Temperature undershoot + is when the current temperature is below the "desired + temperature" trip point. For more information see + Documentation/thermal/power_allocator.rst + + RW, Optional + +k_i + The integral term of the power allocator governor's PID + controller. This term allows the PID controller to compensate + for long term drift. For more information see + Documentation/thermal/power_allocator.rst + + RW, Optional + +k_d + The derivative term of the power allocator governor's PID + controller. For more information see + Documentation/thermal/power_allocator.rst + + RW, Optional + +integral_cutoff + Temperature offset from the desired temperature trip point + above which the integral term of the power allocator + governor's PID controller starts accumulating errors. For + example, if integral_cutoff is 0, then the integral term only + accumulates error when temperature is above the desired + temperature trip point. For more information see + Documentation/thermal/power_allocator.rst + + Unit: millidegree Celsius + + RW, Optional + +slope + The slope constant used in a linear extrapolation model + to determine a hotspot temperature based off the sensor's + raw readings. It is up to the device driver to determine + the usage of these values. + + RW, Optional + +offset + The offset constant used in a linear extrapolation model + to determine a hotspot temperature based off the sensor's + raw readings. It is up to the device driver to determine + the usage of these values. + + RW, Optional + +Cooling device attributes +------------------------- + +type + String which represents the type of device, e.g: + + - for generic ACPI: should be "Fan", "Processor" or "LCD" + - for memory controller device on intel_menlow platform: + should be "Memory controller". + + RO, Required + +max_state + The maximum permissible cooling state of this cooling device. + + RO, Required + +cur_state + The current cooling state of this cooling device. + The value can any integer numbers between 0 and max_state: + + - cur_state == 0 means no cooling + - cur_state == max_state means the maximum cooling. + + RW, Required + +stats/reset + Writing any value resets the cooling device's statistics. + WO, Required + +stats/time_in_state_ms: + The amount of time spent by the cooling device in various cooling + states. The output will have "